Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index 0653f4f..519866b 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -5,19 +5,19 @@
select MMU_NOTIFIER
select CRC32
select I2C_ALGOBIT
- ---help---
+ help
This is a low-level driver for Intel OPA Gen1 adapter.
config HFI1_DEBUG_SDMA_ORDER
bool "HFI1 SDMA Order debug"
depends on INFINIBAND_HFI1
default n
- ---help---
+ help
This is a debug flag to test for out of order
sdma completions for unit testing
config SDMA_VERBOSITY
bool "Config SDMA Verbosity"
depends on INFINIBAND_HFI1
default n
- ---help---
+ help
This is a configuration flag to enable verbose
SDMA debug
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 0405d26..2e89ec1 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -22,9 +22,13 @@
init.o \
intr.o \
iowait.o \
+ ipoib_main.o \
+ ipoib_rx.o \
+ ipoib_tx.o \
mad.o \
mmu_rb.o \
msix.o \
+ netdev_rx.o \
opfn.o \
pcie.o \
pio.o \
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 1aeea5d..04b1e8f 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -64,6 +64,7 @@
static const char * const irq_type_names[] = {
"SDMA",
"RCVCTXT",
+ "NETDEVCTXT",
"GENERAL",
"OTHER",
};
@@ -631,22 +632,11 @@
*/
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
- int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
int curr_cpu, possible, i, ret;
bool new_entry = false;
- /*
- * If the BIOS does not have the NUMA node information set, select
- * NUMA 0 so we get consistent performance.
- */
- if (node < 0) {
- dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
- node = 0;
- }
- dd->node = node;
-
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
@@ -659,7 +649,7 @@
* create an entry in the global affinity structure and initialize it.
*/
if (!entry) {
- entry = node_affinity_allocate(node);
+ entry = node_affinity_allocate(dd->node);
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
@@ -750,6 +740,7 @@
if (new_entry)
node_affinity_add_tail(entry);
+ dd->affinity_entry = entry;
mutex_unlock(&node_affinity.lock);
return 0;
@@ -765,10 +756,9 @@
{
struct hfi1_affinity_node *entry;
- if (dd->node < 0)
- return;
-
mutex_lock(&node_affinity.lock);
+ if (!dd->affinity_entry)
+ goto unlock;
entry = node_affinity_lookup(dd->node);
if (!entry)
goto unlock;
@@ -779,8 +769,8 @@
*/
_dev_comp_vect_cpu_mask_clean_up(dd, entry);
unlock:
+ dd->affinity_entry = NULL;
mutex_unlock(&node_affinity.lock);
- dd->node = NUMA_NO_NODE;
}
/*
@@ -915,6 +905,11 @@
set = &entry->rcv_intr;
scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
break;
+ case IRQ_NETDEVCTXT:
+ rcd = (struct hfi1_ctxtdata *)msix->arg;
+ set = &entry->def_intr;
+ scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
+ break;
default:
dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
return -EINVAL;
@@ -987,6 +982,10 @@
if (rcd->ctxt != HFI1_CTRL_CTXT)
set = &entry->rcv_intr;
break;
+ case IRQ_NETDEVCTXT:
+ rcd = (struct hfi1_ctxtdata *)msix->arg;
+ set = &entry->def_intr;
+ break;
default:
mutex_unlock(&node_affinity.lock);
return;
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 6a7e6ea..f94ed5d 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -52,6 +52,7 @@
enum irq_type {
IRQ_SDMA,
IRQ_RCVCTXT,
+ IRQ_NETDEVCTXT,
IRQ_GENERAL,
IRQ_OTHER
};
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 10924f1..88476a1 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -66,10 +66,7 @@
#include "affinity.h"
#include "debugfs.h"
#include "fault.h"
-
-uint kdeth_qp;
-module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
-MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
+#include "netdev.h"
uint num_vls = HFI1_MAX_VLS_SUPPORTED;
module_param(num_vls, uint, S_IRUGO);
@@ -128,13 +125,15 @@
/*
* RSM instance allocation
- * 0 - Verbs
- * 1 - User Fecn Handling
- * 2 - Vnic
+ * 0 - User Fecn Handling
+ * 1 - Vnic
+ * 2 - AIP
+ * 3 - Verbs
*/
-#define RSM_INS_VERBS 0
-#define RSM_INS_FECN 1
-#define RSM_INS_VNIC 2
+#define RSM_INS_FECN 0
+#define RSM_INS_VNIC 1
+#define RSM_INS_AIP 2
+#define RSM_INS_VERBS 3
/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT 39
@@ -175,6 +174,25 @@
/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
+/* RSM fields for AIP */
+/* LRH.BTH above is reused for this rule */
+
+/* BTH.DESTQP: QW 1, OFFSET 16 for match */
+#define BTH_DESTQP_QW 1ull
+#define BTH_DESTQP_BIT_OFFSET 16ull
+#define BTH_DESTQP_OFFSET(off) ((BTH_DESTQP_QW << QW_SHIFT) | (off))
+#define BTH_DESTQP_MATCH_OFFSET BTH_DESTQP_OFFSET(BTH_DESTQP_BIT_OFFSET)
+#define BTH_DESTQP_MASK 0xFFull
+#define BTH_DESTQP_VALUE 0x81ull
+
+/* DETH.SQPN: QW 1 Offset 56 for select */
+/* We use the 8 most significant source QPN bits as entropy for AIP */
+#define DETH_AIP_SQPN_QW 3ull
+#define DETH_AIP_SQPN_BIT_OFFSET 56ull
+#define DETH_AIP_SQPN_OFFSET(off) ((DETH_AIP_SQPN_QW << QW_SHIFT) | (off))
+#define DETH_AIP_SQPN_SELECT_OFFSET \
+ DETH_AIP_SQPN_OFFSET(DETH_AIP_SQPN_BIT_OFFSET)
+
/* RSM fields for Vnic */
/* L2_TYPE: QW 0, OFFSET 61 - for match */
#define L2_TYPE_QW 0ull
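The match/select macros above pack a quadword index (via QW_SHIFT) and a bit offset into one value; an RSM rule fires only when both masked match fields equal their programmed values, and the selected entropy bits index into the map table. A minimal software model of that lookup, for illustration only (struct and function names are hypothetical; the real evaluation happens in the receive hardware):

#include <stdint.h>

/* Hypothetical software model of one RSM rule. */
struct rsm_rule_model {
	uint8_t  offset;          /* base index into the RSM map table */
	uint64_t mask1, value1;   /* e.g. LRH_BTH_MASK / LRH_BTH_VALUE */
	uint64_t mask2, value2;   /* e.g. BTH_DESTQP_MASK / BTH_DESTQP_VALUE */
	uint8_t  index_width;     /* e.g. ilog2(NUM_NETDEV_MAP_ENTRIES) */
};

/* Return the map-table index for a matching packet, or -1 if the
 * rule does not fire. 'entropy' stands in for the selected header
 * bits (for AIP: the 8 most significant source QPN bits). */
static int rsm_lookup(const struct rsm_rule_model *r,
		      uint64_t field1, uint64_t field2, uint64_t entropy)
{
	if ((field1 & r->mask1) != r->value1 ||
	    (field2 & r->mask2) != r->value2)
		return -1;
	return r->offset + (int)(entropy & ((1u << r->index_width) - 1));
}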
@@ -6873,7 +6891,7 @@
}
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= rcd->rcvhdrtail_kvaddr ?
+ rcvmask |= hfi1_rcvhdrtail_kvaddr(rcd) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(dd, rcvmask, rcd);
hfi1_rcd_put(rcd);
@@ -7299,11 +7317,11 @@
case 1: return OPA_LINK_WIDTH_1X;
case 2: return OPA_LINK_WIDTH_2X;
case 3: return OPA_LINK_WIDTH_3X;
+ case 4: return OPA_LINK_WIDTH_4X;
default:
dd_dev_info(dd, "%s: invalid width %d, using 4\n",
__func__, width);
- /* fall through */
- case 4: return OPA_LINK_WIDTH_4X;
+ return OPA_LINK_WIDTH_4X;
}
}
@@ -7358,12 +7376,13 @@
case 0:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
break;
+ case 1:
+ dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
+ break;
default:
dd_dev_err(dd,
"%s: unexpected max rate %d, using 25Gb\n",
__func__, (int)max_rate);
- /* fall through */
- case 1:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
break;
}
@@ -8405,20 +8424,107 @@
static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
{
u32 tail;
- int present;
- if (!rcd->rcvhdrtail_kvaddr)
- present = (rcd->seq_cnt ==
- rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
- else /* is RDMA rtail */
- present = (rcd->head != get_rcvhdrtail(rcd));
-
- if (present)
+ if (hfi1_packet_present(rcd))
return 1;
/* fall back to a CSR read, correct independent of DMA_RTAIL */
tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
- return rcd->head != tail;
+ return hfi1_rcd_head(rcd) != tail;
+}
+
+/**
+ * receive_interrupt_common() - Common code for receive context
+ * interrupt handlers: update traces, increment the kernel IRQ
+ * counter and set up ASPM when needed.
+ */
+static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+
+ trace_hfi1_receive_interrupt(dd, rcd);
+ this_cpu_inc(*dd->int_counter);
+ aspm_ctx_disable(rcd);
+}
+
+/**
+ * __hfi1_rcd_eoi_intr() - Make the HW issue a receive interrupt
+ * when packets are present in the queue. When calling with
+ * interrupts enabled, use hfi1_rcd_eoi_intr() instead.
+ *
+ * @rcd: valid receive context
+ */
+static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ if (!rcd->rcvhdrq)
+ return;
+ clear_recv_intr(rcd);
+ if (check_packet_present(rcd))
+ force_recv_intr(rcd);
+}
+
+/**
+ * hfi1_rcd_eoi_intr() - End of Interrupt processing action
+ *
+ * @rcd: Ptr to hfi1_ctxtdata of receive context
+ *
+ * Hold IRQs so we can safely clear the interrupt and
+ * recheck for a packet that may have arrived after the previous
+ * check and the interrupt clear. If a packet arrived, force another
+ * interrupt. This routine can be called at the end of receive packet
+ * processing in interrupt service routines, interrupt service threads
+ * and softirqs.
+ */
+static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __hfi1_rcd_eoi_intr(rcd);
+ local_irq_restore(flags);
+}
+
+/**
+ * hfi1_netdev_rx_napi - napi poll function to move eoi inline
+ * @napi: pointer to napi object
+ * @budget: netdev budget
+ */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget)
+{
+ struct hfi1_netdev_rxq *rxq = container_of(napi,
+ struct hfi1_netdev_rxq, napi);
+ struct hfi1_ctxtdata *rcd = rxq->rcd;
+ int work_done = 0;
+
+ work_done = rcd->do_interrupt(rcd, budget);
+
+ if (work_done < budget) {
+ napi_complete_done(napi, work_done);
+ hfi1_rcd_eoi_intr(rcd);
+ }
+
+ return work_done;
+}
+
+/* Receive packet napi handler for netdevs VNIC and AIP */
+irqreturn_t receive_context_interrupt_napi(int irq, void *data)
+{
+ struct hfi1_ctxtdata *rcd = data;
+
+ receive_interrupt_common(rcd);
+
+ if (likely(rcd->napi)) {
+ if (likely(napi_schedule_prep(rcd->napi)))
+ __napi_schedule_irqoff(rcd->napi);
+ else
+ __hfi1_rcd_eoi_intr(rcd);
+ } else {
+ WARN_ONCE(1, "Napi IRQ handler without napi set up ctxt=%d\n",
+ rcd->ctxt);
+ __hfi1_rcd_eoi_intr(rcd);
+ }
+
+ return IRQ_HANDLED;
}
/*
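hfi1_netdev_rx_napi() above is a standard NAPI poll callback: it processes at most @budget packets and, when it finishes early, completes NAPI and re-arms the receive interrupt through hfi1_rcd_eoi_intr(). A sketch of the registration this implies — the actual call site lives in the new netdev_rx.c, outside this hunk, and the weight value here is an assumption:

	/* assumed registration site, for illustration only */
	netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
	napi_enable(&rxq->napi);
	rxq->rcd->napi = &rxq->napi;	/* lets the IRQ handler schedule it */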
@@ -8432,13 +8538,9 @@
irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- struct hfi1_devdata *dd = rcd->dd;
int disposition;
- int present;
- trace_hfi1_receive_interrupt(dd, rcd);
- this_cpu_inc(*dd->int_counter);
- aspm_ctx_disable(rcd);
+ receive_interrupt_common(rcd);
/* receive interrupt remains blocked while processing packets */
disposition = rcd->do_interrupt(rcd, 0);
@@ -8451,17 +8553,7 @@
if (disposition == RCV_PKT_LIMIT)
return IRQ_WAKE_THREAD;
- /*
- * The packet processor detected no more packets. Clear the receive
- * interrupt and recheck for a packet packet that may have arrived
- * after the previous check and interrupt clear. If a packet arrived,
- * force another interrupt.
- */
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
-
+ __hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -8472,24 +8564,11 @@
irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- int present;
/* receive interrupt is still blocked from the IRQ handler */
(void)rcd->do_interrupt(rcd, 1);
- /*
- * The packet processor will only return if it detected no more
- * packets. Hold IRQs here so we can safely clear the interrupt and
- * recheck for a packet that may have arrived after the previous
- * check and the interrupt clear. If a packet arrived, force another
- * interrupt.
- */
- local_irq_disable();
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
- local_irq_enable();
+ hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -10060,7 +10139,7 @@
* the first kernel context would have been allocated by now so
* we are guaranteed a valid value.
*/
- return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
+ return (get_hdrqentsize(dd->rcd[0]) - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
}
/*
@@ -10105,7 +10184,7 @@
thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
sc_mtu_to_threshold(dd->vld[i].sc,
dd->vld[i].mtu,
- dd->rcd[0]->rcvhdrqentsize));
+ get_hdrqentsize(dd->rcd[0])));
for (j = 0; j < INIT_SC_PER_VL; j++)
sc_set_cr_threshold(
pio_select_send_context_vl(dd, j, i),
@@ -11832,7 +11911,7 @@
head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
- if (rcd->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
tail = get_rcvhdrtail(rcd);
else
tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
@@ -11876,6 +11955,84 @@
return 0x1; /* if invalid, go with the minimum size */
}
+/**
+ * encode_rcv_header_entry_size - return chip specific encoding for size
+ * @size: size in dwords
+ *
+ * Convert a receive header entry size to the encoding used in the CSR.
+ *
+ * Return zero if the given size is invalid, otherwise the encoding.
+ */
+u8 encode_rcv_header_entry_size(u8 size)
+{
+ /* there are only 3 valid receive header entry sizes */
+ if (size == 2)
+ return 1;
+ if (size == 16)
+ return 2;
+ if (size == 32)
+ return 4;
+ return 0; /* invalid */
+}
+
+/**
+ * hfi1_validate_rcvhdrcnt - validate hdrcnt
+ * @dd: the device data
+ * @thecnt: the header count
+ */
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
+{
+ if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd, "Receive header queue count too small\n");
+ return -EINVAL;
+ }
+
+ if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
+ return -EINVAL;
+ }
+
+ if (thecnt % HDRQ_INCREMENT) {
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * set_hdrq_regs - set header queue registers for context
+ * @dd: the device data
+ * @ctxt: the context
+ * @entsize: the dword entry size
+ * @hdrcnt: the number of header entries
+ */
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt)
+{
+ u64 reg;
+
+ reg = (((u64)hdrcnt >> HDRQ_SIZE_SHIFT) & RCV_HDR_CNT_CNT_MASK) <<
+ RCV_HDR_CNT_CNT_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_CNT, reg);
+ reg = ((u64)encode_rcv_header_entry_size(entsize) &
+ RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) <<
+ RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_ENT_SIZE, reg);
+ reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) <<
+ RCV_HDR_SIZE_HDR_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_SIZE, reg);
+
+ /*
+ * Program dummy tail address for every receive context
+ * before enabling any receive context
+ */
+ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
+ dd->rcvhdrtail_dummy_dma);
+}
+
void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
struct hfi1_ctxtdata *rcd)
{
@@ -11897,13 +12054,13 @@
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
rcd->rcvhdrq_dma);
- if (rcd->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
rcd->rcvhdrqtailaddr_dma);
- rcd->seq_cnt = 1;
+ hfi1_set_seq_cnt(rcd, 1);
/* reset the cached receive header queue head value */
- rcd->head = 0;
+ hfi1_set_rcd_head(rcd, 0);
/*
* Zero the receive header queue so we don't get false
@@ -11983,7 +12140,7 @@
IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
}
- if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
+ if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && hfi1_rcvhdrtail_kvaddr(rcd))
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -12724,11 +12881,6 @@
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
{
switch (chip_lstate) {
- default:
- dd_dev_err(dd,
- "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
- chip_lstate);
- /* fall through */
case LSTATE_DOWN:
return IB_PORT_DOWN;
case LSTATE_INIT:
@@ -12737,6 +12889,11 @@
return IB_PORT_ARMED;
case LSTATE_ACTIVE:
return IB_PORT_ACTIVE;
+ default:
+ dd_dev_err(dd,
+ "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+ chip_lstate);
+ return IB_PORT_DOWN;
}
}
@@ -12744,10 +12901,6 @@
{
/* look at the HFI meta-states only */
switch (chip_pstate & 0xf0) {
- default:
- dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
- chip_pstate);
- /* fall through */
case PLS_DISABLED:
return IB_PORTPHYSSTATE_DISABLED;
case PLS_OFFLINE:
@@ -12760,6 +12913,10 @@
return IB_PORTPHYSSTATE_LINKUP;
case PLS_PHYTEST:
return IB_PORTPHYSSTATE_PHY_TEST;
+ default:
+ dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
+ chip_pstate);
+ return IB_PORTPHYSSTATE_DISABLED;
}
}
@@ -13237,13 +13394,12 @@
* in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
- * num_vnic_contexts - number of contexts reserved for VNIC
+ * num_netdev_contexts - number of contexts reserved for netdev
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
unsigned long num_kernel_contexts;
- u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT;
- int total_contexts;
+ u16 num_netdev_contexts;
int ret;
unsigned ngroups;
int rmt_count;
@@ -13280,13 +13436,6 @@
num_kernel_contexts = send_contexts - num_vls - 1;
}
- /* Accommodate VNIC contexts if possible */
- if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) {
- dd_dev_err(dd, "No receive contexts available for VNIC\n");
- num_vnic_contexts = 0;
- }
- total_contexts = num_kernel_contexts + num_vnic_contexts;
-
/*
* User contexts:
* - default to 1 user context per real (non-HT) CPU core if
@@ -13299,28 +13448,32 @@
/*
* Adjust the counts given a global max.
*/
- if (total_contexts + n_usr_ctxts > rcv_contexts) {
+ if (num_kernel_contexts + n_usr_ctxts > rcv_contexts) {
dd_dev_err(dd,
- "Reducing # user receive contexts to: %d, from %u\n",
- rcv_contexts - total_contexts,
+ "Reducing # user receive contexts to: %u, from %u\n",
+ (u32)(rcv_contexts - num_kernel_contexts),
n_usr_ctxts);
/* recalculate */
- n_usr_ctxts = rcv_contexts - total_contexts;
+ n_usr_ctxts = rcv_contexts - num_kernel_contexts;
}
+ num_netdev_contexts =
+ hfi1_num_netdev_contexts(dd, rcv_contexts -
+ (num_kernel_contexts + n_usr_ctxts),
+ &node_affinity.real_cpu_mask);
/*
* The RMT entries are currently allocated as shown below:
* 1. QOS (0 to 128 entries);
* 2. FECN (num_kernel_context - 1 + num_user_contexts +
- * num_vnic_contexts);
- * 3. VNIC (num_vnic_contexts).
- * It should be noted that FECN oversubscribe num_vnic_contexts
- * entries of RMT because both VNIC and PSM could allocate any receive
+ * num_netdev_contexts);
+ * 3. netdev (num_netdev_contexts).
+ * It should be noted that FECN oversubscribe num_netdev_contexts
+ * entries of RMT because both netdev and PSM could allocate any receive
* context between dd->first_dyn_alloc_ctxt and dd->num_rcv_contexts,
* and PSM FECN must reserve an RMT entry for each possible PSM receive
* context.
*/
- rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
+ rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2);
if (HFI1_CAP_IS_KSET(TID_RDMA))
rmt_count += num_kernel_contexts - 1;
if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
@@ -13333,21 +13486,20 @@
n_usr_ctxts = user_rmt_reduced;
}
- total_contexts += n_usr_ctxts;
-
- /* the first N are kernel contexts, the rest are user/vnic contexts */
- dd->num_rcv_contexts = total_contexts;
+ /* the first N are kernel contexts, the rest are user/netdev contexts */
+ dd->num_rcv_contexts =
+ num_kernel_contexts + n_usr_ctxts + num_netdev_contexts;
dd->n_krcv_queues = num_kernel_contexts;
dd->first_dyn_alloc_ctxt = num_kernel_contexts;
- dd->num_vnic_contexts = num_vnic_contexts;
+ dd->num_netdev_contexts = num_netdev_contexts;
dd->num_user_contexts = n_usr_ctxts;
dd->freectxts = n_usr_ctxts;
dd_dev_info(dd,
- "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
+ "rcv contexts: chip %d, used %d (kernel %d, netdev %u, user %u)\n",
rcv_contexts,
(int)dd->num_rcv_contexts,
(int)dd->n_krcv_queues,
- dd->num_vnic_contexts,
+ dd->num_netdev_contexts,
dd->num_user_contexts);
/*
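To make the reworked accounting concrete (all numbers assumed): with rcv_contexts = 160, num_kernel_contexts = 17 and n_usr_ctxts = 128, hfi1_num_netdev_contexts() is offered at most 160 - (17 + 128) = 15 contexts. If it claims 8, then dd->num_rcv_contexts = 17 + 128 + 8 = 153, and the RMT budget check charges qos_rmt_entries() + 8 * 2 = 16 extra entries (plus 17 - 1 = 16 more when TID_RDMA is set) against the NUM_MAP_ENTRIES = 256 limit.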
@@ -14026,21 +14178,12 @@
static void init_kdeth_qp(struct hfi1_devdata *dd)
{
- /* user changed the KDETH_QP */
- if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
- /* out of range or illegal value */
- dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
- kdeth_qp = 0;
- }
- if (kdeth_qp == 0) /* not set, or failed range check */
- kdeth_qp = DEFAULT_KDETH_QP;
-
write_csr(dd, SEND_BTH_QP,
- (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+ (RVT_KDETH_QP_PREFIX & SEND_BTH_QP_KDETH_QP_MASK) <<
SEND_BTH_QP_KDETH_QP_SHIFT);
write_csr(dd, RCV_BTH_QP,
- (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+ (RVT_KDETH_QP_PREFIX & RCV_BTH_QP_KDETH_QP_MASK) <<
RCV_BTH_QP_KDETH_QP_SHIFT);
}
@@ -14156,6 +14299,12 @@
}
}
+/* Return true if a receive side mapping rule is programmed at this index */
+static bool has_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ return read_csr(dd, RCV_RSM_CFG + (8 * rule_index)) != 0;
+}
+
/*
* Add a receive side mapping rule.
*/
@@ -14392,77 +14541,138 @@
rmt->used += total_cnt;
}
-/* Initialize RSM for VNIC */
-void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+static inline bool hfi1_is_rmt_full(int start, int spare)
+{
+ return (start + spare) > NUM_MAP_ENTRIES;
+}
+
+static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd)
{
u8 i, j;
u8 ctx_id = 0;
u64 reg;
u32 regoff;
- struct rsm_rule_data rrd;
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ int ctxt_count = hfi1_netdev_ctxt_count(dd);
- if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
- dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
- dd->vnic.rmt_start);
- return;
+ /* We already have contexts mapped in RMT */
+ if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) {
+ dd_dev_info(dd, "Contexts are already mapped in RMT\n");
+ return true;
}
- dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
- dd->vnic.rmt_start,
- dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
+ if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) {
+ dd_dev_err(dd, "Not enough RMT entries used = %d\n",
+ rmt_start);
+ return false;
+ }
+
+ dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n",
+ rmt_start,
+ rmt_start + NUM_NETDEV_MAP_ENTRIES);
/* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
- regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
+ regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8;
reg = read_csr(dd, regoff);
- for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
- /* Update map register with vnic context */
- j = (dd->vnic.rmt_start + i) % 8;
+ for (i = 0; i < NUM_NETDEV_MAP_ENTRIES; i++) {
+ /* Update map register with netdev context */
+ j = (rmt_start + i) % 8;
reg &= ~(0xffllu << (j * 8));
- reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
- /* Wrap up vnic ctx index */
- ctx_id %= dd->vnic.num_ctxt;
+ reg |= (u64)hfi1_netdev_get_ctxt(dd, ctx_id++)->ctxt << (j * 8);
+ /* Wrap up netdev ctx index */
+ ctx_id %= ctxt_count;
/* Write back map register */
- if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
+ if (j == 7 || ((i + 1) == NUM_NETDEV_MAP_ENTRIES)) {
dev_dbg(&(dd)->pcidev->dev,
- "Vnic rsm map reg[%d] =0x%llx\n",
+ "RMT[%d] =0x%llx\n",
regoff - RCV_RSM_MAP_TABLE, reg);
write_csr(dd, regoff, reg);
regoff += 8;
- if (i < (NUM_VNIC_MAP_ENTRIES - 1))
+ if (i < (NUM_NETDEV_MAP_ENTRIES - 1))
reg = read_csr(dd, regoff);
}
}
- /* Add rule for vnic */
- rrd.offset = dd->vnic.rmt_start;
- rrd.pkt_type = 4;
- /* Match 16B packets */
- rrd.field1_off = L2_TYPE_MATCH_OFFSET;
- rrd.mask1 = L2_TYPE_MASK;
- rrd.value1 = L2_16B_VALUE;
- /* Match ETH L4 packets */
- rrd.field2_off = L4_TYPE_MATCH_OFFSET;
- rrd.mask2 = L4_16B_TYPE_MASK;
- rrd.value2 = L4_16B_ETH_VALUE;
- /* Calc context from veswid and entropy */
- rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
- rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
- rrd.index2_off = L2_16B_ENTROPY_OFFSET;
- rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
- add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+ return true;
+}
- /* Enable RSM if not already enabled */
+static void hfi1_enable_rsm_rule(struct hfi1_devdata *dd,
+ int rule, struct rsm_rule_data *rrd)
+{
+ if (!hfi1_netdev_update_rmt(dd)) {
+ dd_dev_err(dd, "Failed to update RMT for RSM%d rule\n", rule);
+ return;
+ }
+
+ add_rsm_rule(dd, rule, rrd);
add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
}
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd)
+{
+ /*
+ * go through with the initialisation only if this rule actually doesn't
+ * exist yet
+ */
+ if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) {
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ struct rsm_rule_data rrd = {
+ .offset = rmt_start,
+ .pkt_type = IB_PACKET_TYPE,
+ .field1_off = LRH_BTH_MATCH_OFFSET,
+ .mask1 = LRH_BTH_MASK,
+ .value1 = LRH_BTH_VALUE,
+ .field2_off = BTH_DESTQP_MATCH_OFFSET,
+ .mask2 = BTH_DESTQP_MASK,
+ .value2 = BTH_DESTQP_VALUE,
+ .index1_off = DETH_AIP_SQPN_SELECT_OFFSET +
+ ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index2_off = DETH_AIP_SQPN_SELECT_OFFSET,
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+ };
+
+ hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd);
+ }
+}
+
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ struct rsm_rule_data rrd = {
+ /* Add rule for vnic */
+ .offset = rmt_start,
+ .pkt_type = 4,
+ /* Match 16B packets */
+ .field1_off = L2_TYPE_MATCH_OFFSET,
+ .mask1 = L2_TYPE_MASK,
+ .value1 = L2_16B_VALUE,
+ /* Match ETH L4 packets */
+ .field2_off = L4_TYPE_MATCH_OFFSET,
+ .mask2 = L4_16B_TYPE_MASK,
+ .value2 = L4_16B_ETH_VALUE,
+ /* Calc context from veswid and entropy */
+ .index1_off = L4_16B_HDR_VESWID_OFFSET,
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index2_off = L2_16B_ENTROPY_OFFSET,
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+ };
+
+ hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+}
+
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
{
clear_rsm_rule(dd, RSM_INS_VNIC);
+}
- /* Disable RSM if used only by vnic */
- if (dd->vnic.rmt_start == 0)
- clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd)
+{
+ /* only actually clear the rule if it's the last user asking to do so */
+ if (atomic_fetch_add_unless(&dd->ipoib_rsm_usr_num, -1, 0) == 1)
+ clear_rsm_rule(dd, RSM_INS_AIP);
}
static int init_rxe(struct hfi1_devdata *dd)
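hfi1_init_aip_rsm()/hfi1_deinit_aip_rsm() above share one RSM rule among all IPoIB users via dd->ipoib_rsm_usr_num: the first init installs the rule, the last deinit clears it, and atomic_fetch_add_unless() makes an unbalanced deinit a no-op because the counter never drops below zero. The same pattern in isolation (install_rule()/remove_rule() are hypothetical stand-ins):

static void install_rule(void);		/* hypothetical stand-ins */
static void remove_rule(void);
static atomic_t users = ATOMIC_INIT(0);

static void get_rule(void)
{
	if (atomic_fetch_inc(&users) == 0)	/* 0 -> 1: first user */
		install_rule();
}

static void put_rule(void)
{
	/* decrement unless already 0; returns the old value */
	if (atomic_fetch_add_unless(&users, -1, 0) == 1)
		remove_rule();			/* 1 -> 0: last user */
}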
@@ -14481,8 +14691,8 @@
init_qos(dd, rmt);
init_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
- /* record number of used rsm map entries for vnic */
- dd->vnic.rmt_start = rmt->used;
+ /* record number of used rsm map entries for netdev */
+ hfi1_netdev_set_free_rmt_idx(dd, rmt->used);
kfree(rmt);
/*
@@ -15036,6 +15246,11 @@
(dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK);
+ /* alloc netdev data */
+ ret = hfi1_netdev_alloc(dd);
+ if (ret)
+ goto bail_cleanup;
+
ret = set_up_context_variables(dd);
if (ret)
goto bail_cleanup;
@@ -15136,6 +15351,7 @@
hfi1_comp_vectors_clean_up(dd);
msix_clean_up_interrupts(dd);
bail_cleanup:
+ hfi1_netdev_free(dd);
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index af00619..2c6f2de 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -358,6 +358,8 @@
#define MAX_EAGER_BUFFER (256 * 1024)
#define MAX_EAGER_BUFFER_TOTAL (64 * (1 << 20)) /* max per ctxt 64MB */
#define MAX_EXPECTED_BUFFER (2048 * 1024)
+#define HFI1_MIN_HDRQ_EGRBUF_CNT 32
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
/*
* Receive expected base and count and eager base and count increment -
@@ -699,6 +701,10 @@
return read_csr(dd, RCV_ARRAY_CNT);
}
+u8 encode_rcv_header_entry_size(u8 size);
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt);
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt);
+
u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
u32 dw_len);
@@ -1441,6 +1447,7 @@
irqreturn_t sdma_interrupt(int irq, void *data);
irqreturn_t receive_context_interrupt(int irq, void *data);
irqreturn_t receive_context_thread(int irq, void *data);
+irqreturn_t receive_context_interrupt_napi(int irq, void *data);
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
void init_qsfp_int(struct hfi1_devdata *dd);
@@ -1449,6 +1456,8 @@
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
void reset_interrupts(struct hfi1_devdata *dd);
u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx);
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd);
/*
* Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index d47da7b..ff423e5 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -72,13 +72,6 @@
* compilation unit
*/
-/*
- * If a packet's QP[23:16] bits match this value, then it is
- * a PSM packet and the hardware will expect a KDETH header
- * following the BTH.
- */
-#define DEFAULT_KDETH_QP 0x80
-
/* driver/hw feature set bitmask */
#define HFI1_CAP_USER_SHIFT 24
#define HFI1_CAP_MASK ((1UL << HFI1_CAP_USER_SHIFT) - 1)
@@ -149,7 +142,8 @@
HFI1_CAP_NO_INTEGRITY | \
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_TID_RDMA | \
- HFI1_CAP_OPFN) << \
+ HFI1_CAP_OPFN | \
+ HFI1_CAP_AIP) << \
HFI1_CAP_USER_SHIFT)
/*
* Set of capabilities that need to be enabled for kernel context in
@@ -166,6 +160,7 @@
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_EXTENDED_PSN | \
+ HFI1_CAP_AIP | \
((HFI1_CAP_HDRSUPP | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_STATIC_RATE_CTRL | \
@@ -323,6 +318,9 @@
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
+/* MAX RcvSEQ */
+#define RHF_MAX_SEQ 13
+
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index c29da2f..2ced236 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -379,7 +379,7 @@
struct hfi1_devdata *dd = dd_from_dev(ibd);
++*pos;
- if (!dd->rcd || *pos >= dd->n_krcv_queues)
+ if (!dd->rcd || *pos >= dd->num_rcv_contexts)
return NULL;
return pos;
}
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 941b465..8085718 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2018 Intel Corporation.
+ * Copyright(c) 2015-2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
#include <linux/module.h>
#include <linux/prefetch.h>
#include <rdma/ib_verbs.h>
+#include <linux/etherdevice.h>
#include "hfi.h"
#include "trace.h"
@@ -63,6 +64,9 @@
#include "vnic.h"
#include "fault.h"
+#include "ipoib.h"
+#include "netdev.h"
+
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -411,14 +415,14 @@
static inline void init_packet(struct hfi1_ctxtdata *rcd,
struct hfi1_packet *packet)
{
- packet->rsize = rcd->rcvhdrqentsize; /* words */
- packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
+ packet->rsize = get_hdrqentsize(rcd); /* words */
+ packet->maxcnt = get_hdrq_cnt(rcd) * packet->rsize; /* words */
packet->rcd = rcd;
packet->updegr = 0;
packet->etail = -1;
packet->rhf_addr = get_rhf_addr(rcd);
packet->rhf = rhf_to_cpu(packet->rhf_addr);
- packet->rhqoff = rcd->head;
+ packet->rhqoff = hfi1_rcd_head(rcd);
packet->numpkt = 0;
}
@@ -551,22 +555,22 @@
mdata->maxcnt = packet->maxcnt;
mdata->ps_head = packet->rhqoff;
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+ if (get_dma_rtail_setting(rcd)) {
mdata->ps_tail = get_rcvhdrtail(rcd);
if (rcd->ctxt == HFI1_CTRL_CTXT)
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
else
mdata->ps_seq = 0; /* not used with DMA_RTAIL */
} else {
mdata->ps_tail = 0; /* used only with DMA_RTAIL*/
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
}
}
static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
struct hfi1_ctxtdata *rcd)
{
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+ if (get_dma_rtail_setting(rcd))
return mdata->ps_head == mdata->ps_tail;
return mdata->ps_seq != rhf_rcv_seq(rhf);
}
@@ -592,11 +596,9 @@
mdata->ps_head = 0;
/* Control context must do seq counting */
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
- (rcd->ctxt == HFI1_CTRL_CTXT)) {
- if (++mdata->ps_seq > 13)
- mdata->ps_seq = 1;
- }
+ if (!get_dma_rtail_setting(rcd) ||
+ rcd->ctxt == HFI1_CTRL_CTXT)
+ mdata->ps_seq = hfi1_seq_incr_wrap(mdata->ps_seq);
}
/*
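The open-coded "if (++seq > 13) seq = 1" is replaced by hfi1_seq_incr_wrap() here. The helper itself is added to hfi.h outside this hunk; its presumed shape, using the new RHF_MAX_SEQ constant from common.h:

static inline u32 hfi1_seq_incr_wrap(u32 seq)
{
	/* RHF sequence numbers run 1..RHF_MAX_SEQ (13), then wrap to 1 */
	if (++seq > RHF_MAX_SEQ)
		seq = 1;
	return seq;
}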
@@ -750,6 +752,39 @@
return ret;
}
+static void process_rcv_packet_napi(struct hfi1_packet *packet)
+{
+ packet->etype = rhf_rcv_type(packet->rhf);
+
+ /* total length */
+ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
+ /* retrieve eager buffer details */
+ packet->etail = rhf_egr_index(packet->rhf);
+ packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
+ &packet->updegr);
+ /*
+ * Prefetch the contents of the eager buffer. It is
+ * OK to send a negative length to prefetch_range().
+ * The +2 is the size of the RHF.
+ */
+ prefetch_range(packet->ebuf,
+ packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ (rhf_hdrq_offset(packet->rhf)
+ + 2)) * 4));
+
+ packet->rcd->rhf_rcv_function_map[packet->etype](packet);
+ packet->numpkt++;
+
+ /* Set up for the next packet */
+ packet->rhqoff += packet->rsize;
+ if (packet->rhqoff >= packet->maxcnt)
+ packet->rhqoff = 0;
+
+ packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
+ packet->rcd->rhf_offset;
+ packet->rhf = rhf_to_cpu(packet->rhf_addr);
+}
+
static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret;
@@ -770,7 +805,7 @@
* The +2 is the size of the RHF.
*/
prefetch_range(packet->ebuf,
- packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ packet->tlen - ((get_hdrqentsize(packet->rcd) -
(rhf_hdrq_offset(packet->rhf)
+ 2)) * 4));
}
@@ -824,22 +859,50 @@
* The only thing we need to do is a final update and call for an
* interrupt
*/
- update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
+ update_usrhead(packet->rcd, hfi1_rcd_head(packet->rcd), packet->updegr,
packet->etail, rcv_intr_dynamic, packet->numpkt);
}
/*
+ * handle_receive_interrupt_napi_fp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for receive interrupt.
+ * This is the fast path interrupt handler
+ * used when executing in the napi softirq environment.
+ */
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget)
+{
+ struct hfi1_packet packet;
+
+ init_packet(rcd, &packet);
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+ goto bail;
+
+ while (packet.numpkt < budget) {
+ process_rcv_packet_napi(&packet);
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+ break;
+
+ process_rcv_update(0, &packet);
+ }
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
+bail:
+ finish_packet(&packet);
+ return packet.numpkt;
+}
+
+/*
* Handle receive interrupts when using the no dma rtail option.
*/
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
- u32 seq;
int last = RCV_PKT_OK;
struct hfi1_packet packet;
init_packet(rcd, &packet);
- seq = rhf_rcv_seq(packet.rhf);
- if (seq != rcd->seq_cnt) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
@@ -848,15 +911,12 @@
while (last == RCV_PKT_OK) {
last = process_rcv_packet(&packet, thread);
- seq = rhf_rcv_seq(packet.rhf);
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
@@ -885,15 +945,14 @@
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
}
-static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
+static void set_all_fastpath(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd;
u16 i;
/*
@@ -901,50 +960,17 @@
* interrupt handler only for that context. Otherwise, switch
* interrupt handler for all statically allocated kernel contexts.
*/
- if (ctxt >= dd->first_dyn_alloc_ctxt) {
- rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
- if (rcd) {
- rcd->do_interrupt =
- &handle_receive_interrupt_nodma_rtail;
- hfi1_rcd_put(rcd);
- }
- return;
- }
-
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- rcd->do_interrupt =
- &handle_receive_interrupt_nodma_rtail;
+ if (rcd->ctxt >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic) {
+ hfi1_rcd_get(rcd);
+ hfi1_set_fast(rcd);
hfi1_rcd_put(rcd);
- }
-}
-
-static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt)
-{
- struct hfi1_ctxtdata *rcd;
- u16 i;
-
- /*
- * For dynamically allocated kernel contexts (like vnic) switch
- * interrupt handler only for that context. Otherwise, switch
- * interrupt handler for all statically allocated kernel contexts.
- */
- if (ctxt >= dd->first_dyn_alloc_ctxt) {
- rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
- if (rcd) {
- rcd->do_interrupt =
- &handle_receive_interrupt_dma_rtail;
- hfi1_rcd_put(rcd);
- }
return;
}
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- rcd->do_interrupt =
- &handle_receive_interrupt_dma_rtail;
+ if (rcd && (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic))
+ hfi1_set_fast(rcd);
hfi1_rcd_put(rcd);
}
}
@@ -960,17 +986,14 @@
if (!rcd)
continue;
if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
- rcd->do_interrupt = &handle_receive_interrupt;
+ rcd->do_interrupt = rcd->slow_handler;
hfi1_rcd_put(rcd);
}
}
-static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet *packet,
- struct hfi1_devdata *dd)
+static bool __set_armed_to_active(struct hfi1_packet *packet)
{
- struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
u8 etype = rhf_rcv_type(packet->rhf);
u8 sc = SC15_PACKET;
@@ -985,19 +1008,34 @@
sc = hfi1_16B_get_sc(hdr);
}
if (sc != SC15_PACKET) {
- int hwstate = driver_lstate(rcd->ppd);
+ int hwstate = driver_lstate(packet->rcd->ppd);
+ struct work_struct *lsaw =
+ &packet->rcd->ppd->linkstate_active_work;
if (hwstate != IB_PORT_ACTIVE) {
- dd_dev_info(dd,
+ dd_dev_info(packet->rcd->dd,
"Unexpected link state %s\n",
opa_lstate_name(hwstate));
- return 0;
+ return false;
}
- queue_work(rcd->ppd->link_wq, lsaw);
- return 1;
+ queue_work(packet->rcd->ppd->link_wq, lsaw);
+ return true;
}
- return 0;
+ return false;
+}
+
+/**
+ * set_armed_to_active - the fast path for armed to active
+ * @packet: the packet structure
+ *
+ * Return true if packet processing needs to bail.
+ */
+static bool set_armed_to_active(struct hfi1_packet *packet)
+{
+ if (likely(packet->rcd->ppd->host_link_state != HLS_UP_ARMED))
+ return false;
+ return __set_armed_to_active(packet);
}
/*
@@ -1015,15 +1053,15 @@
struct hfi1_packet packet;
int skip_pkt = 0;
+ if (!rcd->rcvhdrq)
+ return RCV_PKT_OK;
/* Control context will always use the slow path interrupt handler */
needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
init_packet(rcd, &packet);
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt) {
+ if (!get_dma_rtail_setting(rcd)) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
@@ -1040,22 +1078,15 @@
* Control context can potentially receive an invalid
* rhf. Drop such packets.
*/
- if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt)
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
skip_pkt = 1;
- }
}
prescan_rxq(rcd, &packet);
while (last == RCV_PKT_OK) {
- if (unlikely(dd->do_drop &&
- atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
- DROP_PACKET_ON)) {
- dd->do_drop = 0;
-
+ if (hfi1_need_drop(dd)) {
/* On to the next packet */
packet.rhqoff += packet.rsize;
packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
@@ -1067,26 +1098,14 @@
last = skip_rcv_packet(&packet, thread);
skip_pkt = 0;
} else {
- /* Auto activate link on non-SC15 packet receive */
- if (unlikely(rcd->ppd->host_link_state ==
- HLS_UP_ARMED) &&
- set_armed_to_active(rcd, &packet, dd))
+ if (set_armed_to_active(&packet))
goto bail;
last = process_rcv_packet(&packet, thread);
}
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (!get_dma_rtail_setting(rcd)) {
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
- if (needset) {
- dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_nodma_rtail(dd, rcd->ctxt);
- needset = 0;
- }
} else {
if (packet.rhqoff == hdrqtail)
last = RCV_PKT_DONE;
@@ -1095,27 +1114,24 @@
* rhf. Drop such packets.
*/
if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
+ bool lseq;
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (!last && (seq != rcd->seq_cnt))
+ lseq = hfi1_seq_incr(rcd,
+ rhf_rcv_seq(packet.rhf));
+ if (!last && lseq)
skip_pkt = 1;
}
-
- if (needset) {
- dd_dev_info(dd,
- "Switching to DMA_RTAIL\n");
- set_dma_rtail(dd, rcd->ctxt);
- needset = 0;
- }
}
+ if (needset) {
+ needset = false;
+ set_all_fastpath(dd, rcd);
+ }
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
/*
@@ -1127,6 +1143,63 @@
}
/*
+ * handle_receive_interrupt_napi_sp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for errors or receive interrupt.
+ * This is the slow path interrupt handler
+ * used when executing in the napi softirq environment.
+ */
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ int last = RCV_PKT_OK;
+ bool needset = true;
+ struct hfi1_packet packet;
+
+ init_packet(rcd, &packet);
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+ goto bail;
+
+ while (last != RCV_PKT_DONE && packet.numpkt < budget) {
+ if (hfi1_need_drop(dd)) {
+ /* On to the next packet */
+ packet.rhqoff += packet.rsize;
+ packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
+ packet.rhqoff +
+ rcd->rhf_offset;
+ packet.rhf = rhf_to_cpu(packet.rhf_addr);
+
+ } else {
+ if (set_armed_to_active(&packet))
+ goto bail;
+ process_rcv_packet_napi(&packet);
+ }
+
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+ last = RCV_PKT_DONE;
+
+ if (needset) {
+ needset = false;
+ set_all_fastpath(dd, rcd);
+ }
+
+ process_rcv_update(last, &packet);
+ }
+
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
+
+bail:
+ /*
+ * Always write head at end, and setup rcv interrupt, even
+ * if no packets were processed.
+ */
+ finish_packet(&packet);
+ return packet.numpkt;
+}
+
+/*
* We may discover in the interrupt that the hardware link state has
* changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
* and we need to update the driver's notion of the link state. We cannot
@@ -1603,59 +1676,116 @@
show_eflags_errs(packet);
}
+static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ibport *ibp;
+ struct net_device *netdev;
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct napi_struct *napi = rcd->napi;
+ struct sk_buff *skb;
+ struct hfi1_netdev_rxq *rxq = container_of(napi,
+ struct hfi1_netdev_rxq, napi);
+ u32 extra_bytes;
+ u32 tlen, qpnum;
+ bool do_work, do_cnp;
+ struct hfi1_ipoib_dev_priv *priv;
+
+ trace_hfi1_rcvhdr(packet);
+
+ hfi1_setup_ib_header(packet);
+
+ packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
+ packet->grh = NULL;
+
+ if (unlikely(rhf_err_flags(packet->rhf))) {
+ handle_eflags(packet);
+ return;
+ }
+
+ qpnum = ib_bth_get_qpn(packet->ohdr);
+ netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
+ if (!netdev)
+ goto drop_no_nd;
+
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+ trace_ctxt_rsm_hist(rcd->ctxt);
+
+ /* handle congestion notifications */
+ do_work = hfi1_may_ecn(packet);
+ if (unlikely(do_work)) {
+ do_cnp = (packet->opcode != IB_OPCODE_CNP);
+ (void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
+ packet, do_cnp);
+ }
+
+ /*
+ * The hardware split point is after the last byte of the DETH,
+ * so strip the padding and the CRC/ICRC. tlen is the whole
+ * packet length, so the header size must be subtracted as well.
+ */
+ tlen = packet->tlen;
+ extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
+ packet->hlen;
+ if (unlikely(tlen < extra_bytes))
+ goto drop;
+
+ tlen -= extra_bytes;
+
+ skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
+ if (unlikely(!skb))
+ goto drop;
+
+ priv = hfi1_ipoib_priv(netdev);
+ hfi1_ipoib_update_rx_netstats(priv, 1, skb->len);
+
+ skb->dev = netdev;
+ skb->pkt_type = PACKET_HOST;
+ netif_receive_skb(skb);
+
+ return;
+
+drop:
+ ++netdev->stats.rx_dropped;
+drop_no_nd:
+ ibp = rcd_to_iport(packet->rcd);
+ ++ibp->rvp.n_pkt_drops;
+}
+
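A worked example of the trim in hfi1_ipoib_ib_rcv() above, with assumed sizes: for a 9B packet with tlen = 322 bytes, hlen = 44 bytes of LRH/BTH/DETH, 2 pad bytes and a 4-byte ICRC (SIZE_OF_CRC << 2, assuming SIZE_OF_CRC is one dword), extra_bytes = 2 + 4 + 44 = 50, so the skb handed to netif_receive_skb() carries 322 - 50 = 272 payload bytes.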
/*
* The following functions are called by the interrupt handler. They are type
* specific handlers for each packet type.
*/
-static int process_receive_ib(struct hfi1_packet *packet)
+static void process_receive_ib(struct hfi1_packet *packet)
{
if (hfi1_setup_9B_packet(packet))
- return RHF_RCV_CONTINUE;
+ return;
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_ib_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
-{
- /* Packet received in VNIC context via RSM */
- if (packet->rcd->is_vnic)
- return true;
-
- if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) &&
- (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR))
- return true;
-
- return false;
-}
-
-static int process_receive_bypass(struct hfi1_packet *packet)
+static void process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
- if (hfi1_is_vnic_packet(packet)) {
- hfi1_vnic_bypass_rcv(packet);
- return RHF_RCV_CONTINUE;
- }
-
if (hfi1_setup_bypass_packet(packet))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
if (hfi1_16B_get_l2(packet->hdr) == 0x2) {
@@ -1678,17 +1808,16 @@
(OPA_EI_STATUS_SMASK | BAD_L2_ERR);
}
}
- return RHF_RCV_CONTINUE;
}
-static int process_receive_error(struct hfi1_packet *packet)
+static void process_receive_error(struct hfi1_packet *packet)
{
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
(rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR ||
packet->rhf & RHF_DC_ERR)))
- return RHF_RCV_CONTINUE;
+ return;
hfi1_setup_ib_header(packet);
handle_eflags(packet);
@@ -1696,32 +1825,29 @@
if (unlikely(rhf_err_flags(packet->rhf)))
dd_dev_err(packet->rcd->dd,
"Unhandled error packet received. Dropping.\n");
-
- return RHF_RCV_CONTINUE;
}
-static int kdeth_process_expected(struct hfi1_packet *packet)
+static void kdeth_process_expected(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_kdeth_expected_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static int kdeth_process_eager(struct hfi1_packet *packet)
+static void kdeth_process_eager(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
@@ -1729,37 +1855,41 @@
show_eflags_errs(packet);
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_kdeth_eager_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static int process_receive_invalid(struct hfi1_packet *packet)
+static void process_receive_invalid(struct hfi1_packet *packet)
{
dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
rhf_rcv_type(packet->rhf));
- return RHF_RCV_CONTINUE;
}
+#define HFI1_RCVHDR_DUMP_MAX 5
+
void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
{
struct hfi1_packet packet;
struct ps_mdata mdata;
+ int i;
- seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n",
- rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize,
- HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
+ seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s ctrl 0x%08llx status 0x%08llx, head %llu tail %llu sw head %u\n",
+ rcd->ctxt, get_hdrq_cnt(rcd), get_hdrqentsize(rcd),
+ get_dma_rtail_setting(rcd) ?
"dma_rtail" : "nodma_rtail",
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_CTRL),
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_STATUS),
read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) &
RCV_HDR_HEAD_HEAD_MASK,
- read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL));
+ read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL),
+ rcd->head);
init_packet(rcd, &packet);
init_ps_mdata(&mdata, &packet);
- while (1) {
+ for (i = 0; i < HFI1_RCVHDR_DUMP_MAX; i++) {
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
rcd->rhf_offset;
struct ib_header *hdr;
@@ -1811,3 +1941,14 @@
[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
};
+
+const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
+ [RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
+ [RHF_RCV_TYPE_EAGER] = process_receive_invalid,
+ [RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv,
+ [RHF_RCV_TYPE_ERROR] = process_receive_error,
+ [RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv,
+ [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
+};
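netdev_rhf_rcv_functions mirrors normal_rhf_rcv_functions but routes IB packets to the new IPoIB handler and treats the expected/eager KDETH types as invalid, since those never arrive on netdev contexts. Wiring a netdev receive context then reduces to a table swap, presumably done in the context setup code outside this file:

	/* assumed setup-path assignment, for illustration */
	rcd->rhf_rcv_function_map = netdev_rhf_rcv_functions;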
diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index d106d23..c22ab7b 100644
--- a/drivers/infiniband/hw/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
@@ -78,7 +78,7 @@
*size = 0;
*return_data = NULL;
- if (!efi_enabled(EFI_RUNTIME_SERVICES))
+ if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
return -EOPNOTSUPP;
uni_name = kcalloc(strlen(name) + 1, sizeof(efi_char16_t), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
index 986c121..0dfbcfb 100644
--- a/drivers/infiniband/hw/hfi1/fault.c
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -222,11 +222,11 @@
while (bit < bitsize) {
zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
if (zero - 1 != bit)
- size += snprintf(data + size,
+ size += scnprintf(data + size,
datalen - size - 1,
"0x%lx-0x%lx,", bit, zero - 1);
else
- size += snprintf(data + size,
+ size += scnprintf(data + size,
datalen - size - 1, "0x%lx,",
bit);
bit = find_next_bit(fault->opcodes, bitsize, zero);
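The snprintf()-to-scnprintf() switch above fixes a latent overflow: snprintf() returns the length that would have been written given unlimited space, so the chained "size +=" can push size past datalen and turn the next "datalen - size - 1" into a bogus size. scnprintf() returns the number of characters actually stored. A minimal illustration (8-byte buffer assumed):

	char buf[8];
	/* "0x0-0xff," needs 9 characters */
	int a = snprintf(buf, sizeof(buf), "0x%lx-0x%lx,", 0UL, 0xffUL);  /* a == 9 */
	int b = scnprintf(buf, sizeof(buf), "0x%lx-0x%lx,", 0UL, 0xffUL); /* b == 7 */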
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 89e1dfd..329ee4f 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,5 +1,6 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -206,10 +207,7 @@
spin_lock_init(&fd->tid_lock);
spin_lock_init(&fd->invalid_lock);
fd->rec_cpu_num = -1; /* no cpu affinity by default */
- fd->mm = current->mm;
- mmgrab(fd->mm);
fd->dd = dd;
- kobject_get(&fd->dd->kobj);
fp->private_data = fd;
return 0;
nomem:
@@ -516,12 +514,12 @@
ret = -EINVAL;
goto done;
}
- if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) {
+ if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) {
ret = -EPERM;
goto done;
}
memlen = PAGE_SIZE;
- memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
+ memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt);
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -712,8 +710,6 @@
deallocate_ctxt(uctxt);
done:
- mmdrop(fdata->mm);
- kobject_put(&dd->kobj);
if (atomic_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
@@ -1102,7 +1098,7 @@
* don't have to wait to be sure the DMA update has happened
* (chip resets head/tail to 0 on transition to enable).
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
/* Setup J_KEY before enabling the context */
@@ -1150,7 +1146,7 @@
HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
/* adjust flag if this fd is not able to cache */
- if (!fd->handler)
+ if (!fd->use_mn)
cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
cinfo.num_active = hfi1_count_active_units();
@@ -1166,8 +1162,8 @@
cinfo.send_ctxt = uctxt->sc->hw_context;
cinfo.egrtids = uctxt->egrbufs.alloced;
- cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
+ cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt);
+ cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2;
cinfo.sdma_ring_size = fd->cq->nentries;
cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
@@ -1266,7 +1262,7 @@
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
binfo.sw_version = HFI1_KERN_SWVERSION;
- binfo.bthqp = kdeth_qp;
+ binfo.bthqp = RVT_KDETH_QP_PREFIX;
binfo.jkey = uctxt->jkey;
/*
* If more than 64 contexts are enabled the allocated credit
@@ -1555,7 +1551,7 @@
* always resets it's tail register back to 0 on a
* transition from disabled to enabled.
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
} else {
@@ -1696,7 +1692,7 @@
snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
&dd->user_cdev, &dd->user_device,
- true, &dd->kobj);
+ true, &dd->verbs_dev.rdi.ibdev.dev.kobj);
if (ret)
user_remove(dd);
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index c090807..2cf102b 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1868,11 +1868,8 @@
2;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
pcfgcache->config_tables[table_type].num_table =
table_length_dwords;
@@ -1890,15 +1887,10 @@
/* metadata table */
switch (table_type) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
@@ -2028,15 +2020,10 @@
switch (table) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
if (field && field < platform_config_table_limits[table])
src_ptr =
@@ -2139,11 +2126,8 @@
pcfgcache->config_tables[table_type].table;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
src_ptr = pcfgcache->config_tables[table_type].table;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index b79931c..2a9a040 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,8 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
- * Copyright(c) 2015-2018 Intel Corporation.
+ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -197,7 +198,9 @@
u32 count;
};
-typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
+struct hfi1_ctxtdata;
+typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data);
+typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
struct tid_queue {
struct list_head queue_head;
@@ -226,7 +229,13 @@
* be valid. Worst case is we process an extra interrupt and up to 64
* packets with the wrong interrupt handler.
*/
- int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+ intr_handler do_interrupt;
+	/** fast handler used after autoactivation */
+ intr_handler fast_handler;
+ /** slow handler */
+ intr_handler slow_handler;
+	/* napi pointer associated with netdev */
+ struct napi_struct *napi;
/* verbs rx_stats per rcd */
struct hfi1_opcode_stats_perctx *opstats;
/* clear interrupt mask */
@@ -377,11 +386,11 @@
u32 rhqoff;
u32 dlid;
u32 slid;
+ int numpkt;
u16 tlen;
s16 etail;
u16 pkey;
u8 hlen;
- u8 numpkt;
u8 rsize;
u8 updegr;
u8 etype;
@@ -979,7 +988,7 @@
struct hfi1_pkt_state *ps,
struct rvt_swqe *wqe);
extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
-
+extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];
/* return values for the RHF receive functions */
#define RHF_RCV_CONTINUE 0 /* keep going */
@@ -1039,23 +1048,10 @@
#define NUM_MAP_ENTRIES 256
#define NUM_MAP_REGS 32
-/*
- * Number of VNIC contexts used. Ensure it is less than or equal to
- * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
- */
-#define HFI1_NUM_VNIC_CTXT 8
-
-/* Number of VNIC RSM entries */
-#define NUM_VNIC_MAP_ENTRIES 8
-
/* Virtual NIC information */
struct hfi1_vnic_data {
- struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
struct kmem_cache *txreq_cache;
- struct xarray vesws;
u8 num_vports;
- u8 rmt_start;
- u8 num_ctxt;
};
struct hfi1_vnic_vport_info;
@@ -1161,8 +1157,8 @@
u64 z_send_schedule;
u64 __percpu *send_schedule;
- /* number of reserved contexts for VNIC usage */
- u16 num_vnic_contexts;
+ /* number of reserved contexts for netdev usage */
+ u16 num_netdev_contexts;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
/* number of pio send contexts in use by the driver */
@@ -1312,7 +1308,7 @@
struct err_info_constraint err_info_xmit_constraint;
atomic_t drop_packet;
- u8 do_drop;
+ bool do_drop;
u8 err_info_uncorrectable;
u8 err_info_fmconfig;
@@ -1407,18 +1403,17 @@
bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable *sdma_rht;
- struct kobject kobj;
-
/* vnic data */
struct hfi1_vnic_data vnic;
/* Lock to protect IRQ SRC register access */
spinlock_t irq_src_lock;
-};
+ int vnic_num_vports;
+ struct net_device *dummy_netdev;
+ struct hfi1_affinity_node *affinity_entry;
-static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
-{
- return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES;
-}
+ /* Keeps track of IPoIB RSM rule users */
+ atomic_t ipoib_rsm_usr_num;
+};
/* 8051 firmware version helper */
#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
@@ -1449,7 +1444,7 @@
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
- struct mmu_rb_handler *handler;
+ bool use_mn;
struct tid_rb_node **entry_to_rb;
spinlock_t tid_lock; /* protect tid_[limit,used] counters */
u32 tid_limit;
@@ -1458,7 +1453,6 @@
u32 invalid_tid_idx;
/* protect invalid_tids array and invalid_tid_idx */
spinlock_t invalid_lock;
- struct mm_struct *mm;
};
extern struct xarray hfi1_dev_table;
@@ -1496,6 +1490,8 @@
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget);
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget);
void set_all_slowpath(struct hfi1_devdata *dd);
extern const struct pci_device_id hfi1_pci_tbl[];
@@ -1512,12 +1508,148 @@
#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */
#define RCV_PKT_DONE 0x2 /* stop, no more packets detected */
+/**
+ * hfi1_rcd_head - read accessor for rcd head
+ * @rcd: the context
+ */
+static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->head;
+}
+
+/**
+ * hfi1_set_rcd_head - write accessor for rcd head
+ * @rcd: the context
+ * @head: the new head
+ */
+static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head)
+{
+ rcd->head = head;
+}
+
/* calculate the current RHF address */
static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
{
return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
}
+/* return DMA_RTAIL configuration */
+static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd)
+{
+ return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL);
+}
+
+/**
+ * hfi1_seq_incr_wrap - wrapping increment for sequence
+ * @seq: the current sequence number
+ *
+ * Returns: the incremented seq
+ */
+static inline u8 hfi1_seq_incr_wrap(u8 seq)
+{
+ if (++seq > RHF_MAX_SEQ)
+ seq = 1;
+ return seq;
+}
+
+/**
+ * hfi1_seq_cnt - return seq_cnt member
+ * @rcd: the receive context
+ *
+ * Return seq_cnt member
+ */
+static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->seq_cnt;
+}
+
+/**
+ * hfi1_set_seq_cnt - set seq_cnt member
+ * @rcd: the receive context
+ * @cnt: the new seq_cnt value
+ *
+ * Set the seq_cnt member
+ */
+static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt)
+{
+ rcd->seq_cnt = cnt;
+}
+
+/**
+ * last_rcv_seq - detect the last packet
+ * @rcd: the receive context
+ * @seq: the packet's RHF sequence number
+ *
+ * Return true if @seq does not match the context's expected sequence,
+ * i.e. this was the last packet
+ */
+static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ return seq != rcd->seq_cnt;
+}
+
+/**
+ * hfi1_seq_incr - increment context sequence number
+ * @rcd: the receive context
+ * @seq: the current sequence number
+ *
+ * Returns: true if this was the last packet
+ */
+static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt);
+ return last_rcv_seq(rcd, seq);
+}
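(Editorial sketch, not part of the patch: the wrap helper above cycles valid
sequence numbers 1..RHF_MAX_SEQ and never yields 0. The stand-alone C below
stubs RHF_MAX_SEQ to 13, an assumed value mirroring the driver's rhf.h.)

#include <stdio.h>

#define RHF_MAX_SEQ 13	/* assumed; the real constant lives in rhf.h */

/* mirrors hfi1_seq_incr_wrap(): valid sequences cycle 1..RHF_MAX_SEQ */
static unsigned char seq_incr_wrap(unsigned char seq)
{
	if (++seq > RHF_MAX_SEQ)
		seq = 1;
	return seq;
}

int main(void)
{
	unsigned char seq = RHF_MAX_SEQ;

	seq = seq_incr_wrap(seq);
	printf("%u\n", seq);	/* prints 1: wrapped past the max, never 0 */
	return 0;
}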
+
+/**
+ * get_hdrqentsize - return hdrq entry size
+ * @rcd: the receive context
+ */
+static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->rcvhdrqentsize;
+}
+
+/**
+ * get_hdrq_cnt - return hdrq count
+ * @rcd: the receive context
+ */
+static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->rcvhdrq_cnt;
+}
+
+/**
+ * hfi1_is_slowpath - check if this context is slow path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->do_interrupt == rcd->slow_handler;
+}
+
+/**
+ * hfi1_is_fastpath - check if this context is fast path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd)
+{
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ return false;
+
+ return rcd->do_interrupt == rcd->fast_handler;
+}
+
+/**
+ * hfi1_set_fast - change to the fast handler
+ * @rcd: the receive context
+ */
+static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd)
+{
+ if (unlikely(!rcd))
+ return;
+ if (unlikely(!hfi1_is_fastpath(rcd)))
+ rcd->do_interrupt = rcd->fast_handler;
+}
+
int hfi1_reset_device(int);
void receive_interrupt_work(struct work_struct *work);
@@ -2020,9 +2152,21 @@
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty);
+/**
+ * hfi1_rcvhdrtail_kvaddr - return tail kvaddr
+ * @rcd: the receive context
+ */
+static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd)
+{
+ return (__le64 *)rcd->rcvhdrtail_kvaddr;
+}
+
static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
- *((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
+ u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd);
+
+ if (kv)
+ *kv = 0ULL;
}
static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -2031,7 +2175,17 @@
* volatile because it's a DMA target from the chip, routine is
* inlined, and don't want register caching or reordering.
*/
- return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+ return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd));
+}
+
+static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd)
+{
+ if (likely(!rcd->rcvhdrtail_kvaddr)) {
+ u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));
+
+ return !last_rcv_seq(rcd, seq);
+ }
+ return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd);
}
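(Editorial sketch, not part of the patch: hfi1_packet_present() picks the
right availability test per context, RHF sequence tracking when no tail is
DMA'd, head/tail comparison otherwise. A consumer loop might use it as
below; process_one_pkt() is a hypothetical stand-in for the real RHF
dispatch.)

static int example_rx_poll(struct hfi1_ctxtdata *rcd, int budget)
{
	int work = 0;

	while (work < budget && hfi1_packet_present(rcd)) {
		process_one_pkt(rcd);	/* hypothetical dispatch helper */
		work++;
	}
	return work;
}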
/*
@@ -2088,7 +2242,6 @@
extern unsigned long n_krcvqs;
extern uint krcvqs[];
extern int krcvqsset;
-extern uint kdeth_qp;
extern uint loopback;
extern uint quick_linkup;
extern uint rcv_intr_timeout;
@@ -2303,6 +2456,25 @@
return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
}
+/**
+ * hfi1_need_drop - detect need for drop
+ * @dd: the device
+ *
+ * In some cases, the first packet needs to be dropped.
+ *
+ * Return true if the current packet needs to be dropped and false otherwise.
+ */
+static inline bool hfi1_need_drop(struct hfi1_devdata *dd)
+{
+ if (unlikely(dd->do_drop &&
+ atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+ DROP_PACKET_ON)) {
+ dd->do_drop = false;
+ return true;
+ }
+ return false;
+}
+
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 1256dbd..fa2cd76 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -69,6 +69,7 @@
#include "affinity.h"
#include "vnic.h"
#include "exp_rcv.h"
+#include "netdev.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -78,8 +79,6 @@
*/
#define HFI1_MIN_USER_CTXT_BUFCNT 7
-#define HFI1_MIN_HDRQ_EGRBUF_CNT 2
-#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
@@ -122,8 +121,6 @@
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
-static inline u64 encode_rcv_header_entry_size(u16 size);
-
DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
static int hfi1_create_kctxt(struct hfi1_devdata *dd,
@@ -154,7 +151,11 @@
/* Control context must use DMA_RTAIL */
if (rcd->ctxt == HFI1_CTRL_CTXT)
rcd->flags |= HFI1_CAP_DMA_RTAIL;
- rcd->seq_cnt = 1;
+ rcd->fast_handler = get_dma_rtail_setting(rcd) ?
+ handle_receive_interrupt_dma_rtail :
+ handle_receive_interrupt_nodma_rtail;
+
+ hfi1_set_seq_cnt(rcd, 1);
rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
if (!rcd->sc) {
@@ -373,6 +374,9 @@
rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
+ rcd->slow_handler = handle_receive_interrupt;
+ rcd->do_interrupt = rcd->slow_handler;
+ rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
mutex_init(&rcd->exp_mutex);
spin_lock_init(&rcd->exp_lock);
@@ -511,23 +515,6 @@
}
/*
- * Convert a receive header entry size that to the encoding used in the CSR.
- *
- * Return a zero if the given size is invalid.
- */
-static inline u64 encode_rcv_header_entry_size(u16 size)
-{
- /* there are only 3 valid receive header entry sizes */
- if (size == 2)
- return 1;
- if (size == 16)
- return 2;
- else if (size == 32)
- return 4;
- return 0; /* invalid */
-}
-
-/*
* Select the largest ccti value over all SLs to determine the intra-
* packet gap for the link.
*
@@ -910,10 +897,10 @@
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
- dd->do_drop = 1;
+ dd->do_drop = true;
} else {
atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
- dd->do_drop = 0;
+ dd->do_drop = false;
}
/* make sure the link is not "up" */
@@ -929,18 +916,6 @@
if (ret)
goto done;
- /* allocate dummy tail memory for all receive contexts */
- dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
- sizeof(u64),
- &dd->rcvhdrtail_dummy_dma,
- GFP_KERNEL);
-
- if (!dd->rcvhdrtail_dummy_kvaddr) {
- dd_dev_err(dd, "cannot allocate dummy tail memory\n");
- ret = -ENOMEM;
- goto done;
- }
-
/* dd->rcd can be NULL if early initialization failed */
for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
@@ -953,8 +928,6 @@
if (!rcd)
continue;
- rcd->do_interrupt = &handle_receive_interrupt;
-
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
@@ -1162,9 +1135,9 @@
dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
- if (rcd->rcvhdrtail_kvaddr) {
+ if (hfi1_rcvhdrtail_kvaddr(rcd)) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *)rcd->rcvhdrtail_kvaddr,
+ (void *)hfi1_rcvhdrtail_kvaddr(rcd),
rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
@@ -1175,7 +1148,7 @@
rcd->egrbufs.rcvtids = NULL;
for (e = 0; e < rcd->egrbufs.alloced; e++) {
- if (rcd->egrbufs.buffers[e].dma)
+ if (rcd->egrbufs.buffers[e].addr)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
@@ -1227,13 +1200,13 @@
}
/**
- * hfi1_clean_devdata - cleans up per-unit data structure
+ * hfi1_free_devdata - cleans up and frees per-unit data structure
* @dd: pointer to a valid devdata structure
*
- * It cleans up all data structures set up by
+ * It cleans up and frees all data structures set up by
* by hfi1_alloc_devdata().
*/
-static void hfi1_clean_devdata(struct hfi1_devdata *dd)
+void hfi1_free_devdata(struct hfi1_devdata *dd)
{
struct hfi1_asic_data *ad;
unsigned long flags;
@@ -1256,27 +1229,15 @@
dd->tx_opstats = NULL;
kfree(dd->comp_vect);
dd->comp_vect = NULL;
+ if (dd->rcvhdrtail_dummy_kvaddr)
+ dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
+ (void *)dd->rcvhdrtail_dummy_kvaddr,
+ dd->rcvhdrtail_dummy_dma);
+ dd->rcvhdrtail_dummy_kvaddr = NULL;
sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
-static void __hfi1_free_devdata(struct kobject *kobj)
-{
- struct hfi1_devdata *dd =
- container_of(kobj, struct hfi1_devdata, kobj);
-
- hfi1_clean_devdata(dd);
-}
-
-static struct kobj_type hfi1_devdata_type = {
- .release = __hfi1_free_devdata,
-};
-
-void hfi1_free_devdata(struct hfi1_devdata *dd)
-{
- kobject_put(&dd->kobj);
-}
-
/**
* hfi1_alloc_devdata - Allocate our primary per-unit data structure.
* @pdev: Valid PCI device
@@ -1303,7 +1264,6 @@
dd->pport = (struct hfi1_pportdata *)(dd + 1);
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
- dd->node = NUMA_NO_NODE;
ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
GFP_KERNEL);
@@ -1313,6 +1273,15 @@
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+ /*
+ * If the BIOS does not have the NUMA node information set, select
+ * NUMA 0 so we get consistent performance.
+ */
+ dd->node = pcibus_to_node(pdev->bus);
+ if (dd->node == NUMA_NO_NODE) {
+ dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+ dd->node = 0;
+ }
/*
* Initialize all locks for the device. This needs to be as early as
@@ -1362,11 +1331,20 @@
goto bail;
}
- kobject_init(&dd->kobj, &hfi1_devdata_type);
+ /* allocate dummy tail memory for all receive contexts */
+ dd->rcvhdrtail_dummy_kvaddr =
+ dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
+ &dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
+ if (!dd->rcvhdrtail_dummy_kvaddr) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ atomic_set(&dd->ipoib_rsm_usr_num, 0);
return dd;
bail:
- hfi1_clean_devdata(dd);
+ hfi1_free_devdata(dd);
return ERR_PTR(ret);
}
@@ -1569,13 +1547,6 @@
free_credit_return(dd);
- if (dd->rcvhdrtail_dummy_kvaddr) {
- dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
- (void *)dd->rcvhdrtail_dummy_kvaddr,
- dd->rcvhdrtail_dummy_dma);
- dd->rcvhdrtail_dummy_kvaddr = NULL;
- }
-
/*
* Free any resources still in use (usually just kernel contexts)
* at unload; we do for ctxtcnt, because that's what we allocate.
@@ -1624,29 +1595,6 @@
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
-{
- if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd, "Receive header queue count too small\n");
- return -EINVAL;
- }
-
- if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
- return -EINVAL;
- }
-
- if (thecnt % HDRQ_INCREMENT) {
- dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
- return -EINVAL;
- }
-
- return 0;
-}
-
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret = 0, j, pidx, initfail;
@@ -1674,7 +1622,7 @@
}
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
+ ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
@@ -1733,9 +1681,6 @@
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
- /* setup vnic */
- hfi1_vnic_setup(dd);
-
ret = hfi1_register_ib_device(dd);
/*
@@ -1774,7 +1719,6 @@
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
- hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1819,8 +1763,8 @@
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
- /* cleanup vnic */
- hfi1_vnic_cleanup(dd);
+ /* free netdev data */
+ hfi1_netdev_free(dd);
/*
* Disable the IB link, disable interrupts on the device,
@@ -1856,7 +1800,6 @@
int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
unsigned amt;
- u64 reg;
if (!rcd->rcvhdrq) {
gfp_t gfp_flags;
@@ -1888,30 +1831,9 @@
goto bail_free;
}
}
- /*
- * These values are per-context:
- * RcvHdrCnt
- * RcvHdrEntSize
- * RcvHdrSize
- */
- reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT)
- & RCV_HDR_CNT_CNT_MASK)
- << RCV_HDR_CNT_CNT_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg);
- reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize)
- & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
- << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
- reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
- << RCV_HDR_SIZE_HDR_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
- /*
- * Program dummy tail address for every receive context
- * before enabling any receive context
- */
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_dma);
+ set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
+ rcd->rcvhdrq_cnt);
return 0;
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 07847cb..d580aa1 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -399,7 +399,7 @@
* @wait_head: the wait queue
*
* This function is called to insert an iowait struct into a
- * wait queue after a resource (eg, sdma decriptor or pio
+ * wait queue after a resource (eg, sdma descriptor or pio
* buffer) is run out.
*/
static inline void iowait_queue(bool pkts_sent, struct iowait *w,
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
new file mode 100644
index 0000000..1ee361c
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB functionality
+ */
+
+#ifndef HFI1_IPOIB_H
+#define HFI1_IPOIB_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/atomic.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/if_infiniband.h>
+
+#include "hfi.h"
+#include "iowait.h"
+#include "netdev.h"
+
+#include <rdma/ib_verbs.h>
+
+#define HFI1_IPOIB_ENTROPY_SHIFT 24
+
+#define HFI1_IPOIB_TXREQ_NAME_LEN 32
+
+#define HFI1_IPOIB_PSEUDO_LEN 20
+#define HFI1_IPOIB_ENCAP_LEN 4
+
+struct hfi1_ipoib_dev_priv;
+
+union hfi1_ipoib_flow {
+ u16 as_int;
+ struct {
+ u8 tx_queue;
+ u8 sc5;
+ } __attribute__((__packed__));
+};
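(Editorial sketch, not part of the patch: overlaying the tx_queue/sc5 pair
with a u16 lets a flow-change test collapse to one integer compare, which is
how the tx path uses it later in ipoib_tx.c.)

static bool example_flow_changed(union hfi1_ipoib_flow cur,
				 union hfi1_ipoib_flow next)
{
	/* one compare covers both tx_queue and sc5 */
	return cur.as_int != next.as_int;
}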
+
+/**
+ * struct hfi1_ipoib_circ_buf - List of items to be processed
+ * @items: ring of items
+ * @head: ring head
+ * @tail: ring tail
+ * @max_items: one more than the max number of items the ring can contain
+ * @producer_lock: producer sync lock
+ * @consumer_lock: consumer sync lock
+ */
+struct ipoib_txreq;
+struct hfi1_ipoib_circ_buf {
+ struct ipoib_txreq **items;
+ unsigned long head;
+ unsigned long tail;
+ unsigned long max_items;
+ spinlock_t producer_lock; /* head sync lock */
+ spinlock_t consumer_lock; /* tail sync lock */
+};
+
+/**
+ * struct hfi1_ipoib_txq - IPOIB per Tx queue information
+ * @priv: private pointer
+ * @sde: sdma engine
+ * @tx_list: tx request list
+ * @sent_txreqs: count of txreqs posted to sdma
+ * @stops: count of stops of queue
+ * @ring_full: ring has been filled
+ * @no_desc: descriptor shortage seen
+ * @flow: tracks when list needs to be flushed for a flow change
+ * @q_idx: ipoib Tx queue index
+ * @pkts_sent: indicator packets have been sent from this queue
+ * @wait: iowait structure
+ * @complete_txreqs: count of txreqs completed by sdma
+ * @napi: pointer to tx napi interface
+ * @tx_ring: ring of ipoib txreqs to be reaped by napi callback
+ */
+struct hfi1_ipoib_txq {
+ struct hfi1_ipoib_dev_priv *priv;
+ struct sdma_engine *sde;
+ struct list_head tx_list;
+ u64 sent_txreqs;
+ atomic_t stops;
+ atomic_t ring_full;
+ atomic_t no_desc;
+ union hfi1_ipoib_flow flow;
+ u8 q_idx;
+ bool pkts_sent;
+ struct iowait wait;
+
+ atomic64_t ____cacheline_aligned_in_smp complete_txreqs;
+ struct napi_struct *napi;
+ struct hfi1_ipoib_circ_buf tx_ring;
+};
+
+struct hfi1_ipoib_dev_priv {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ struct ib_device *device;
+ struct hfi1_ipoib_txq *txqs;
+ struct kmem_cache *txreq_cache;
+ struct napi_struct *tx_napis;
+ u16 pkey;
+ u16 pkey_index;
+ u32 qkey;
+ u8 port_num;
+
+ const struct net_device_ops *netdev_ops;
+ struct rvt_qp *qp;
+ struct pcpu_sw_netstats __percpu *netstats;
+};
+
+/* hfi1 ipoib rdma netdev's private data structure */
+struct hfi1_ipoib_rdma_netdev {
+ struct rdma_netdev rn; /* keep this first */
+ /* followed by device private data */
+ struct hfi1_ipoib_dev_priv dev_priv;
+};
+
+static inline struct hfi1_ipoib_dev_priv *
+hfi1_ipoib_priv(const struct net_device *dev)
+{
+ return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
+}
+
+static inline void
+hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv,
+ u64 packets,
+ u64 bytes)
+{
+ struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
+
+ u64_stats_update_begin(&netstats->syncp);
+ netstats->rx_packets += packets;
+ netstats->rx_bytes += bytes;
+ u64_stats_update_end(&netstats->syncp);
+}
+
+static inline void
+hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
+ u64 packets,
+ u64 bytes)
+{
+ struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
+
+ u64_stats_update_begin(&netstats->syncp);
+ netstats->tx_packets += packets;
+ netstats->tx_bytes += bytes;
+ u64_stats_update_end(&netstats->syncp);
+}
+
+int hfi1_ipoib_send_dma(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn);
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv);
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
+
+int hfi1_ipoib_rxq_init(struct net_device *dev);
+void hfi1_ipoib_rxq_deinit(struct net_device *dev);
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev);
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+ int size, void *data);
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params);
+
+#endif /* HFI1_IPOIB_H */
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
new file mode 100644
index 0000000..22299b0
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for ipoib functionality
+ */
+
+#include "ipoib.h"
+#include "hfi.h"
+
+static u32 qpn_from_mac(u8 *mac_arr)
+{
+ return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3];
+}
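(Editorial sketch, not part of the patch: an IPoIB hardware address keeps
the 24-bit QPN in bytes 1..3; byte 0 is the link-layer flags byte. A
stand-alone check of that arithmetic, with a made-up address:)

#include <assert.h>

int main(void)
{
	unsigned char mac[4] = { 0x80, 0x00, 0x04, 0xd2 };	/* hypothetical */
	unsigned int qpn = (unsigned int)mac[1] << 16 | mac[2] << 8 | mac[3];

	assert(qpn == 0x4d2);	/* QPN 1234; the flags byte is ignored */
	return 0;
}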
+
+static int hfi1_ipoib_dev_init(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int ret;
+
+ priv->netstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+
+ ret = priv->netdev_ops->ndo_init(dev);
+ if (ret)
+ return ret;
+
+ ret = hfi1_netdev_add_data(priv->dd,
+ qpn_from_mac(priv->netdev->dev_addr),
+ dev);
+ if (ret < 0) {
+ priv->netdev_ops->ndo_uninit(dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void hfi1_ipoib_dev_uninit(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
+
+ priv->netdev_ops->ndo_uninit(dev);
+}
+
+static int hfi1_ipoib_dev_open(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int ret;
+
+ ret = priv->netdev_ops->ndo_open(dev);
+ if (!ret) {
+ struct hfi1_ibport *ibp = to_iport(priv->device,
+ priv->port_num);
+ struct rvt_qp *qp;
+ u32 qpn = qpn_from_mac(priv->netdev->dev_addr);
+
+ rcu_read_lock();
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (!qp) {
+ rcu_read_unlock();
+ priv->netdev_ops->ndo_stop(dev);
+ return -EINVAL;
+ }
+ rvt_get_qp(qp);
+ priv->qp = qp;
+ rcu_read_unlock();
+
+ hfi1_netdev_enable_queues(priv->dd);
+ hfi1_ipoib_napi_tx_enable(dev);
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_dev_stop(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ if (!priv->qp)
+ return 0;
+
+ hfi1_ipoib_napi_tx_disable(dev);
+ hfi1_netdev_disable_queues(priv->dd);
+
+ rvt_put_qp(priv->qp);
+ priv->qp = NULL;
+
+ return priv->netdev_ops->ndo_stop(dev);
+}
+
+static void hfi1_ipoib_dev_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *storage)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ netdev_stats_to_stats64(storage, &dev->stats);
+ dev_fetch_sw_netstats(storage, priv->netstats);
+}
+
+static const struct net_device_ops hfi1_ipoib_netdev_ops = {
+ .ndo_init = hfi1_ipoib_dev_init,
+ .ndo_uninit = hfi1_ipoib_dev_uninit,
+ .ndo_open = hfi1_ipoib_dev_open,
+ .ndo_stop = hfi1_ipoib_dev_stop,
+ .ndo_get_stats64 = hfi1_ipoib_dev_get_stats64,
+};
+
+static int hfi1_ipoib_send(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn)
+{
+ return hfi1_ipoib_send_dma(dev, skb, address, dqpn);
+}
+
+static int hfi1_ipoib_mcast_attach(struct net_device *dev,
+ struct ib_device *device,
+ union ib_gid *mgid,
+ u16 mlid,
+ int set_qkey,
+ u32 qkey)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+ struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+ struct rvt_qp *qp;
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (qp) {
+ rvt_get_qp(qp);
+ rcu_read_unlock();
+ if (set_qkey)
+ priv->qkey = qkey;
+
+ /* attach QP to multicast group */
+ ret = ib_attach_mcast(&qp->ibqp, mgid, mlid);
+ rvt_put_qp(qp);
+ } else {
+ rcu_read_unlock();
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_mcast_detach(struct net_device *dev,
+ struct ib_device *device,
+ union ib_gid *mgid,
+ u16 mlid)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+ struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+ struct rvt_qp *qp;
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (qp) {
+ rvt_get_qp(qp);
+ rcu_read_unlock();
+ ret = ib_detach_mcast(&qp->ibqp, mgid, mlid);
+ rvt_put_qp(qp);
+ } else {
+ rcu_read_unlock();
+ }
+ return ret;
+}
+
+static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ hfi1_ipoib_txreq_deinit(priv);
+ hfi1_ipoib_rxq_deinit(priv->netdev);
+
+ free_percpu(priv->netstats);
+}
+
+static void hfi1_ipoib_set_id(struct net_device *dev, int id)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ priv->pkey_index = (u16)id;
+ ib_query_pkey(priv->device,
+ priv->port_num,
+ priv->pkey_index,
+ &priv->pkey);
+}
+
+static int hfi1_ipoib_setup_rn(struct ib_device *device,
+ u8 port_num,
+ struct net_device *netdev,
+ void *param)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct rdma_netdev *rn = netdev_priv(netdev);
+ struct hfi1_ipoib_dev_priv *priv;
+ int rc;
+
+ rn->send = hfi1_ipoib_send;
+ rn->attach_mcast = hfi1_ipoib_mcast_attach;
+ rn->detach_mcast = hfi1_ipoib_mcast_detach;
+ rn->set_id = hfi1_ipoib_set_id;
+ rn->hca = device;
+ rn->port_num = port_num;
+ rn->mtu = netdev->mtu;
+
+ priv = hfi1_ipoib_priv(netdev);
+ priv->dd = dd;
+ priv->netdev = netdev;
+ priv->device = device;
+ priv->port_num = port_num;
+ priv->netdev_ops = netdev->netdev_ops;
+
+ ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
+
+ rc = hfi1_ipoib_txreq_init(priv);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
+ return rc;
+ }
+
+ rc = hfi1_ipoib_rxq_init(netdev);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
+ hfi1_ipoib_txreq_deinit(priv);
+ return rc;
+ }
+
+ netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+
+ netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
+ netdev->needs_free_netdev = true;
+
+ return 0;
+}
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+
+ if (type != RDMA_NETDEV_IPOIB)
+ return -EOPNOTSUPP;
+
+ if (!HFI1_CAP_IS_KSET(AIP) || !dd->num_netdev_contexts)
+ return -EOPNOTSUPP;
+
+ if (!port_num || port_num > dd->num_pports)
+ return -EINVAL;
+
+ params->sizeof_priv = sizeof(struct hfi1_ipoib_rdma_netdev);
+ params->txqs = dd->num_sdma;
+ params->rxqs = dd->num_netdev_contexts;
+ params->param = NULL;
+ params->initialize_rdma_netdev = hfi1_ipoib_setup_rn;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c
new file mode 100644
index 0000000..3afa754
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#include "netdev.h"
+#include "ipoib.h"
+
+#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))
+
+static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
+{
+ void *dst_data;
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = *((__be16 *)data);
+
+ dst_data = skb_put(skb, size);
+ memcpy(dst_data, data, size);
+ skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
+ skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
+}
+
+static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
+{
+ struct sk_buff *skb;
+ int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
+ void *frag;
+
+ skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ skb_size = SKB_DATA_ALIGN(skb_size);
+ frag = napi_alloc_frag(skb_size);
+
+ if (unlikely(!frag))
+ return napi_alloc_skb(napi, size);
+
+ skb = build_skb(frag, skb_size);
+
+ if (unlikely(!skb)) {
+ skb_free_frag(frag);
+ return NULL;
+ }
+
+ skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
+ return skb;
+}
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+ int size, void *data)
+{
+ struct napi_struct *napi = &rxq->napi;
+ int skb_size = size + HFI1_IPOIB_ENCAP_LEN;
+ struct sk_buff *skb;
+
+ /*
+	 * For smaller (4k + skb overhead) allocations use the napi skb
+	 * cache. Otherwise fall back to the napi frag cache.
+ */
+ if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE))
+ skb = napi_alloc_skb(napi, skb_size);
+ else
+ skb = prepare_frag_skb(napi, skb_size);
+
+ if (unlikely(!skb))
+ return NULL;
+
+ copy_ipoib_buf(skb, data, size);
+
+ return skb;
+}
+
+int hfi1_ipoib_rxq_init(struct net_device *netdev)
+{
+ struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+ struct hfi1_devdata *dd = ipoib_priv->dd;
+ int ret;
+
+ ret = hfi1_netdev_rx_init(dd);
+ if (ret)
+ return ret;
+
+ hfi1_init_aip_rsm(dd);
+
+ return ret;
+}
+
+void hfi1_ipoib_rxq_deinit(struct net_device *netdev)
+{
+ struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+ struct hfi1_devdata *dd = ipoib_priv->dd;
+
+ hfi1_deinit_aip_rsm(dd);
+ hfi1_netdev_rx_destroy(dd);
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
new file mode 100644
index 0000000..ab1eeff
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -0,0 +1,856 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB SDMA functionality
+ */
+
+#include <linux/log2.h>
+#include <linux/circ_buf.h>
+
+#include "sdma.h"
+#include "verbs.h"
+#include "trace_ibhdrs.h"
+#include "ipoib.h"
+
+/* Add a convenience helper */
+#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
+#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
+#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
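(Editorial note, not part of the patch: the mask form only wraps correctly
when size is a power of two, which hfi1_ipoib_txreq_init() guarantees via
roundup_pow_of_two(). Worked case for size = 8, mask = 7:)

/*
 *	CIRC_ADD(7, 1, 8) == (7 + 1) & 7 == 0	wraps to the start
 *	CIRC_PREV(0, 8)   == (0 - 1) & 7 == 7	wraps to the end
 */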
+
+/**
+ * struct ipoib_txreq - IPOIB transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma_hdr: 9b ib headers
+ * @sdma_status: status returned by sdma engine
+ * @priv: ipoib netdev private data
+ * @txq: txq on which skb was output
+ * @skb: skb to send
+ */
+struct ipoib_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_sdma_header sdma_hdr;
+ int sdma_status;
+ struct hfi1_ipoib_dev_priv *priv;
+ struct hfi1_ipoib_txq *txq;
+ struct sk_buff *skb;
+};
+
+struct ipoib_txparms {
+ struct hfi1_devdata *dd;
+ struct rdma_ah_attr *ah_attr;
+ struct hfi1_ibport *ibp;
+ struct hfi1_ipoib_txq *txq;
+ union hfi1_ipoib_flow flow;
+ u32 dqpn;
+ u8 hdr_dwords;
+ u8 entropy;
+};
+
+static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
+{
+ return sent - completed;
+}
+
+static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
+{
+ return hfi1_ipoib_txreqs(txq->sent_txreqs,
+ atomic64_read(&txq->complete_txreqs));
+}
+
+static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
+{
+ if (atomic_inc_return(&txq->stops) == 1)
+ netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
+{
+ if (atomic_dec_and_test(&txq->stops))
+ netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items - 1);
+}
+
+static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items) >> 1;
+}
+
+static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
+{
+ ++txq->sent_txreqs;
+ if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
+ !atomic_xchg(&txq->ring_full, 1))
+ hfi1_ipoib_stop_txq(txq);
+}
+
+static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
+{
+ struct net_device *dev = txq->priv->netdev;
+
+ /* If shutting down just return as queue state is irrelevant */
+ if (unlikely(dev->reg_state != NETREG_REGISTERED))
+ return;
+
+ /*
+ * When the queue has been drained to less than half full it will be
+ * restarted.
+ * The size of the txreq ring is fixed at initialization.
+ * The tx queue len can be adjusted upward while the interface is
+ * running.
+ * The tx queue len can be large enough to overflow the txreq_ring.
+ * Use the minimum of the current tx_queue_len or the rings max txreqs
+ * to protect against ring overflow.
+ */
+ if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
+ atomic_xchg(&txq->ring_full, 0))
+ hfi1_ipoib_wake_txq(txq);
+}
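(Editorial note with illustrative numbers, not part of the patch:)

/*
 *	tx_queue_len = 1000, tx_ring.max_items = 2048:
 *	hwat = min(1000, 2048 - 1)  = 1000	stop the subqueue here
 *	lwat = min(1000, 2048) >> 1 = 500	restart only below this
 * The gap between the two watermarks keeps stop/wake events from
 * ping-ponging on every packet.
 */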
+
+static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
+{
+ struct hfi1_ipoib_dev_priv *priv = tx->priv;
+
+ if (likely(!tx->sdma_status)) {
+ hfi1_ipoib_update_tx_netstats(priv, 1, tx->skb->len);
+ } else {
+ ++priv->netdev->stats.tx_errors;
+ dd_dev_warn(priv->dd,
+ "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
+ __func__, tx->sdma_status,
+ le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx,
+ tx->txq->sde->this_idx);
+ }
+
+ napi_consume_skb(tx->skb, budget);
+ sdma_txclean(priv->dd, &tx->txreq);
+ kmem_cache_free(priv->txreq_cache, tx);
+}
+
+static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget)
+{
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ unsigned long head;
+ unsigned long tail;
+ unsigned int max_tx;
+ int work_done;
+ int tx_count;
+
+ spin_lock_bh(&tx_ring->consumer_lock);
+
+ /* Read index before reading contents at that index. */
+ head = smp_load_acquire(&tx_ring->head);
+ tail = tx_ring->tail;
+ max_tx = tx_ring->max_items;
+
+ work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget);
+
+ for (tx_count = work_done; tx_count; tx_count--) {
+ hfi1_ipoib_free_tx(tx_ring->items[tail], budget);
+ tail = CIRC_NEXT(tail, max_tx);
+ }
+
+ atomic64_add(work_done, &txq->complete_txreqs);
+
+ /* Finished freeing tx items so store the tail value. */
+ smp_store_release(&tx_ring->tail, tail);
+
+ spin_unlock_bh(&tx_ring->consumer_lock);
+
+ hfi1_ipoib_check_queue_stopped(txq);
+
+ return work_done;
+}
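(Editorial note on the barrier pairing above, not part of the patch:)

/*
 * The consumer's smp_load_acquire(&tx_ring->head) pairs with the
 * producer's smp_store_release(&tx_ring->head, ...) in
 * hfi1_ipoib_add_tx(), so a completed slot is always observed fully
 * written; likewise the consumer's smp_store_release(&tx_ring->tail, ...)
 * pairs with the producer's READ_ONCE(tx_ring->tail) when it checks for
 * free space.
 */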
+
+static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev);
+ struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis];
+
+ int work_done = hfi1_ipoib_drain_tx_ring(txq, budget);
+
+ if (work_done < budget)
+ napi_complete_done(napi, work_done);
+
+ return work_done;
+}
+
+static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx)
+{
+ struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring;
+ unsigned long head;
+ unsigned long tail;
+ size_t max_tx;
+
+ spin_lock(&tx_ring->producer_lock);
+
+ head = tx_ring->head;
+ tail = READ_ONCE(tx_ring->tail);
+ max_tx = tx_ring->max_items;
+
+ if (likely(CIRC_SPACE(head, tail, max_tx))) {
+ tx_ring->items[head] = tx;
+
+ /* Finish storing txreq before incrementing head. */
+ smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx));
+ napi_schedule(tx->txq->napi);
+ } else {
+ struct hfi1_ipoib_txq *txq = tx->txq;
+ struct hfi1_ipoib_dev_priv *priv = tx->priv;
+
+ /* Ring was full */
+ hfi1_ipoib_free_tx(tx, 0);
+ atomic64_inc(&txq->complete_txreqs);
+ dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx);
+ }
+
+ spin_unlock(&tx_ring->producer_lock);
+}
+
+static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
+{
+ struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);
+
+ tx->sdma_status = status;
+
+ hfi1_ipoib_add_tx(tx);
+}
+
+static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_devdata *dd = txp->dd;
+ struct sdma_txreq *txreq = &tx->txreq;
+ struct sk_buff *skb = tx->skb;
+ int ret = 0;
+ int i;
+
+ if (skb_headlen(skb)) {
+ ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb));
+ if (unlikely(ret))
+ return ret;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ ret = sdma_txadd_page(dd,
+ txreq,
+ skb_frag_page(frag),
+ frag->bv_offset,
+ skb_frag_size(frag));
+ if (unlikely(ret))
+ break;
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_devdata *dd = txp->dd;
+ struct sdma_txreq *txreq = &tx->txreq;
+ struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+ u16 pkt_bytes =
+ sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
+ int ret;
+
+ ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete);
+ if (unlikely(ret))
+ return ret;
+
+ /* add pbc + headers */
+ ret = sdma_txadd_kvaddr(dd,
+ txreq,
+ sdma_hdr,
+ sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2));
+ if (unlikely(ret))
+ return ret;
+
+ /* add the ulp payload */
+ return hfi1_ipoib_build_ulp_payload(tx, txp);
+}
+
+static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = tx->priv;
+ struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+ struct sk_buff *skb = tx->skb;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
+ struct rdma_ah_attr *ah_attr = txp->ah_attr;
+ struct ib_other_headers *ohdr;
+ struct ib_grh *grh;
+ u16 dwords;
+ u16 slid;
+ u16 dlid;
+ u16 lrh0;
+ u32 bth0;
+ u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 |
+ priv->netdev->dev_addr[2] << 8 |
+ priv->netdev->dev_addr[3]);
+ u16 payload_dwords;
+ u8 pad_cnt;
+
+ pad_cnt = -skb->len & 3;
+
+ /* Includes ICRC */
+ payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC;
+
+ /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
+ txp->hdr_dwords = 7;
+
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ grh = &sdma_hdr->hdr.ibh.u.l.grh;
+ txp->hdr_dwords +=
+ hfi1_make_grh(txp->ibp,
+ grh,
+ rdma_ah_read_grh(ah_attr),
+ txp->hdr_dwords - LRH_9B_DWORDS,
+ payload_dwords);
+ lrh0 = HFI1_LRH_GRH;
+ ohdr = &sdma_hdr->hdr.ibh.u.l.oth;
+ } else {
+ lrh0 = HFI1_LRH_BTH;
+ ohdr = &sdma_hdr->hdr.ibh.u.oth;
+ }
+
+ lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
+ lrh0 |= (txp->flow.sc5 & 0xf) << 12;
+
+ dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
+ if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ } else {
+ u16 lid = (u16)ppd->lid;
+
+ if (lid) {
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
+ slid = lid;
+ } else {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ }
+ }
+
+ /* Includes ICRC */
+ dwords = txp->hdr_dwords + payload_dwords;
+
+ /* Build the lrh */
+ sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B;
+ hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid);
+
+ /* Build the bth */
+ bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey;
+
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(txp->dqpn);
+ ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs));
+
+ /* Build the deth */
+ ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
+ ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy <<
+ HFI1_IPOIB_ENTROPY_SHIFT) | sqpn);
+
+ /* Construct the pbc. */
+ sdma_hdr->pbc =
+ cpu_to_le64(create_pbc(ppd,
+ ib_is_sc5(txp->flow.sc5) <<
+ PBC_DC_INFO_SHIFT,
+ 0,
+ sc_to_vlt(priv->dd, txp->flow.sc5),
+ dwords - SIZE_OF_CRC +
+ (sizeof(sdma_hdr->pbc) >> 2)));
+}
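(Editorial worked example for the length math above, assuming SIZE_OF_CRC
is one dword as the driver's verbs header defines it:)

/*
 *	skb->len = 57, no GRH:
 *	pad_cnt        = -57 & 3 = 3		pad to a dword multiple
 *	payload_dwords = (57 + 3) / 4 + 1 = 16	ICRC included
 *	hdr_dwords     = 7			LRH+BTH+DETH = (8+12+8)/4
 *	dwords         = 7 + 16 = 23		total 9B packet length
 */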
+
+static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct ipoib_txreq *tx;
+ int ret;
+
+ tx = kmem_cache_alloc_node(priv->txreq_cache,
+ GFP_ATOMIC,
+ priv->dd->node);
+ if (unlikely(!tx))
+ return ERR_PTR(-ENOMEM);
+
+ /* so that we can test if the sdma descriptors are there */
+ tx->txreq.num_desc = 0;
+ tx->priv = priv;
+ tx->txq = txp->txq;
+ tx->skb = skb;
+ INIT_LIST_HEAD(&tx->txreq.list);
+
+ hfi1_ipoib_build_ib_tx_headers(tx, txp);
+
+ ret = hfi1_ipoib_build_tx_desc(tx, txp);
+ if (likely(!ret)) {
+ if (txp->txq->flow.as_int != txp->flow.as_int) {
+ txp->txq->flow.tx_queue = txp->flow.tx_queue;
+ txp->txq->flow.sc5 = txp->flow.sc5;
+ txp->txq->sde =
+ sdma_select_engine_sc(priv->dd,
+ txp->flow.tx_queue,
+ txp->flow.sc5);
+ }
+
+ return tx;
+ }
+
+ sdma_txclean(priv->dd, &tx->txreq);
+ kmem_cache_free(priv->txreq_cache, tx);
+
+ return ERR_PTR(ret);
+}
+
+static int hfi1_ipoib_submit_tx_list(struct net_device *dev,
+ struct hfi1_ipoib_txq *txq)
+{
+ int ret;
+ u16 count_out;
+
+ ret = sdma_send_txlist(txq->sde,
+ iowait_get_ib_work(&txq->wait),
+ &txq->tx_list,
+ &count_out);
+ if (likely(!ret) || ret == -EBUSY || ret == -ECOMM)
+ return ret;
+
+ dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret);
+
+ return ret;
+}
+
+static int hfi1_ipoib_flush_tx_list(struct net_device *dev,
+ struct hfi1_ipoib_txq *txq)
+{
+ int ret = 0;
+
+ if (!list_empty(&txq->tx_list)) {
+ /* Flush the current list */
+ ret = hfi1_ipoib_submit_tx_list(dev, txq);
+
+ if (unlikely(ret))
+ if (ret != -EBUSY)
+ ++dev->stats.tx_carrier_errors;
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq,
+ struct ipoib_txreq *tx)
+{
+ int ret;
+
+ ret = sdma_send_txreq(txq->sde,
+ iowait_get_ib_work(&txq->wait),
+ &tx->txreq,
+ txq->pkts_sent);
+ if (likely(!ret)) {
+ txq->pkts_sent = true;
+ iowait_starve_clear(txq->pkts_sent, &txq->wait);
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_send_dma_single(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct ipoib_txreq *tx;
+ int ret;
+
+ tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+ if (IS_ERR(tx)) {
+ int ret = PTR_ERR(tx);
+
+ dev_kfree_skb_any(skb);
+
+ if (ret == -ENOMEM)
+ ++dev->stats.tx_errors;
+ else
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+ }
+
+ ret = hfi1_ipoib_submit_tx(txq, tx);
+ if (likely(!ret)) {
+tx_ok:
+ trace_sdma_output_ibhdr(tx->priv->dd,
+ &tx->sdma_hdr.hdr,
+ ib_is_sc5(txp->flow.sc5));
+ hfi1_ipoib_check_queue_depth(txq);
+ return NETDEV_TX_OK;
+ }
+
+ txq->pkts_sent = false;
+
+ if (ret == -EBUSY || ret == -ECOMM)
+ goto tx_ok;
+
+ sdma_txclean(priv->dd, &tx->txreq);
+ dev_kfree_skb_any(skb);
+ kmem_cache_free(priv->txreq_cache, tx);
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+}
+
+static int hfi1_ipoib_send_dma_list(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct ipoib_txreq *tx;
+
+	/* Has the flow changed? */
+ if (txq->flow.as_int != txp->flow.as_int) {
+ int ret;
+
+ ret = hfi1_ipoib_flush_tx_list(dev, txq);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY)
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+ }
+ tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+ if (IS_ERR(tx)) {
+ int ret = PTR_ERR(tx);
+
+ dev_kfree_skb_any(skb);
+
+ if (ret == -ENOMEM)
+ ++dev->stats.tx_errors;
+ else
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+ }
+
+ list_add_tail(&tx->txreq.list, &txq->tx_list);
+
+ hfi1_ipoib_check_queue_depth(txq);
+
+ trace_sdma_output_ibhdr(tx->priv->dd,
+ &tx->sdma_hdr.hdr,
+ ib_is_sc5(txp->flow.sc5));
+
+ if (!netdev_xmit_more())
+ (void)hfi1_ipoib_flush_tx_list(dev, txq);
+
+ return NETDEV_TX_OK;
+}
+
+static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb)
+{
+ if (skb_transport_header_was_set(skb)) {
+ u8 *hdr = (u8 *)skb_transport_header(skb);
+
+ return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]);
+ }
+
+ return (u8)skb_get_queue_mapping(skb);
+}
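(Editorial worked example, not part of the patch: for TCP/UDP the first
four transport-header bytes are the source and destination ports, so the
XOR above varies per flow.)

/*
 *	sport 0x1234, dport 0x14e9 (illustrative values):
 *	entropy = 0x12 ^ 0x34 ^ 0x14 ^ 0xe9 = 0xdb
 */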
+
+int hfi1_ipoib_send_dma(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct ipoib_txparms txp;
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) {
+ dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n",
+ skb->len,
+ rn->mtu + HFI1_IPOIB_ENCAP_LEN);
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ txp.dd = priv->dd;
+ txp.ah_attr = &ibah_to_rvtah(address)->attr;
+ txp.ibp = to_iport(priv->device, priv->port_num);
+ txp.txq = &priv->txqs[skb_get_queue_mapping(skb)];
+ txp.dqpn = dqpn;
+ txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)];
+ txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb);
+ txp.entropy = hfi1_ipoib_calc_entropy(skb);
+
+ if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list))
+ return hfi1_ipoib_send_dma_list(dev, skb, &txp);
+
+ return hfi1_ipoib_send_dma_single(dev, skb, &txp);
+}
+
+/*
+ * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds the Tx queue's wait
+ * structure to the sdma engine's dmawait list to be woken up when descriptors
+ * become available.
+ */
+static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *txreq,
+ uint seq,
+ bool pkts_sent)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait->iow, struct hfi1_ipoib_txq, wait);
+
+ write_seqlock(&sde->waitlock);
+
+ if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) {
+ if (sdma_progress(sde, seq, txreq)) {
+ write_sequnlock(&sde->waitlock);
+ return -EAGAIN;
+ }
+
+ if (list_empty(&txreq->list))
+ /* came from non-list submit */
+ list_add_tail(&txreq->list, &txq->tx_list);
+ if (list_empty(&txq->wait.list)) {
+ if (!atomic_xchg(&txq->no_desc, 1))
+ hfi1_ipoib_stop_txq(txq);
+ iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+ }
+
+ write_sequnlock(&sde->waitlock);
+ return -EBUSY;
+ }
+
+ write_sequnlock(&sde->waitlock);
+ return -EINVAL;
+}
+
+/*
+ * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors become available and the Tx
+ * queue's wait structure was previously added to the sdma engine's dmawait list.
+ */
+static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait, struct hfi1_ipoib_txq, wait);
+
+ if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED))
+ iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND);
+}
+
+static void hfi1_ipoib_flush_txq(struct work_struct *work)
+{
+ struct iowait_work *ioww =
+ container_of(work, struct iowait_work, iowork);
+ struct iowait *wait = iowait_ioww_to_iow(ioww);
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait, struct hfi1_ipoib_txq, wait);
+ struct net_device *dev = txq->priv->netdev;
+
+ if (likely(dev->reg_state == NETREG_REGISTERED) &&
+ likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
+ if (atomic_xchg(&txq->no_desc, 0))
+ hfi1_ipoib_wake_txq(txq);
+}
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
+{
+ struct net_device *dev = priv->netdev;
+ char buf[HFI1_IPOIB_TXREQ_NAME_LEN];
+ unsigned long tx_ring_size;
+ int i;
+
+ /*
+ * Ring holds 1 less than tx_ring_size
+ * Round up to next power of 2 in order to hold at least tx_queue_len
+ */
+ tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1);
+
+ snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit);
+ priv->txreq_cache = kmem_cache_create(buf,
+ sizeof(struct ipoib_txreq),
+ 0,
+ 0,
+ NULL);
+ if (!priv->txreq_cache)
+ return -ENOMEM;
+
+ priv->tx_napis = kcalloc_node(dev->num_tx_queues,
+ sizeof(struct napi_struct),
+ GFP_KERNEL,
+ priv->dd->node);
+ if (!priv->tx_napis)
+ goto free_txreq_cache;
+
+ priv->txqs = kcalloc_node(dev->num_tx_queues,
+ sizeof(struct hfi1_ipoib_txq),
+ GFP_KERNEL,
+ priv->dd->node);
+ if (!priv->txqs)
+ goto free_tx_napis;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ iowait_init(&txq->wait,
+ 0,
+ hfi1_ipoib_flush_txq,
+ NULL,
+ hfi1_ipoib_sdma_sleep,
+ hfi1_ipoib_sdma_wakeup,
+ NULL,
+ NULL);
+ txq->priv = priv;
+ txq->sde = NULL;
+ INIT_LIST_HEAD(&txq->tx_list);
+ atomic64_set(&txq->complete_txreqs, 0);
+ atomic_set(&txq->stops, 0);
+ atomic_set(&txq->ring_full, 0);
+ atomic_set(&txq->no_desc, 0);
+ txq->q_idx = i;
+ txq->flow.tx_queue = 0xff;
+ txq->flow.sc5 = 0xff;
+ txq->pkts_sent = false;
+
+ netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+ priv->dd->node);
+
+ txq->tx_ring.items =
+ kcalloc_node(tx_ring_size,
+ sizeof(struct ipoib_txreq *),
+ GFP_KERNEL, priv->dd->node);
+ if (!txq->tx_ring.items)
+ goto free_txqs;
+
+ spin_lock_init(&txq->tx_ring.producer_lock);
+ spin_lock_init(&txq->tx_ring.consumer_lock);
+ txq->tx_ring.max_items = tx_ring_size;
+
+ txq->napi = &priv->tx_napis[i];
+ netif_tx_napi_add(dev, txq->napi,
+ hfi1_ipoib_process_tx_ring,
+ NAPI_POLL_WEIGHT);
+ }
+
+ return 0;
+
+free_txqs:
+ for (i--; i >= 0; i--) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ netif_napi_del(txq->napi);
+ kfree(txq->tx_ring.items);
+ }
+
+ kfree(priv->txqs);
+ priv->txqs = NULL;
+
+free_tx_napis:
+ kfree(priv->tx_napis);
+ priv->tx_napis = NULL;
+
+free_txreq_cache:
+ kmem_cache_destroy(priv->txreq_cache);
+ priv->txreq_cache = NULL;
+ return -ENOMEM;
+}
+
+static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
+{
+ struct sdma_txreq *txreq;
+ struct sdma_txreq *txreq_tmp;
+ atomic64_t *complete_txreqs = &txq->complete_txreqs;
+
+ list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
+ struct ipoib_txreq *tx =
+ container_of(txreq, struct ipoib_txreq, txreq);
+
+ list_del(&txreq->list);
+ sdma_txclean(txq->priv->dd, &tx->txreq);
+ dev_kfree_skb_any(tx->skb);
+ kmem_cache_free(txq->priv->txreq_cache, tx);
+ atomic64_inc(complete_txreqs);
+ }
+
+ if (hfi1_ipoib_used(txq))
+ dd_dev_warn(txq->priv->dd,
+ "txq %d not empty found %llu requests\n",
+ txq->q_idx,
+ hfi1_ipoib_txreqs(txq->sent_txreqs,
+ atomic64_read(complete_txreqs)));
+}
+
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->netdev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ iowait_cancel_work(&txq->wait);
+ iowait_sdma_drain(&txq->wait);
+ hfi1_ipoib_drain_tx_list(txq);
+ netif_napi_del(txq->napi);
+ (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
+ kfree(txq->tx_ring.items);
+ }
+
+ kfree(priv->txqs);
+ priv->txqs = NULL;
+
+ kfree(priv->tx_napis);
+ priv->tx_napis = NULL;
+
+ kmem_cache_destroy(priv->txreq_cache);
+ priv->txreq_cache = NULL;
+}
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ napi_enable(txq->napi);
+ }
+}
+
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ napi_disable(txq->napi);
+ (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
+ }
+}
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index d8ff063..3222e3a 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -721,7 +721,7 @@
/* Bad mkey not a violation below level 2 */
if (ibp->rvp.mkeyprot < 2)
break;
- /* fall through */
+ fallthrough;
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_TRAP_REPRESS:
if (ibp->rvp.mkey_violations != 0xFFFF)
@@ -1272,7 +1272,7 @@
case IB_PORT_NOP:
if (phys_state == IB_PORTPHYSSTATE_NOP)
break;
- /* FALLTHROUGH */
+ fallthrough;
case IB_PORT_DOWN:
if (phys_state == IB_PORTPHYSSTATE_NOP) {
link_state = HLS_DN_DOWNDEF;
@@ -2300,7 +2300,6 @@
* can be changed from the default values
*/
case OPA_VLARB_PREEMPT_ELEMENTS:
- /* FALLTHROUGH */
case OPA_VLARB_PREEMPT_MATRIX:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
break;
@@ -2381,7 +2380,7 @@
__be64 port_vl_rcv_bubble;
__be64 port_vl_mark_fecn;
__be64 port_vl_xmit_discards;
- } vls[0]; /* real array size defined by # bits set in vl_select_mask */
+ } vls[]; /* real array size defined by # bits set in vl_select_mask */
};
enum counter_selects {
@@ -2423,7 +2422,7 @@
__be16 attr_id;
__be16 err_reqlength; /* 1 bit, 8 res, 7 bit */
__be32 attr_mod;
- u8 data[0];
+ u8 data[];
};
#define MSK_LLI 0x000000f0
@@ -4170,7 +4169,7 @@
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -4240,7 +4239,7 @@
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -4915,16 +4914,11 @@
*/
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
- switch (in_mad->base_version) {
+ switch (in_mad->mad_hdr.base_version) {
case OPA_MGMT_BASE_VERSION:
- if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
- dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
- return IB_MAD_RESULT_FAILURE;
- }
return hfi1_process_opa_mad(ibdev, mad_flags, port,
in_wc, in_grh,
(struct opa_mad *)in_mad,
@@ -4932,10 +4926,8 @@
out_mad_size,
out_mad_pkey_index);
case IB_MGMT_BASE_VERSION:
- return hfi1_process_ib_mad(ibdev, mad_flags, port,
- in_wc, in_grh,
- (const struct ib_mad *)in_mad,
- (struct ib_mad *)out_mad);
+ return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
+ in_grh, in_mad, out_mad);
default:
break;
}
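The hunks above replace free-form /* FALLTHROUGH */ comments with the fallthrough pseudo-keyword, which expands to __attribute__((__fallthrough__)) on compilers that support it, so -Wimplicit-fallthrough can verify that every fall-through is intentional. A minimal sketch of the pattern; the function itself is illustrative.

#include <linux/compiler_attributes.h>

static int accumulate(int op)
{
	int score = 0;

	switch (op) {
	case 2:
		score += 10;
		fallthrough;	/* deliberate: case 2 also does case 1 work */
	case 1:
		score += 1;
		break;
	default:
		break;
	}
	return score;
}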
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 2f48e69..889e63d 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -165,7 +165,7 @@
} __packed ntc_2048;
};
- u8 class_data[0];
+ u8 class_data[];
};
#define IB_VLARB_LOWPRI_0_31 1
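The vls[0], data[0], and class_data[0] conversions above adopt C99 flexible array members, which let the compiler and runtime checkers see the true object bounds. Allocation of such structures typically pairs with struct_size(); a hedged sketch with an illustrative structure:

#include <linux/overflow.h>
#include <linux/slab.h>

struct example_map {		/* illustrative, not a driver structure */
	u32 mask;
	u32 entries[];		/* flexible array member, formerly [0] */
};

static struct example_map *example_map_alloc(unsigned int nelems)
{
	struct example_map *map;

	/* struct_size() guards the size computation against overflow */
	map = kzalloc(struct_size(map, entries, nelems), GFP_KERNEL);
	return map;
}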
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 14d2a90..d213f65 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -1,4 +1,5 @@
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2016 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -48,23 +49,11 @@
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
+#include <linux/sched/mm.h>
#include "mmu_rb.h"
#include "trace.h"
-struct mmu_rb_handler {
- struct mmu_notifier mn;
- struct rb_root_cached root;
- void *ops_arg;
- spinlock_t lock; /* protect the RB tree */
- struct mmu_rb_ops *ops;
- struct mm_struct *mm;
- struct list_head lru_list;
- struct work_struct del_work;
- struct list_head del_list;
- struct workqueue_struct *wq;
-};
-
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
@@ -92,37 +81,36 @@
return PAGE_ALIGN(node->addr + node->len) - 1;
}
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+int hfi1_mmu_rb_register(void *ops_arg,
struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler)
{
- struct mmu_rb_handler *handlr;
+ struct mmu_rb_handler *h;
int ret;
- handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
- if (!handlr)
+ h = kzalloc(sizeof(*h), GFP_KERNEL);
+ if (!h)
return -ENOMEM;
- handlr->root = RB_ROOT_CACHED;
- handlr->ops = ops;
- handlr->ops_arg = ops_arg;
- INIT_HLIST_NODE(&handlr->mn.hlist);
- spin_lock_init(&handlr->lock);
- handlr->mn.ops = &mn_opts;
- handlr->mm = mm;
- INIT_WORK(&handlr->del_work, handle_remove);
- INIT_LIST_HEAD(&handlr->del_list);
- INIT_LIST_HEAD(&handlr->lru_list);
- handlr->wq = wq;
+ h->root = RB_ROOT_CACHED;
+ h->ops = ops;
+ h->ops_arg = ops_arg;
+ INIT_HLIST_NODE(&h->mn.hlist);
+ spin_lock_init(&h->lock);
+ h->mn.ops = &mn_opts;
+ INIT_WORK(&h->del_work, handle_remove);
+ INIT_LIST_HEAD(&h->del_list);
+ INIT_LIST_HEAD(&h->lru_list);
+ h->wq = wq;
- ret = mmu_notifier_register(&handlr->mn, handlr->mm);
+ ret = mmu_notifier_register(&h->mn, current->mm);
if (ret) {
- kfree(handlr);
+ kfree(h);
return ret;
}
- *handler = handlr;
+ *handler = h;
return 0;
}
@@ -134,7 +122,7 @@
struct list_head del_list;
/* Unregister first so we don't get any more notifications. */
- mmu_notifier_unregister(&handler->mn, handler->mm);
+ mmu_notifier_unregister(&handler->mn, handler->mn.mm);
/*
* Make sure the wq delete handler is finished running. It will not
@@ -166,6 +154,10 @@
int ret = 0;
trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
+
+ if (current->mm != handler->mn.mm)
+ return -EPERM;
+
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, mnode->addr, mnode->len);
if (node) {
@@ -180,6 +172,7 @@
__mmu_int_rb_remove(mnode, &handler->root);
list_del(&mnode->list); /* remove from LRU list */
}
+ mnode->handler = handler;
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
return ret;
@@ -217,6 +210,9 @@
unsigned long flags;
bool ret = false;
+ if (current->mm != handler->mn.mm)
+ return ret;
+
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, addr, len);
if (node) {
@@ -239,6 +235,9 @@
unsigned long flags;
bool stop = false;
+ if (current->mm != handler->mn.mm)
+ return;
+
INIT_LIST_HEAD(&del_list);
spin_lock_irqsave(&handler->lock, flags);
@@ -272,6 +271,9 @@
{
unsigned long flags;
+ if (current->mm != handler->mn.mm)
+ return;
+
/* Validity of handler and node pointers has been checked by caller. */
trace_hfi1_mmu_rb_remove(node->addr, node->len);
spin_lock_irqsave(&handler->lock, flags);
@@ -333,7 +335,7 @@
/*
* Work queue function to remove all nodes that have been queued up to
- * be removed. The key feature is that mm->mmap_sem is not being held
+ * be removed. The key feature is that mm->mmap_lock is not being held
* and the remove callback can sleep while taking it, if needed.
*/
static void handle_remove(struct work_struct *work)
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index f04cec1..423aacc 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -1,4 +1,5 @@
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -54,6 +55,7 @@
unsigned long len;
unsigned long __last;
struct rb_node node;
+ struct mmu_rb_handler *handler;
struct list_head list;
};
@@ -71,7 +73,19 @@
void *evict_arg, bool *stop);
};
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+struct mmu_rb_handler {
+ struct mmu_notifier mn;
+ struct rb_root_cached root;
+ void *ops_arg;
+ spinlock_t lock; /* protect the RB tree */
+ struct mmu_rb_ops *ops;
+ struct list_head lru_list;
+ struct work_struct del_work;
+ struct list_head del_list;
+ struct workqueue_struct *wq;
+};
+
+int hfi1_mmu_rb_register(void *ops_arg,
struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler);
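With the mm argument dropped from hfi1_mmu_rb_register(), the handler is bound to the registering process via mmu_notifier_register(&h->mn, current->mm), and every cache operation now bails out when invoked from a different mm. A small sketch of that ownership guard; the helper name is hypothetical:

#include <linux/sched.h>
#include "mmu_rb.h"

/* Hypothetical helper: true only in the process that registered the
 * handler; mn.mm was recorded by mmu_notifier_register(). */
static bool mmu_rb_owned_by_current(struct mmu_rb_handler *handler)
{
	return current->mm == handler->mn.mm;
}

Insert, evict, and remove then reduce to early returns such as "if (!mmu_rb_owned_by_current(handler)) return -EPERM;", as in the hunks above.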
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
index d920b16..d61ee85 100644
--- a/drivers/infiniband/hw/hfi1/msix.c
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
+#include "netdev.h"
/**
* msix_initialize() - Calculate, request and configure MSIx IRQs
@@ -69,7 +70,7 @@
* one for each VNIC context
* ...any new IRQs should be added here.
*/
- total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_netdev_contexts;
if (total >= CCE_NUM_MSIX_VECTORS)
return -EINVAL;
@@ -115,13 +116,11 @@
*/
static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
irq_handler_t handler, irq_handler_t thread,
- u32 idx, enum irq_type type)
+ enum irq_type type, const char *name)
{
unsigned long nr;
int irq;
int ret;
- const char *err_info;
- char name[MAX_NAME_SIZE];
struct hfi1_msix_entry *me;
/* Allocate an MSIx vector */
@@ -135,43 +134,15 @@
if (nr == dd->msix_info.max_requested)
return -ENOSPC;
- /* Specific verification and determine the name */
- switch (type) {
- case IRQ_GENERAL:
- /* general interrupt must be MSIx vector 0 */
- if (nr) {
- spin_lock(&dd->msix_info.msix_lock);
- __clear_bit(nr, dd->msix_info.in_use_msix);
- spin_unlock(&dd->msix_info.msix_lock);
- dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
- nr);
- return -EINVAL;
- }
- snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
- err_info = "general";
- break;
- case IRQ_SDMA:
- snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
- dd->unit, idx);
- err_info = "sdma";
- break;
- case IRQ_RCVCTXT:
- snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
- dd->unit, idx);
- err_info = "receive context";
- break;
- case IRQ_OTHER:
- default:
+ if (type < IRQ_SDMA || type >= IRQ_OTHER)
return -EINVAL;
- }
- name[sizeof(name) - 1] = 0;
irq = pci_irq_vector(dd->pcidev, nr);
ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
if (ret) {
dd_dev_err(dd,
- "%s: request for IRQ %d failed, MSIx %d, err %d\n",
- err_info, irq, idx, ret);
+ "%s: request for IRQ %d failed, MSIx %lx, err %d\n",
+ name, irq, nr, ret);
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
@@ -190,22 +161,19 @@
/* This is a request, so a failure is not fatal */
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
- dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
+ dd_dev_err(dd, "%s: unable to pin IRQ %d\n", name, ret);
return nr;
}
-/**
- * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
- * @rcd: valid rcd context
- *
- */
-int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd,
+ irq_handler_t handler,
+ irq_handler_t thread,
+ const char *name)
{
- int nr;
-
- nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
- receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
+ int nr = msix_request_irq(rcd->dd, rcd, handler, thread,
+ rcd->is_vnic ? IRQ_NETDEVCTXT : IRQ_RCVCTXT,
+ name);
if (nr < 0)
return nr;
@@ -222,6 +190,37 @@
}
/**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+ rcd->dd->unit, rcd->ctxt);
+
+ return msix_request_rcd_irq_common(rcd, receive_context_interrupt,
+ receive_context_thread, name);
+}
+
+/**
+ * msix_netdev_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * for netdev contexts
+ * @rcd: valid netdev context
+ */
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d nd kctxt%d",
+ rcd->dd->unit, rcd->ctxt);
+ return msix_request_rcd_irq_common(rcd, receive_context_interrupt_napi,
+ NULL, name);
+}
+
+/**
* msix_request_smda_ira() - Helper for getting SDMA IRQ resources
* @sde: valid sdma engine
*
@@ -229,9 +228,12 @@
int msix_request_sdma_irq(struct sdma_engine *sde)
{
int nr;
+ char name[MAX_NAME_SIZE];
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+ sde->dd->unit, sde->this_idx);
nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
- sde->this_idx, IRQ_SDMA);
+ IRQ_SDMA, name);
if (nr < 0)
return nr;
sde->msix_intr = nr;
@@ -241,6 +243,32 @@
}
/**
+ * msix_request_general_irq() - Helper for getting general IRQ
+ * resources
+ * @dd: valid device data
+ */
+int msix_request_general_irq(struct hfi1_devdata *dd)
+{
+ int nr;
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+ nr = msix_request_irq(dd, dd, general_interrupt, NULL, IRQ_GENERAL,
+ name);
+ if (nr < 0)
+ return nr;
+
+ /* general interrupt must be MSIx vector 0 */
+ if (nr) {
+ msix_free_irq(dd, (u8)nr);
+ dd_dev_err(dd, "Invalid index %d for GENERAL IRQ\n", nr);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
* enable_sdma_src() - Helper to enable SDMA IRQ srcs
* @dd: valid devdata structure
* @i: index of SDMA engine
@@ -265,10 +293,9 @@
int msix_request_irqs(struct hfi1_devdata *dd)
{
int i;
- int ret;
+ int ret = msix_request_general_irq(dd);
- ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
- if (ret < 0)
+ if (ret)
return ret;
for (i = 0; i < dd->num_sdma; i++) {
@@ -345,15 +372,16 @@
}
/**
- * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize
+ * msix_netdev_synchronize_irq() - netdev IRQ synchronize
* @dd: valid devdata
*/
-void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd)
{
int i;
+ int ctxt_count = hfi1_netdev_ctxt_count(dd);
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ for (i = 0; i < ctxt_count; i++) {
+ struct hfi1_ctxtdata *rcd = hfi1_netdev_get_ctxt(dd, i);
struct hfi1_msix_entry *me;
me = &dd->msix_info.msix_entries[rcd->msix_intr];
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
index a514881..e63e944 100644
--- a/drivers/infiniband/hw/hfi1/msix.h
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,11 +54,13 @@
int msix_initialize(struct hfi1_devdata *dd);
int msix_request_irqs(struct hfi1_devdata *dd);
void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_general_irq(struct hfi1_devdata *dd);
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
int msix_request_sdma_irq(struct sdma_engine *sde);
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
-/* VNIC interface */
-void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
+/* Netdev interface */
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd);
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd);
#endif
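After this refactor msix_request_irq() no longer synthesizes the IRQ name from a type/index pair; each caller formats the name and passes it down to pci_request_irq(). A hedged sketch of the caller-side pattern using only the generic PCI API; the handler and name format are placeholders:

#include <linux/interrupt.h>
#include <linux/pci.h>

static irqreturn_t example_handler(int irq, void *arg)
{
	return IRQ_HANDLED;
}

static int request_named_vector(struct pci_dev *pdev, unsigned int nr,
				void *arg, int unit, int idx)
{
	/* pci_request_irq() accepts a printf-style device name */
	return pci_request_irq(pdev, nr, example_handler, NULL, arg,
			       "hfi1_%d example%d", unit, idx);
}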
diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h
new file mode 100644
index 0000000..947543a
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#ifndef HFI1_NETDEV_H
+#define HFI1_NETDEV_H
+
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+
+/**
+ * struct hfi1_netdev_rxq - Receive Queue for HFI
+ * dummy netdev. Both the IPoIB and VNIC netdevices run on
+ * top of this device.
+ * @napi: napi object
+ * @priv: ptr to netdev_priv
+ * @rcd: ptr to receive context data
+ */
+struct hfi1_netdev_rxq {
+ struct napi_struct napi;
+ struct hfi1_netdev_priv *priv;
+ struct hfi1_ctxtdata *rcd;
+};
+
+/*
+ * Number of netdev contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_MAX_NETDEV_CTXTS 8
+
+/* Number of NETDEV RSM entries */
+#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS
+
+/**
+ * struct hfi1_netdev_priv - data required to setup and run HFI netdev.
+ * @dd: hfi1_devdata
+ * @rxq: pointer to dummy netdev receive queues.
+ * @num_rx_q: number of receive queues
+ * @rmt_start: first free index in RMT Array
+ * @dev_tbl: netdev table of unique identifiers for VNIC and IPoIB VLANs.
+ * @enabled: atomic counter of netdevs enabling receive queues.
+ * When 0 NAPI will be disabled.
+ * @netdevs: atomic counter of netdevs using dummy netdev.
+ * When 0 receive queues will be freed.
+ */
+struct hfi1_netdev_priv {
+ struct hfi1_devdata *dd;
+ struct hfi1_netdev_rxq *rxq;
+ int num_rx_q;
+ int rmt_start;
+ struct xarray dev_tbl;
+ /* count of enabled napi polls */
+ atomic_t enabled;
+ /* count of netdevs on top */
+ atomic_t netdevs;
+};
+
+static inline
+struct hfi1_netdev_priv *hfi1_netdev_priv(struct net_device *dev)
+{
+ return (struct hfi1_netdev_priv *)&dev[1];
+}
+
+static inline
+int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return priv->num_rx_q;
+}
+
+static inline
+struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return priv->rxq[ctxt].rcd;
+}
+
+static inline
+int hfi1_netdev_get_free_rmt_idx(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return priv->rmt_start;
+}
+
+static inline
+void hfi1_netdev_set_free_rmt_idx(struct hfi1_devdata *dd, int rmt_idx)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ priv->rmt_start = rmt_idx;
+}
+
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+ struct cpumask *cpu_mask);
+
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd);
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd);
+int hfi1_netdev_alloc(struct hfi1_devdata *dd);
+void hfi1_netdev_free(struct hfi1_devdata *dd);
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data);
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id);
+
+/* chip.c */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget);
+
+#endif /* HFI1_NETDEV_H */
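hfi1_netdev_priv() relies on the private area being co-allocated immediately after the struct net_device, which is why hfi1_netdev_alloc() sizes the dummy netdev as sizeof(*dd->dummy_netdev) + sizeof(struct hfi1_netdev_priv). A standalone sketch of that layout pairing, with an illustrative priv type:

#include <linux/netdevice.h>
#include <linux/slab.h>

struct example_priv {		/* stand-in for hfi1_netdev_priv */
	int num_rx_q;
};

static struct net_device *example_alloc_dummy_netdev(void)
{
	/* one allocation: net_device followed by its private data */
	return kzalloc(sizeof(struct net_device) +
		       sizeof(struct example_priv), GFP_KERNEL);
}

static struct example_priv *example_priv(struct net_device *dev)
{
	return (struct example_priv *)&dev[1];	/* area after dev */
}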
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
new file mode 100644
index 0000000..ea95baa
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for netdev RX functionality
+ */
+
+#include "sdma.h"
+#include "verbs.h"
+#include "netdev.h"
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <rdma/ib_verbs.h>
+
+static int hfi1_netdev_setup_ctxt(struct hfi1_netdev_priv *priv,
+ struct hfi1_ctxtdata *uctxt)
+{
+ unsigned int rcvctrl_ops;
+ struct hfi1_devdata *dd = priv->dd;
+ int ret;
+
+ uctxt->rhf_rcv_function_map = netdev_rhf_rcv_functions;
+ uctxt->do_interrupt = &handle_receive_interrupt_napi_sp;
+
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
+
+ ret = hfi1_setup_eagerbufs(uctxt);
+ if (ret)
+ goto done;
+
+ clear_rcvhdrtail(uctxt);
+
+ rcvctrl_ops = HFI1_RCVCTRL_CTXT_DIS;
+ rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_DIS;
+
+ if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
+done:
+ return ret;
+}
+
+static int hfi1_netdev_allocate_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **ctxt)
+{
+ struct hfi1_ctxtdata *uctxt;
+ int ret;
+
+ if (dd->flags & HFI1_FROZEN)
+ return -EIO;
+
+ ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
+ if (ret < 0) {
+ dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+ return -ENOMEM;
+ }
+
+ uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+ /* Netdev contexts are always NO_RDMA_RTAIL */
+ uctxt->fast_handler = handle_receive_interrupt_napi_fp;
+ uctxt->slow_handler = handle_receive_interrupt_napi_sp;
+ hfi1_set_seq_cnt(uctxt, 1);
+ uctxt->is_vnic = true;
+
+ hfi1_stats.sps_ctxts++;
+
+ dd_dev_info(dd, "created netdev context %d\n", uctxt->ctxt);
+ *ctxt = uctxt;
+
+ return 0;
+}
+
+static void hfi1_netdev_deallocate_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ flush_wc();
+
+ /*
+ * Disable receive context and interrupt available, reset all
+ * RcvCtxtCtrl bits to default values.
+ */
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_TIDFLOW_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+ HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+
+ if (uctxt->msix_intr != CCE_NUM_MSIX_VECTORS)
+ msix_free_irq(dd, uctxt->msix_intr);
+
+ uctxt->msix_intr = CCE_NUM_MSIX_VECTORS;
+ uctxt->event_flags = 0;
+
+ hfi1_clear_tids(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt);
+
+ hfi1_stats.sps_ctxts--;
+
+ hfi1_free_ctxt(uctxt);
+}
+
+static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_priv *priv,
+ struct hfi1_ctxtdata **ctxt)
+{
+ int rc;
+ struct hfi1_devdata *dd = priv->dd;
+
+ rc = hfi1_netdev_allocate_ctxt(dd, ctxt);
+ if (rc) {
+ dd_dev_err(dd, "netdev ctxt alloc failed %d\n", rc);
+ return rc;
+ }
+
+ rc = hfi1_netdev_setup_ctxt(priv, *ctxt);
+ if (rc) {
+ dd_dev_err(dd, "netdev ctxt setup failed %d\n", rc);
+ hfi1_netdev_deallocate_ctxt(dd, *ctxt);
+ *ctxt = NULL;
+ }
+
+ return rc;
+}
+
+/**
+ * hfi1_num_netdev_contexts - Count of netdev recv contexts to use.
+ * @dd: device on which to allocate netdev contexts
+ * @available_contexts: count of available receive contexts
+ * @cpu_mask: mask of possible cpus to include for contexts
+ *
+ * Return: count of physical cores on a node or the remaining available recv
+ * contexts for netdev recv context usage up to the maximum of
+ * HFI1_MAX_NETDEV_CTXTS.
+ * A value of 0 can be returned when acceleration is explicitly turned off,
+ * a memory allocation error occurs, or there are no available contexts.
+ *
+ */
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+ struct cpumask *cpu_mask)
+{
+ cpumask_var_t node_cpu_mask;
+ unsigned int available_cpus;
+
+ if (!HFI1_CAP_IS_KSET(AIP))
+ return 0;
+
+ /* Always give user contexts priority over netdev contexts */
+ if (available_contexts == 0) {
+ dd_dev_info(dd, "No receive contexts available for netdevs.\n");
+ return 0;
+ }
+
+ if (!zalloc_cpumask_var(&node_cpu_mask, GFP_KERNEL)) {
+ dd_dev_err(dd, "Unable to allocate cpu_mask for netdevs.\n");
+ return 0;
+ }
+
+ cpumask_and(node_cpu_mask, cpu_mask, cpumask_of_node(dd->node));
+
+ available_cpus = cpumask_weight(node_cpu_mask);
+
+ free_cpumask_var(node_cpu_mask);
+
+ return min3(available_cpus, available_contexts,
+ (u32)HFI1_MAX_NETDEV_CTXTS);
+}
+
+static int hfi1_netdev_rxq_init(struct net_device *dev)
+{
+ int i;
+ int rc;
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev);
+ struct hfi1_devdata *dd = priv->dd;
+
+ priv->num_rx_q = dd->num_netdev_contexts;
+ priv->rxq = kcalloc_node(priv->num_rx_q, sizeof(struct hfi1_netdev_rxq),
+ GFP_KERNEL, dd->node);
+
+ if (!priv->rxq) {
+ dd_dev_err(dd, "Unable to allocate netdev queue data\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < priv->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ rc = hfi1_netdev_allot_ctxt(priv, &rxq->rcd);
+ if (rc)
+ goto bail_context_irq_failure;
+
+ hfi1_rcd_get(rxq->rcd);
+ rxq->priv = priv;
+ rxq->rcd->napi = &rxq->napi;
+ dd_dev_info(dd, "Setting rcv queue %d napi to context %d\n",
+ i, rxq->rcd->ctxt);
+ /*
+ * Disable BUSY_POLL on this NAPI as this is not supported
+ * right now.
+ */
+ set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
+ netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
+ rc = msix_netdev_request_rcd_irq(rxq->rcd);
+ if (rc)
+ goto bail_context_irq_failure;
+ }
+
+ return 0;
+
+bail_context_irq_failure:
+ dd_dev_err(dd, "Unable to allot receive context\n");
+ for (; i >= 0; i--) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ if (rxq->rcd) {
+ hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+ hfi1_rcd_put(rxq->rcd);
+ rxq->rcd = NULL;
+ }
+ }
+ kfree(priv->rxq);
+ priv->rxq = NULL;
+
+ return rc;
+}
+
+static void hfi1_netdev_rxq_deinit(struct net_device *dev)
+{
+ int i;
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev);
+ struct hfi1_devdata *dd = priv->dd;
+
+ for (i = 0; i < priv->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ netif_napi_del(&rxq->napi);
+ hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+ hfi1_rcd_put(rxq->rcd);
+ rxq->rcd = NULL;
+ }
+
+ kfree(priv->rxq);
+ priv->rxq = NULL;
+ priv->num_rx_q = 0;
+}
+
+static void enable_queues(struct hfi1_netdev_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ dd_dev_info(priv->dd, "enabling queue %d on context %d\n", i,
+ rxq->rcd->ctxt);
+ napi_enable(&rxq->napi);
+ hfi1_rcvctrl(priv->dd,
+ HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB,
+ rxq->rcd);
+ }
+}
+
+static void disable_queues(struct hfi1_netdev_priv *priv)
+{
+ int i;
+
+ msix_netdev_synchronize_irq(priv->dd);
+
+ for (i = 0; i < priv->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ dd_dev_info(priv->dd, "disabling queue %d on context %d\n", i,
+ rxq->rcd->ctxt);
+
+ /* wait for napi if it was scheduled */
+ hfi1_rcvctrl(priv->dd,
+ HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS,
+ rxq->rcd);
+ napi_synchronize(&rxq->napi);
+ napi_disable(&rxq->napi);
+ }
+}
+
+/**
+ * hfi1_netdev_rx_init - Increments the netdevs counter. On the first call,
+ * it allocates the receive queue data and calls netif_napi_add
+ * for each queue.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+ int res;
+
+ if (atomic_fetch_inc(&priv->netdevs))
+ return 0;
+
+ mutex_lock(&hfi1_mutex);
+ init_dummy_netdev(dd->dummy_netdev);
+ res = hfi1_netdev_rxq_init(dd->dummy_netdev);
+ mutex_unlock(&hfi1_mutex);
+ return res;
+}
+
+/**
+ * hfi1_netdev_rx_destroy - Decrements the netdevs counter; when it reaches 0,
+ * the NAPI objects are deleted and the receive queue memory is freed.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ /* destroy the RX queues only if it is the last netdev going away */
+ if (atomic_fetch_add_unless(&priv->netdevs, -1, 0) == 1) {
+ mutex_lock(&hfi1_mutex);
+ hfi1_netdev_rxq_deinit(dd->dummy_netdev);
+ mutex_unlock(&hfi1_mutex);
+ }
+
+ return 0;
+}
+
+/**
+ * hfi1_netdev_alloc - Allocates the dummy netdev and private data. It is
+ * required because the RMT index and MSI-X interrupt can be set only
+ * during driver initialization.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_alloc(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv;
+ const int netdev_size = sizeof(*dd->dummy_netdev) +
+ sizeof(struct hfi1_netdev_priv);
+
+ dd_dev_info(dd, "allocating netdev size %d\n", netdev_size);
+ dd->dummy_netdev = kcalloc_node(1, netdev_size, GFP_KERNEL, dd->node);
+
+ if (!dd->dummy_netdev)
+ return -ENOMEM;
+
+ priv = hfi1_netdev_priv(dd->dummy_netdev);
+ priv->dd = dd;
+ xa_init(&priv->dev_tbl);
+ atomic_set(&priv->enabled, 0);
+ atomic_set(&priv->netdevs, 0);
+
+ return 0;
+}
+
+void hfi1_netdev_free(struct hfi1_devdata *dd)
+{
+ if (dd->dummy_netdev) {
+ dd_dev_info(dd, "hfi1 netdev freed\n");
+ kfree(dd->dummy_netdev);
+ dd->dummy_netdev = NULL;
+ }
+}
+
+/**
+ * hfi1_netdev_enable_queues - NAPI enable function.
+ * It enables the NAPI objects associated with the receive queues.
+ * Each caller increments an atomic counter; the disable function
+ * decrements it and, when the counter reaches 0, calls napi_disable
+ * for every queue.
+ *
+ * @dd: hfi1 dev data
+ */
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv;
+
+ if (!dd->dummy_netdev)
+ return;
+
+ priv = hfi1_netdev_priv(dd->dummy_netdev);
+ if (atomic_fetch_inc(&priv->enabled))
+ return;
+
+ mutex_lock(&hfi1_mutex);
+ enable_queues(priv);
+ mutex_unlock(&hfi1_mutex);
+}
+
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv;
+
+ if (!dd->dummy_netdev)
+ return;
+
+ priv = hfi1_netdev_priv(dd->dummy_netdev);
+ if (atomic_dec_if_positive(&priv->enabled))
+ return;
+
+ mutex_lock(&hfi1_mutex);
+ disable_queues(priv);
+ mutex_unlock(&hfi1_mutex);
+}
+
+/**
+ * hfi1_netdev_add_data - Registers data with a unique identifier
+ * to be requested later; this is needed for the VNIC and IPoIB VLAN
+ * implementations.
+ * Synchronization is provided by the xarray's internal locking.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ * @data: data to be associated with index
+ */
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return xa_insert(&priv->dev_tbl, id, data, GFP_NOWAIT);
+}
+
+/**
+ * hfi1_netdev_remove_data - Removes data with previously given id.
+ * Returns the reference to removed entry.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return xa_erase(&priv->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_data - Gets data with given id
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return xa_load(&priv->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_first_data - Gets the first entry with an id greater
+ * than or equal to *start_id.
+ *
+ * @dd: hfi1 dev data
+ * @start_id: requested integer id up to INT_MAX; updated to the id found
+ */
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+ unsigned long index = *start_id;
+ void *ret;
+
+ ret = xa_find(&priv->dev_tbl, &index, UINT_MAX, XA_PRESENT);
+ *start_id = (int)index;
+ return ret;
+}
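The dev_tbl above is a plain xarray keyed by caller-chosen ids, so add/remove/get map directly onto xa_insert()/xa_erase()/xa_load(), and the get-first helper onto xa_find(). A self-contained sketch of the same usage; the table and ids are illustrative:

#include <linux/xarray.h>

static DEFINE_XARRAY(example_tbl);

static int example_add(int id, void *data)
{
	/* xa_insert() fails with -EBUSY if the id is already present */
	return xa_insert(&example_tbl, id, data, GFP_KERNEL);
}

static void *example_get_first(int *start_id)
{
	unsigned long index = *start_id;
	void *entry = xa_find(&example_tbl, &index, ULONG_MAX, XA_PRESENT);

	*start_id = (int)index;
	return entry;
}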
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 61362bd..18d32f0 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -161,7 +161,7 @@
return -EINVAL;
}
- dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY);
+ dd->kregbase1 = ioremap(addr, RCV_ARRAY);
if (!dd->kregbase1) {
dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
return -ENOMEM;
@@ -179,7 +179,7 @@
dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count);
dd->base2_start = RCV_ARRAY + rcv_array_count * 8;
- dd->kregbase2 = ioremap_nocache(
+ dd->kregbase2 = ioremap(
addr + dd->base2_start,
TXE_PIO_SEND - dd->base2_start);
if (!dd->kregbase2) {
@@ -306,7 +306,7 @@
ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) {
@@ -334,10 +334,14 @@
return 0;
}
-/* restore command and BARs after a reset has wiped them out */
+/**
+ * restore_pci_variables() - Restore command and BARs after a reset has
+ * wiped them out
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int restore_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
if (ret)
@@ -386,13 +390,17 @@
error:
dd_dev_err(dd, "Unable to write to PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
-/* Save BARs and command to rewrite after device reset */
+/**
+ * save_pci_variables() - Save BARs and command to rewrite after device reset
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int save_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
&dd->pcibar0);
@@ -441,7 +449,7 @@
error:
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
/*
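PCI config accessors return PCIBIOS_* status codes rather than errnos, so the hunks above route failures through pcibios_err_to_errno() before returning to callers that expect negative errno values. A short sketch; the register offset is arbitrary:

#include <linux/pci.h>

static int read_example_reg(struct pci_dev *pdev, u32 *val)
{
	int ret = pci_read_config_dword(pdev, 0x40, val);

	/* translate a PCIBIOS_* status into a negative errno */
	return ret ? pcibios_err_to_errno(ret) : 0;
}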
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 79126b2..1cd8f80 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -86,7 +86,7 @@
switch (op) {
case PSC_GLOBAL_ENABLE:
reg |= SEND_CTRL_SEND_ENABLE_SMASK;
- /* Fall through */
+ fallthrough;
case PSC_DATA_VL_ENABLE:
mask = 0;
for (i = 0; i < ARRAY_SIZE(dd->vld); i++)
@@ -920,6 +920,7 @@
{
u64 reg;
struct pio_buf *pbuf;
+ LIST_HEAD(wake_list);
if (!sc)
return;
@@ -954,19 +955,21 @@
spin_unlock(&sc->release_lock);
write_seqlock(&sc->waitlock);
- while (!list_empty(&sc->piowait)) {
+ if (!list_empty(&sc->piowait))
+ list_move(&sc->piowait, &wake_list);
+ write_sequnlock(&sc->waitlock);
+ while (!list_empty(&wake_list)) {
struct iowait *wait;
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
- wait = list_first_entry(&sc->piowait, struct iowait, list);
+ wait = list_first_entry(&wake_list, struct iowait, list);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
}
- write_sequnlock(&sc->waitlock);
spin_unlock_irq(&sc->alloc_lock);
}
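The sc_wait loop change above is the classic detach-then-process pattern: move the waiters onto a private list while the seqlock is held, then wake them with no lock held, so hfi1_qp_wakeup() never runs under sc->waitlock. A generic sketch of the pattern, with placeholder item and wake types:

#include <linux/list.h>
#include <linux/spinlock.h>

static void drain_waiters(struct list_head *shared, spinlock_t *lock,
			  void (*wake)(struct list_head *entry))
{
	LIST_HEAD(local);

	spin_lock(lock);
	list_splice_init(shared, &local);	/* detach under the lock */
	spin_unlock(lock);

	while (!list_empty(&local)) {		/* wake with no lock held */
		struct list_head *entry = local.next;

		list_del_init(entry);
		wake(entry);
	}
}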
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index c9a58b6..0102262 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -243,7 +243,7 @@
*/
struct pio_map_elem {
u32 mask;
- struct send_context *ksc[0];
+ struct send_context *ksc[];
};
/*
@@ -263,7 +263,7 @@
u32 mask;
u8 actual_vls;
u8 vls;
- struct pio_map_elem *map[0];
+ struct pio_map_elem *map[];
};
int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls,
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 03024ce..4a4ec23 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -191,25 +191,24 @@
switch (n) {
case 7:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 6:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 5:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 4:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 3:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 2:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 1:
*dest++ = *src++;
- /* fall through */
}
}
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index cbf7faa..4642d6c 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -634,7 +634,7 @@
u32 config_data, const char *message)
{
u8 i;
- int ret = HCMD_SUCCESS;
+ int ret;
for (i = 0; i < 4; i++) {
ret = load_8051_config(ppd->dd, field_id, i, config_data);
@@ -668,8 +668,8 @@
/* active optical cables only */
switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
/* active AOC */
power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
@@ -899,8 +899,8 @@
*ptr_tuning_method = OPA_PASSIVE_TUNING;
break;
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
ptr_total_atten);
@@ -909,7 +909,7 @@
*ptr_tuning_method = OPA_ACTIVE_TUNING;
break;
- case 0xD: /* fallthrough */
+ case 0xD: fallthrough;
case 0xF:
default:
dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index acd4400..356518e 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2019 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -186,15 +186,6 @@
write_sequnlock_irqrestore(lock, flags);
}
-static inline int opa_mtu_enum_to_int(int mtu)
-{
- switch (mtu) {
- case OPA_MTU_8192: return 8192;
- case OPA_MTU_10240: return 10240;
- default: return -1;
- }
-}
-
/**
* This function is what we would push to the core layer if we wanted to be a
* "first class citizen". Instead we hide this here and rely on Verbs ULPs
@@ -202,15 +193,10 @@
*/
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
- int val;
-
/* Constraining 10KB packets to 8KB packets */
if (mtu == (enum ib_mtu)OPA_MTU_10240)
- mtu = OPA_MTU_8192;
- val = opa_mtu_enum_to_int((int)mtu);
- if (val > 0)
- return val;
- return ib_mtu_enum_to_int(mtu);
+ mtu = (enum ib_mtu)OPA_MTU_8192;
+ return opa_mtu_enum_to_int((enum opa_mtu)mtu);
}
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
@@ -326,7 +312,7 @@
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
hfi1_setup_tid_rdma_wqe(qp, wqe);
- /* fall through */
+ fallthrough;
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b670321..b0d053d 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -113,20 +113,6 @@
}
/**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
-
-/**
* hfi1_qp_wakeup - wake up on the indicated event
* @qp: the QP
* @flag: flag the qp on which the qp is stalled
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index b596699..8386c84 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -231,7 +231,7 @@
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
@@ -279,7 +279,7 @@
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 1a3c647..1bb5f57 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -141,7 +141,7 @@
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
release_rdma_sge_mr(e);
- /* FALLTHROUGH */
+ fallthrough;
case OP(ATOMIC_ACKNOWLEDGE):
/*
* We can increment the tail pointer now that the last
@@ -160,7 +160,7 @@
qp->s_acked_ack_queue = next;
qp->s_tail_ack_queue = next;
trace_hfi1_rsp_make_rc_ack(qp, e->psn);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_ONLY):
case OP(ACKNOWLEDGE):
/* Check for no next entry in the queue. */
@@ -267,7 +267,7 @@
case OP(RDMA_READ_RESPONSE_FIRST):
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_READ_RESPONSE_MIDDLE):
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
@@ -881,8 +881,7 @@
goto bail;
}
qp->s_num_rd_atomic++;
-
- /* FALLTHROUGH */
+ fallthrough;
case IB_WR_OPFN:
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
@@ -946,10 +945,10 @@
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -991,10 +990,10 @@
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -2599,7 +2598,7 @@
* to be sent before sending this one.
*/
e = NULL;
- old_req = 1;
+ old_req = true;
ibp->rvp.n_rc_dupreq++;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -2901,7 +2900,7 @@
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
case OP(RDMA_WRITE_MIDDLE):
send_middle:
@@ -2941,7 +2940,7 @@
goto no_immediate_data;
if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
goto send_last_inv;
- /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */
+ fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
@@ -2957,7 +2956,7 @@
goto send_last;
case OP(RDMA_WRITE_LAST):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(SEND_LAST):
no_immediate_data:
wc.wc_flags = 0;
@@ -3010,7 +3009,7 @@
case OP(RDMA_WRITE_ONLY):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 248be21..0b73dc7 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -232,11 +232,11 @@
static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
-static void sdma_hw_clean_up_task(unsigned long);
+static void sdma_hw_clean_up_task(struct tasklet_struct *);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
@@ -545,9 +545,10 @@
schedule_work(&sde->err_halt_worker);
}
-static void sdma_hw_clean_up_task(unsigned long opaque)
+static void sdma_hw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t,
+ sdma_hw_clean_up_task);
u64 statuscsr;
while (1) {
@@ -604,9 +605,9 @@
sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task);
unsigned long flags;
spin_lock_irqsave(&sde->tail_lock, flags);
@@ -833,7 +834,7 @@
struct sdma_rht_map_elem {
u32 mask;
u8 ctr;
- struct sdma_engine *sde[0];
+ struct sdma_engine *sde[];
};
struct sdma_rht_node {
@@ -848,7 +849,7 @@
.nelem_hint = NR_CPUS_HINT,
.head_offset = offsetof(struct sdma_rht_node, node),
.key_offset = offsetof(struct sdma_rht_node, cpu_id),
- .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .key_len = sizeof_field(struct sdma_rht_node, cpu_id),
.max_size = NR_CPUS,
.min_size = 8,
.automatic_shrinking = true,
@@ -879,10 +880,10 @@
if (current->nr_cpus_allowed != 1)
goto out;
- cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
- sdma_rht_params);
+ cpu_id = smp_processor_id();
+ rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
struct sdma_rht_map_elem *map = rht_node->map[vl];
@@ -1454,11 +1455,10 @@
sde->tail_csr =
get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
- tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
- (unsigned long)sde);
-
- tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
- (unsigned long)sde);
+ tasklet_setup(&sde->sdma_hw_clean_up_task,
+ sdma_hw_clean_up_task);
+ tasklet_setup(&sde->sdma_sw_clean_up_task,
+ sdma_sw_clean_up_task);
INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
INIT_WORK(&sde->flush_worker, sdma_field_flush);
@@ -2584,7 +2584,7 @@
* 7220, e.g.
*/
ss->go_s99_running = 1;
- /* fall through -- and start dma engine */
+ fallthrough; /* and start dma engine */
case sdma_event_e10_go_hw_start:
/* This reference means the state machine is started */
sdma_get(&sde->state);
@@ -2726,7 +2726,6 @@
case sdma_event_e70_go_idle:
break;
case sdma_event_e85_link_down:
- /* fall through */
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3007,7 +3006,7 @@
case sdma_event_e60_hw_halted:
need_progress = 1;
sdma_err_progress_check_schedule(sde);
- /* fall through */
+ fallthrough;
case sdma_event_e90_sw_halted:
/*
* SW initiated halt does not perform engines
@@ -3021,7 +3020,7 @@
break;
case sdma_event_e85_link_down:
ss->go_s99_running = 0;
- /* fall through */
+ fallthrough;
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3251,7 +3250,7 @@
tx->num_desc++;
tx->descs[2].qw[0] = 0;
tx->descs[2].qw[1] = 0;
- /* FALLTHROUGH */
+ fallthrough;
case SDMA_AHG_APPLY_UPDATE2:
tx->num_desc++;
tx->descs[1].qw[0] = 0;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 1e2e40f..7a85119 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1002,7 +1002,7 @@
*/
struct sdma_map_elem {
u32 mask;
- struct sdma_engine *sde[0];
+ struct sdma_engine *sde[];
};
/**
@@ -1024,7 +1024,7 @@
u32 mask;
u8 actual_vls;
u8 vls;
- struct sdma_map_elem *map[0];
+ struct sdma_map_elem *map[];
};
int sdma_map_init(
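The sdma clean-up tasklets above move from the old tasklet_init(..., unsigned long) calling convention to tasklet_setup(), whose callback receives the tasklet pointer and recovers its container with from_tasklet(). A minimal sketch with an illustrative engine structure:

#include <linux/interrupt.h>

struct example_engine {
	struct tasklet_struct clean_up_task;
	int state;
};

static void example_clean_up(struct tasklet_struct *t)
{
	/* from_tasklet() is container_of() keyed on the member name */
	struct example_engine *eng = from_tasklet(eng, t, clean_up_task);

	eng->state = 0;
}

static void example_engine_init(struct example_engine *eng)
{
	tasklet_setup(&eng->clean_up_task, example_clean_up);
}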
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index c018fc6..73d197e 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
*
*/
@@ -194,7 +194,7 @@
{
struct hfi1_qp_priv *priv = qp->priv;
- p->qp = (kdeth_qp << 16) | priv->rcd->ctxt;
+ p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt;
p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
p->jkey = priv->rcd->jkey;
p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
@@ -3228,7 +3228,7 @@
case IB_WR_RDMA_READ:
if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
break;
- /* fall through */
+ fallthrough;
case IB_WR_TID_RDMA_READ:
switch (prev->wr.opcode) {
case IB_WR_RDMA_READ:
@@ -5068,7 +5068,7 @@
if (priv->s_state == TID_OP(WRITE_REQ))
hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
priv->s_state = TID_OP(WRITE_DATA);
- /* fall through */
+ fallthrough;
case TID_OP(WRITE_DATA):
/*
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 9a3d236..b219ea9 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -47,6 +47,7 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "exp_rcv.h"
+#include "ipoib.h"
static u8 __get_ib_hdr_len(struct ib_header *hdr)
{
@@ -126,6 +127,7 @@
#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
+#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x"
#define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
@@ -444,6 +446,12 @@
break;
/* deth */
case OP(UD, SEND_ONLY):
+ trace_seq_printf(p, DETH_ENTROPY_PRN,
+ be32_to_cpu(eh->ud.deth[0]),
+ be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK,
+ be32_to_cpu(eh->ud.deth[1]) >>
+ HFI1_IPOIB_ENTROPY_SHIFT);
+ break;
case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, DETH_PRN,
be32_to_cpu(eh->ud.deth[0]),
@@ -512,6 +520,38 @@
return EXP_TID_GET(ent, IDX);
}
+struct hfi1_ctxt_hist {
+ atomic_t count;
+ atomic_t data[255];
+};
+
+static struct hfi1_ctxt_hist hist = {
+ .count = ATOMIC_INIT(0)
+};
+
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt)
+{
+ int i, len = ARRAY_SIZE(hist.data);
+ const char *ret = trace_seq_buffer_ptr(p);
+ unsigned long packet_count = atomic_fetch_inc(&hist.count);
+
+ trace_seq_printf(p, "packet[%lu]", packet_count);
+ for (i = 0; i < len; ++i) {
+ unsigned long val;
+ atomic_t *count = &hist.data[i];
+
+ if (ctxt == i)
+ val = atomic_fetch_inc(count);
+ else
+ val = atomic_read(count);
+
+ if (val)
+ trace_seq_printf(p, "(%d:%lu)", i, val);
+ }
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index e00c8a7..d8c168d 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -80,7 +80,7 @@
__entry->credits = uctxt->sc->credits;
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
- __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+ __entry->rcvhdrq_cnt = get_hdrq_cnt(uctxt);
__entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
__entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
@@ -138,6 +138,15 @@
)
);
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt);
+TRACE_EVENT(ctxt_rsm_hist,
+ TP_PROTO(unsigned int ctxt),
+ TP_ARGS(ctxt),
+ TP_STRUCT__entry(__field(unsigned int, ctxt)),
+ TP_fast_assign(__entry->ctxt = ctxt;),
+ TP_printk("%s", hfi1_trace_print_rsm_hist(p, __entry->ctxt))
+);
+
#endif /* __HFI1_TRACE_CTXTS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 3cec960..168079e 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -106,19 +106,8 @@
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = rcd->ctxt;
- if (rcd->do_interrupt ==
- &handle_receive_interrupt) {
- __entry->slow_path = 1;
- __entry->dma_rtail = 0xFF;
- } else if (rcd->do_interrupt ==
- &handle_receive_interrupt_dma_rtail){
- __entry->dma_rtail = 1;
- __entry->slow_path = 0;
- } else if (rcd->do_interrupt ==
- &handle_receive_interrupt_nodma_rtail) {
- __entry->dma_rtail = 0;
- __entry->slow_path = 0;
- }
+ __entry->slow_path = hfi1_is_slowpath(rcd);
+ __entry->dma_rtail = get_dma_rtail_setting(rcd);
),
TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
__get_str(dev),
diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h
index 343fb98..985ffa9 100644
--- a/drivers/infiniband/hw/hfi1/trace_tid.h
+++ b/drivers/infiniband/hw/hfi1/trace_tid.h
@@ -138,10 +138,10 @@
TP_ARGS(dd, index, type, pa, order),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd)
- __field(unsigned long, pa);
- __field(u32, index);
- __field(u32, type);
- __field(u16, order);
+ __field(unsigned long, pa)
+ __field(u32, index)
+ __field(u32, type)
+ __field(u16, order)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd);
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 09eb0c9..769e5e4 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -588,7 +588,7 @@
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
TP_ARGS(dd, ctxt, subctxt, i),
TP_STRUCT__entry(
- DD_DEV_ENTRY(dd);
+ DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u8, subctxt)
__field(u8, ver_opcode)
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 0c77f18..1fb9183 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -216,7 +216,7 @@
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -241,7 +241,7 @@
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -414,7 +414,7 @@
goto no_immediate_data;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
/*
@@ -515,7 +515,7 @@
wc.ex.imm_data = ohdr->u.rc.imm_data;
goto rdma_last_imm;
}
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4)))
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 4d73235..b94fc7f 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,4 +1,5 @@
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -59,11 +60,11 @@
struct tid_user_buf *tbuf,
u32 rcventry, struct tid_group *grp,
u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
struct tid_group *grp,
unsigned int start, u16 count,
@@ -73,10 +74,8 @@
struct tid_group **grp);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
-static struct mmu_rb_ops tid_rb_ops = {
- .insert = tid_rb_insert,
- .remove = tid_rb_remove,
- .invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+ .invalidate = tid_rb_invalidate,
};
/*
@@ -87,7 +86,6 @@
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
fd->entry_to_rb = kcalloc(uctxt->expected_count,
@@ -106,20 +104,7 @@
fd->entry_to_rb = NULL;
return -ENOMEM;
}
-
- /*
- * Register MMU notifier callbacks. If the registration
- * fails, continue without TID caching for this context.
- */
- ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
- dd->pport->hfi1_wq,
- &fd->handler);
- if (ret) {
- dd_dev_info(dd,
- "Failed MMU notifier registration %d\n",
- ret);
- ret = 0;
- }
+ fd->use_mn = true;
}
/*
@@ -136,7 +121,7 @@
* init.
*/
spin_lock(&fd->tid_lock);
- if (uctxt->subctxt_cnt && fd->handler) {
+ if (uctxt->subctxt_cnt && fd->use_mn) {
u16 remainder;
fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -155,20 +140,12 @@
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- /*
- * The notifier would have been removed when the process'es mm
- * was freed.
- */
- if (fd->handler) {
- hfi1_mmu_rb_unregister(fd->handler);
- } else {
- mutex_lock(&uctxt->exp_mutex);
- if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
- unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
- if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
- unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
- mutex_unlock(&uctxt->exp_mutex);
- }
+ mutex_lock(&uctxt->exp_mutex);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+ mutex_unlock(&uctxt->exp_mutex);
kfree(fd->invalid_tids);
fd->invalid_tids = NULL;
@@ -197,15 +174,18 @@
{
struct page **pages;
struct hfi1_devdata *dd = fd->uctxt->dd;
+ struct mm_struct *mm;
if (mapped) {
pci_unmap_single(dd->pcidev, node->dma_addr,
- node->mmu.len, PCI_DMA_FROMDEVICE);
+ node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
pages = &node->pages[idx];
+ mm = mm_from_tid_node(node);
} else {
pages = &tidbuf->pages[idx];
+ mm = current->mm;
}
- hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+ hfi1_release_user_pages(mm, pages, npages, mapped);
fd->tid_n_pinned -= npages;
}
@@ -230,13 +210,6 @@
return -EINVAL;
}
- /* Verify that access is OK for the user buffer */
- if (!access_ok((void __user *)vaddr,
- npages * PAGE_SIZE)) {
- dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
- (void *)vaddr, npages);
- return -EFAULT;
- }
/* Allocate the array of struct page pointers needed for pinning */
pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
if (!pages)
@@ -247,12 +220,12 @@
* pages, accept the amount pinned so far and program only that.
* User space knows how to deal with partially programmed buffers.
*/
- if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+ if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
kfree(pages);
return -ENOMEM;
}
- pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+ pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
if (pinned <= 0) {
kfree(pages);
return pinned;
@@ -776,8 +749,7 @@
return -EFAULT;
}
- node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
- node->mmu.len = npages * PAGE_SIZE;
+ node->fdata = fd;
node->phys = page_to_phys(pages[0]);
node->npages = npages;
node->rcventry = rcventry;
@@ -786,23 +758,35 @@
node->freed = false;
memcpy(node->pages, pages, sizeof(struct page *) * npages);
- if (!fd->handler)
- ret = tid_rb_insert(fd, &node->mmu);
- else
- ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
- if (ret) {
- hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
- node->rcventry, node->mmu.addr, node->phys, ret);
- pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
- kfree(node);
- return -EFAULT;
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+ &node->notifier, current->mm,
+ tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+ &tid_mn_ops);
+ if (ret)
+ goto out_unmap;
+ /*
+ * FIXME: This is in the wrong order, the notifier should be
+ * established before the pages are pinned by pin_rcv_pages.
+ */
+ mmu_interval_read_begin(&node->notifier);
}
+ fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
- node->mmu.addr, node->phys, phys);
+ node->notifier.interval_tree.start, node->phys,
+ phys);
return 0;
+
+out_unmap:
+ hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+ node->rcventry, node->notifier.interval_tree.start,
+ node->phys, ret);
+ pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ kfree(node);
+ return -EFAULT;
}
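The FIXME above records the canonical mmu_interval ordering: establish the notifier first, then pin and program, then confirm that no invalidation raced in between. A sketch of that ordering, with hypothetical pin_and_map()/unpin_and_unmap() helpers standing in for the driver's pin_rcv_pages()/hfi1_put_tid() work; the driver lock that must serialize the retry check against the invalidate callback is elided:

	#include <linux/mmu_notifier.h>

	static int pin_and_map(void);		/* hypothetical helper */
	static void unpin_and_unmap(void);	/* hypothetical helper */

	static int setup_range(struct mmu_interval_notifier *mni)
	{
		unsigned long seq;

		for (;;) {
			seq = mmu_interval_read_begin(mni);
			if (pin_and_map())
				return -EFAULT;
			if (!mmu_interval_read_retry(mni, seq))
				return 0;	/* no invalidation raced us */
			unpin_and_unmap();	/* raced: roll back, retry */
		}
	}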
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
@@ -832,10 +816,9 @@
if (grp)
*grp = node->grp;
- if (!fd->handler)
- cacheless_tid_rb_remove(fd, node);
- else
- hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&node->notifier);
+ cacheless_tid_rb_remove(fd, node);
return 0;
}
@@ -846,7 +829,8 @@
struct hfi1_devdata *dd = uctxt->dd;
trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
- node->npages, node->mmu.addr, node->phys,
+ node->npages,
+ node->notifier.interval_tree.start, node->phys,
node->dma_addr);
/*
@@ -893,30 +877,29 @@
if (!node || node->rcventry != rcventry)
continue;
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(
+ &node->notifier);
cacheless_tid_rb_remove(fd, node);
}
}
}
}
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct hfi1_filedata *fdata = arg;
- struct hfi1_ctxtdata *uctxt = fdata->uctxt;
struct tid_rb_node *node =
- container_of(mnode, struct tid_rb_node, mmu);
+ container_of(mni, struct tid_rb_node, notifier);
+ struct hfi1_filedata *fdata = node->fdata;
+ struct hfi1_ctxtdata *uctxt = fdata->uctxt;
if (node->freed)
- return 0;
+ return true;
- trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+ node->notifier.interval_tree.start,
node->rcventry, node->npages, node->dma_addr);
node->freed = true;
@@ -945,18 +928,7 @@
fdata->invalid_tid_idx++;
}
spin_unlock(&fdata->invalid_lock);
- return 0;
-}
-
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
- u32 base = fdata->uctxt->expected_base;
-
- fdata->entry_to_rb[tnode->rcventry - base] = tnode;
- return 0;
+ return true;
}
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -967,12 +939,3 @@
fdata->entry_to_rb[tnode->rcventry - base] = NULL;
clear_tid_node(fdata, tnode);
}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
-
- cacheless_tid_rb_remove(fdata, tnode);
-}
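Taken together, these hunks replace the driver-private mmu_rb callback trio (insert/remove/invalidate through a void *arg) with the core mmu_interval_notifier API: one notifier embedded per pinned range, a single ->invalidate() hook returning bool, and container_of() to recover the owning node. A minimal registration sketch under the same assumptions (struct and function names hypothetical; a fuller implementation would also call mmu_interval_set_seq(mni, cur_seq) under its lock):

	#include <linux/kernel.h>
	#include <linux/mmu_notifier.h>

	struct my_node {
		struct mmu_interval_notifier notifier;	/* embedded, one per range */
		bool freed;
	};

	static bool my_invalidate(struct mmu_interval_notifier *mni,
				  const struct mmu_notifier_range *range,
				  unsigned long cur_seq)
	{
		struct my_node *node = container_of(mni, struct my_node, notifier);

		/* mark stale; the owner unpins later, mirroring the
		 * "retain and notify PSM" policy of the removed comment */
		node->freed = true;
		return true;
	}

	static const struct mmu_interval_notifier_ops my_mn_ops = {
		.invalidate = my_invalidate,
	};

	static int my_register(struct my_node *node, unsigned long start,
			       unsigned long len)
	{
		int ret = mmu_interval_notifier_insert(&node->notifier,
						       current->mm, start, len,
						       &my_mn_ops);
		if (ret)
			return ret;
		mmu_interval_read_begin(&node->notifier);	/* see FIXME above */
		return 0;
	}

	/* teardown: mmu_interval_notifier_remove(&node->notifier);
	 * blocks until any running invalidate callback has finished */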
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 43b105d..d45c7b6 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,6 +1,7 @@
#ifndef _HFI1_USER_EXP_RCV_H
#define _HFI1_USER_EXP_RCV_H
/*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -65,14 +66,15 @@
};
struct tid_rb_node {
- struct mmu_rb_node mmu;
+ struct mmu_interval_notifier notifier;
+ struct hfi1_filedata *fdata;
unsigned long phys;
struct tid_group *grp;
u32 rcventry;
dma_addr_t dma_addr;
bool freed;
unsigned int npages;
- struct page *pages[0];
+ struct page *pages[];
};
static inline int num_user_pages(unsigned long addr,
@@ -94,4 +96,9 @@
int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo);
+static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node)
+{
+ return node->notifier.mm;
+}
+
#endif /* _HFI1_USER_EXP_RCV_H */
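The pages[0] to pages[] change in tid_rb_node is the kernel-wide move from one-element-array tricks to C99 flexible array members, which sizeof excludes and which fortified bounds checking understands. Allocations for such structs typically go through struct_size() from linux/overflow.h so the size computation saturates rather than overflows. A hedged sketch with a stand-in struct:

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct tid_like_node {
		unsigned int npages;
		struct page *pages[];	/* flexible array member, must be last */
	};

	static struct tid_like_node *node_alloc(unsigned int npages)
	{
		/* struct_size() == sizeof(*node) + npages * sizeof(node->pages[0]),
		 * saturating to SIZE_MAX on overflow so kzalloc fails cleanly */
		struct tid_like_node *node =
			kzalloc(struct_size(node, pages, npages), GFP_KERNEL);

		if (node)
			node->npages = npages;
		return node;
	}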
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 469acb9..3b50500 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -106,7 +106,7 @@
int ret;
unsigned int gup_flags = FOLL_LONGTERM | (writable ? FOLL_WRITE : 0);
- ret = get_user_pages_fast(vaddr, npages, gup_flags, pages);
+ ret = pin_user_pages_fast(vaddr, npages, gup_flags, pages);
if (ret < 0)
return ret;
@@ -118,7 +118,7 @@
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty)
{
- put_user_pages_dirty_lock(p, npages, dirty);
+ unpin_user_pages_dirty_lock(p, npages, dirty);
if (mm) { /* during close after signal, mm can be NULL */
atomic64_sub(npages, &mm->pinned_vm);
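This hunk is part of the tree-wide FOLL_PIN conversion: pages taken with pin_user_pages_fast() must be released with unpin_user_pages_dirty_lock(), never a plain put_page(), so the mm can tell DMA pins apart from ordinary page references. A minimal sketch of the paired calls:

	#include <linux/mm.h>

	static int pin_range(unsigned long vaddr, int npages, bool writable,
			     struct page **pages)
	{
		unsigned int gup_flags = FOLL_LONGTERM |
					 (writable ? FOLL_WRITE : 0);
		int pinned = pin_user_pages_fast(vaddr, npages, gup_flags, pages);

		if (pinned < 0)
			return pinned;
		if (pinned != npages) {
			/* partial pin: release what was taken, report failure */
			unpin_user_pages_dirty_lock(pages, pinned, false);
			return -EFAULT;
		}
		return 0;
	}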
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index a92346e..4a4956f 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,4 +1,5 @@
/*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -188,7 +189,6 @@
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
atomic_set(&pq->n_locked, 0);
- pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL, NULL);
@@ -230,7 +230,7 @@
cq->nentries = hfi1_sdma_comp_ring_size;
- ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
+ ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
&pq->handler);
if (ret) {
dd_dev_err(dd, "Failed to register with MMU %d", ret);
@@ -980,13 +980,13 @@
npages -= node->npages;
retry:
- if (!hfi1_can_pin_pages(pq->dd, pq->mm,
+ if (!hfi1_can_pin_pages(pq->dd, current->mm,
atomic_read(&pq->n_locked), npages)) {
cleared = sdma_cache_evict(pq, npages);
if (cleared >= npages)
goto retry;
}
- pinned = hfi1_acquire_user_pages(pq->mm,
+ pinned = hfi1_acquire_user_pages(current->mm,
((unsigned long)iovec->iov.iov_base +
(node->npages * PAGE_SIZE)), npages, 0,
pages + node->npages);
@@ -995,7 +995,7 @@
return pinned;
}
if (pinned != npages) {
- unpin_vector_pages(pq->mm, pages, node->npages, pinned);
+ unpin_vector_pages(current->mm, pages, node->npages, pinned);
return -EFAULT;
}
kfree(node->pages);
@@ -1008,7 +1008,8 @@
static void unpin_sdma_pages(struct sdma_mmu_node *node)
{
if (node->npages) {
- unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
+ unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
+ node->npages);
atomic_sub(node->npages, &node->pq->n_locked);
}
}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 9972e0e..1e8c02f 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,6 +1,7 @@
#ifndef _HFI1_USER_SDMA_H
#define _HFI1_USER_SDMA_H
/*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -133,7 +134,6 @@
unsigned long unpinned;
struct mmu_rb_handler *handler;
atomic_t n_locked;
- struct mm_struct *mm;
};
struct hfi1_user_sdma_comp_q {
@@ -250,4 +250,9 @@
struct iovec *iovec, unsigned long dim,
unsigned long *count);
+static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node)
+{
+ return node->rb.handler->mn.mm;
+}
+
#endif /* _HFI1_USER_SDMA_H */
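Dropping pq->mm follows the same discipline as the expected-receive path: submission runs in process context, where current->mm is authoritative, and deferred teardown recovers the mm through the notifier chain (mm_from_sdma_node() above), which the mmu_rb handler keeps alive for the lifetime of the registration. The underlying rule, reduced to a minimal sketch: a long-lived object may only hold an mm it has reference-counted, which the notifier registration provides for free here.

	#include <linux/sched/mm.h>

	struct deferred_work {
		struct mm_struct *mm;
	};

	static void deferred_take(struct deferred_work *w)
	{
		w->mm = current->mm;
		mmgrab(w->mm);	/* pins the mm_struct itself, not its pages */
	}

	static void deferred_release(struct deferred_work *w)
	{
		mmdrop(w->mm);	/* pairs with mmgrab() */
		w->mm = NULL;
	}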
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2f6323a..3591923 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -66,6 +66,7 @@
#include "vnic.h"
#include "fault.h"
#include "affinity.h"
+#include "ipoib.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -1342,7 +1343,7 @@
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
IB_DEVICE_MEM_MGT_EXTENSIONS |
- IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
+ IB_DEVICE_RDMA_NETDEV_OPA;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1360,7 +1361,6 @@
rdi->dparms.props.max_cq = hfi1_max_cqs;
rdi->dparms.props.max_ah = hfi1_max_ahs;
rdi->dparms.props.max_cqe = hfi1_max_cqes;
- rdi->dparms.props.max_map_per_fmr = 32767;
rdi->dparms.props.max_pd = hfi1_max_pds;
rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
rdi->dparms.props.max_qp_init_rd_atom = 255;
@@ -1424,7 +1424,7 @@
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
- props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
+ props->active_speed = opa_speed_to_ib(ppd->link_speed_active);
props->max_vl_num = ppd->vls_supported;
/* Once we are a "first class" citizen and have added the OPA MTUs to
@@ -1439,6 +1439,8 @@
4096 : hfi1_max_mtu), IB_MTU_4096);
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
+ props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu :
+ ib_mtu_enum_to_int(props->max_mtu);
return 0;
}
@@ -1793,6 +1795,7 @@
.modify_device = modify_device,
/* keep process mad in the driver */
.process_mad = hfi1_process_mad,
+ .rdma_netdev_get_params = hfi1_ipoib_rn_get_params,
};
/**
@@ -1863,9 +1866,8 @@
dd->verbs_dev.rdi.dparms.qpn_start = 0;
dd->verbs_dev.rdi.dparms.qpn_inc = 1;
dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
- dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
- dd->verbs_dev.rdi.dparms.qpn_res_end =
- dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+ dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE;
+ dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX;
dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index ae9582d..d36e3e1 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -107,9 +107,9 @@
HFI1_HAS_GRH = (1 << 0),
};
-#define LRH_16B_BYTES (FIELD_SIZEOF(struct hfi1_16b_header, lrh))
+#define LRH_16B_BYTES (sizeof_field(struct hfi1_16b_header, lrh))
#define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32))
-#define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh))
+#define LRH_9B_BYTES (sizeof_field(struct ib_header, lrh))
#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
/* 24Bits for qpn, upper 8Bits reserved */
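FIELD_SIZEOF() was retired in favor of sizeof_field() (linux/stddef.h); both expand to sizeof(((TYPE *)0)->MEMBER), so the hunk above is purely mechanical. A minimal usage sketch with a stand-in struct:

	#include <linux/stddef.h>
	#include <linux/types.h>

	struct demo_hdr {
		u8  lrh[8];
		u32 body[4];
	};

	/* sizeof_field(TYPE, MEMBER) == sizeof(((TYPE *)0)->MEMBER) */
	#define DEMO_LRH_BYTES	sizeof_field(struct demo_hdr, lrh)	/* 8 */
	#define DEMO_LRH_DWORDS	(DEMO_LRH_BYTES / sizeof(u32))		/* 2 */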
@@ -330,9 +330,8 @@
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
/*
* The PSN_MASK and PSN_SHIFT allow for
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index bfa6e08..d2d526c 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -91,7 +91,7 @@
tx->mr = NULL;
tx->sde = priv->s_sde;
tx->psc = priv->s_sendcontext;
- /* so that we can test if the sdma decriptors are there */
+ /* so that we can test if the sdma descriptors are there */
tx->txreq.num_desc = 0;
/* Set the header type */
tx->phdr.hdr.hdr_type = priv->hdr_type;
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
index 5ae7815..66150a1 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_VNIC_H
#define _HFI1_VNIC_H
/*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -69,6 +69,7 @@
#define HFI1_VNIC_SC_SHIFT 4
#define HFI1_VNIC_MAX_QUEUE 16
+#define HFI1_NUM_VNIC_CTXT 8
/**
* struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
@@ -104,7 +105,6 @@
struct hfi1_vnic_vport_info *vinfo;
struct net_device *netdev;
struct napi_struct napi;
- struct sk_buff_head skbq;
};
/**
@@ -146,7 +146,6 @@
/* vnic hfi1 internal functions */
void hfi1_vnic_setup(struct hfi1_devdata *dd);
-void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index b49e60e..a90824d 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2017 - 2018 Intel Corporation.
+ * Copyright(c) 2017 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
#include <linux/if_vlan.h>
#include "vnic.h"
+#include "netdev.h"
#define HFI_TX_TIMEOUT_MS 1000
@@ -62,114 +63,6 @@
static DEFINE_SPINLOCK(vport_cntr_lock);
-static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
-{
- unsigned int rcvctrl_ops = 0;
- int ret;
-
- uctxt->do_interrupt = &handle_receive_interrupt;
-
- /* Now allocate the RcvHdr queue and eager buffers. */
- ret = hfi1_create_rcvhdrq(dd, uctxt);
- if (ret)
- goto done;
-
- ret = hfi1_setup_eagerbufs(uctxt);
- if (ret)
- goto done;
-
- if (uctxt->rcvhdrtail_kvaddr)
- clear_rcvhdrtail(uctxt);
-
- rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
- rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
-
- if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
- rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
- rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
- rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
- rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
-
- hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
-done:
- return ret;
-}
-
-static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
- struct hfi1_ctxtdata **vnic_ctxt)
-{
- struct hfi1_ctxtdata *uctxt;
- int ret;
-
- if (dd->flags & HFI1_FROZEN)
- return -EIO;
-
- ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
- if (ret < 0) {
- dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
- return -ENOMEM;
- }
-
- uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
- HFI1_CAP_KGET(NODROP_RHQ_FULL) |
- HFI1_CAP_KGET(NODROP_EGR_FULL) |
- HFI1_CAP_KGET(DMA_RTAIL);
- uctxt->seq_cnt = 1;
- uctxt->is_vnic = true;
-
- msix_request_rcd_irq(uctxt);
-
- hfi1_stats.sps_ctxts++;
- dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
- *vnic_ctxt = uctxt;
-
- return 0;
-}
-
-static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
- struct hfi1_ctxtdata *uctxt)
-{
- dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
- flush_wc();
-
- /*
- * Disable receive context and interrupt available, reset all
- * RcvCtxtCtrl bits to default values.
- */
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
- HFI1_RCVCTRL_TIDFLOW_DIS |
- HFI1_RCVCTRL_INTRAVAIL_DIS |
- HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
- HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
- HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
-
- /* msix_intr will always be > 0, only clean up if this is true */
- if (uctxt->msix_intr)
- msix_free_irq(dd, uctxt->msix_intr);
-
- uctxt->event_flags = 0;
-
- hfi1_clear_tids(uctxt);
- hfi1_clear_ctxt_pkey(dd, uctxt);
-
- hfi1_stats.sps_ctxts--;
-
- hfi1_free_ctxt(uctxt);
-}
-
-void hfi1_vnic_setup(struct hfi1_devdata *dd)
-{
- xa_init(&dd->vnic.vesws);
-}
-
-void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
-{
- WARN_ON(!xa_empty(&dd->vnic.vesws));
-}
-
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \
u64 *src64, *dst64; \
for (src64 = &qstats->x_grp.unicast, \
@@ -179,6 +72,9 @@
} \
} while (0)
+#define VNIC_MASK (0xFF)
+#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK))
+
/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
struct opa_vnic_stats *stats)
@@ -454,71 +350,25 @@
return rc;
}
-static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
+static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
+ int vesw_id)
{
- unsigned char *pad_info;
- struct sk_buff *skb;
+ int vnic_id = VNIC_ID(vesw_id);
- skb = skb_dequeue(&rxq->skbq);
- if (unlikely(!skb))
+ return hfi1_netdev_get_data(dd, vnic_id);
+}
+
+static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
+{
+ struct hfi1_vnic_vport_info *vinfo;
+ int next_id = VNIC_ID(0);
+
+ vinfo = hfi1_netdev_get_first_data(dd, &next_id);
+
+ if (next_id > VNIC_ID(VNIC_MASK))
return NULL;
- /* remove tail padding and icrc */
- pad_info = skb->data + skb->len - 1;
- skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
- ((*pad_info) & 0x7)));
-
- return skb;
-}
-
-/* hfi1_vnic_handle_rx - handle skb receive */
-static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
- int *work_done, int work_to_do)
-{
- struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
- struct sk_buff *skb;
- int rc;
-
- while (1) {
- if (*work_done >= work_to_do)
- break;
-
- skb = hfi1_vnic_get_skb(rxq);
- if (unlikely(!skb))
- break;
-
- rc = hfi1_vnic_decap_skb(rxq, skb);
- /* update rx counters */
- hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
- if (unlikely(rc)) {
- dev_kfree_skb_any(skb);
- continue;
- }
-
- skb_checksum_none_assert(skb);
- skb->protocol = eth_type_trans(skb, rxq->netdev);
-
- napi_gro_receive(&rxq->napi, skb);
- (*work_done)++;
- }
-}
-
-/* hfi1_vnic_napi - napi receive polling callback function */
-static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
-{
- struct hfi1_vnic_rx_queue *rxq = container_of(napi,
- struct hfi1_vnic_rx_queue, napi);
- struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
- int work_done = 0;
-
- v_dbg("napi %d budget %d\n", rxq->idx, budget);
- hfi1_vnic_handle_rx(rxq, &work_done, budget);
-
- v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
- if (work_done < budget)
- napi_complete(napi);
-
- return work_done;
+ return vinfo;
}
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
@@ -527,13 +377,14 @@
struct hfi1_vnic_vport_info *vinfo = NULL;
struct hfi1_vnic_rx_queue *rxq;
struct sk_buff *skb;
- int l4_type, vesw_id = -1;
+ int l4_type, vesw_id = -1, rc;
u8 q_idx;
+ unsigned char *pad_info;
l4_type = hfi1_16B_get_l4(packet->ebuf);
if (likely(l4_type == OPA_16B_L4_ETHR)) {
vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
- vinfo = xa_load(&dd->vnic.vesws, vesw_id);
+ vinfo = get_vnic_port(dd, vesw_id);
/*
* In case of invalid vesw id, count the error on
@@ -541,10 +392,8 @@
*/
if (unlikely(!vinfo)) {
struct hfi1_vnic_vport_info *vinfo_tmp;
- unsigned long index = 0;
- vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX,
- XA_PRESENT);
+ vinfo_tmp = get_first_vnic_port(dd);
if (vinfo_tmp) {
spin_lock(&vport_cntr_lock);
vinfo_tmp->stats[0].netstats.rx_nohandler++;
@@ -563,12 +412,6 @@
rxq = &vinfo->rxq[q_idx];
if (unlikely(!netif_oper_up(vinfo->netdev))) {
vinfo->stats[q_idx].rx_drop_state++;
- skb_queue_purge(&rxq->skbq);
- return;
- }
-
- if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
- vinfo->stats[q_idx].netstats.rx_fifo_errors++;
return;
}
@@ -580,62 +423,65 @@
memcpy(skb->data, packet->ebuf, packet->tlen);
skb_put(skb, packet->tlen);
- skb_queue_tail(&rxq->skbq, skb);
- if (napi_schedule_prep(&rxq->napi)) {
- v_dbg("napi %d scheduling\n", q_idx);
- __napi_schedule(&rxq->napi);
+ pad_info = skb->data + skb->len - 1;
+ skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+ ((*pad_info) & 0x7)));
+
+ rc = hfi1_vnic_decap_skb(rxq, skb);
+
+ /* update rx counters */
+ hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+ if (unlikely(rc)) {
+ dev_kfree_skb_any(skb);
+ return;
}
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
}
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
struct net_device *netdev = vinfo->netdev;
- int i, rc;
+ int rc;
/* ensure virtual eth switch id is valid */
if (!vinfo->vesw_id)
return -EINVAL;
- rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL);
+ rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
if (rc < 0)
return rc;
- for (i = 0; i < vinfo->num_rx_q; i++) {
- struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
-
- skb_queue_head_init(&rxq->skbq);
- napi_enable(&rxq->napi);
- }
+ rc = hfi1_netdev_rx_init(dd);
+ if (rc)
+ goto err_remove;
netif_carrier_on(netdev);
netif_tx_start_all_queues(netdev);
set_bit(HFI1_VNIC_UP, &vinfo->flags);
return 0;
+
+err_remove:
+ hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
+ return rc;
}
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
- u8 i;
clear_bit(HFI1_VNIC_UP, &vinfo->flags);
netif_carrier_off(vinfo->netdev);
netif_tx_disable(vinfo->netdev);
- xa_erase(&dd->vnic.vesws, vinfo->vesw_id);
+ hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
- /* ensure irqs see the change */
- msix_vnic_synchronize_irq(dd);
-
- /* remove unread skbs */
- for (i = 0; i < vinfo->num_rx_q; i++) {
- struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
-
- napi_disable(&rxq->napi);
- skb_queue_purge(&rxq->skbq);
- }
+ hfi1_netdev_rx_destroy(dd);
}
static int hfi1_netdev_open(struct net_device *netdev)
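With the per-rxq skb queue and private NAPI instance gone, vnic frames arrive through the shared netdev receive contexts, which already run in NAPI context; hfi1_vnic_bypass_rcv() can therefore trim the pad and ICRC, decap, and hand the skb to GRO synchronously. The per-packet tail of that path, condensed into a self-contained sketch (icrc_tail_len stands in for OPA_VNIC_ICRC_TAIL_LEN):

	#include <linux/etherdevice.h>
	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* runs in NAPI/softirq context */
	static void rx_deliver(struct napi_struct *napi, struct net_device *dev,
			       struct sk_buff *skb, unsigned int icrc_tail_len)
	{
		u8 pad = skb->data[skb->len - 1] & 0x7;	/* pad count in last byte */

		skb_trim(skb, skb->len - icrc_tail_len - pad);
		skb_checksum_none_assert(skb);	/* hw did not verify checksums */
		skb->protocol = eth_type_trans(skb, dev);
		napi_gro_receive(napi, skb);
	}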
@@ -660,70 +506,31 @@
return 0;
}
-static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
- struct hfi1_ctxtdata **vnic_ctxt)
-{
- int rc;
-
- rc = allocate_vnic_ctxt(dd, vnic_ctxt);
- if (rc) {
- dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
- return rc;
- }
-
- rc = setup_vnic_ctxt(dd, *vnic_ctxt);
- if (rc) {
- dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
- deallocate_vnic_ctxt(dd, *vnic_ctxt);
- *vnic_ctxt = NULL;
- }
-
- return rc;
-}
-
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
- int i, rc = 0;
+ int rc = 0;
mutex_lock(&hfi1_mutex);
- if (!dd->vnic.num_vports) {
+ if (!dd->vnic_num_vports) {
rc = hfi1_vnic_txreq_init(dd);
if (rc)
goto txreq_fail;
}
- for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
- rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
- if (rc)
- break;
- hfi1_rcd_get(dd->vnic.ctxt[i]);
- dd->vnic.ctxt[i]->vnic_q_idx = i;
- }
-
- if (i < vinfo->num_rx_q) {
- /*
- * If required amount of contexts is not
- * allocated successfully then remaining contexts
- * are released.
- */
- while (i-- > dd->vnic.num_ctxt) {
- deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
- hfi1_rcd_put(dd->vnic.ctxt[i]);
- dd->vnic.ctxt[i] = NULL;
- }
+ rc = hfi1_netdev_rx_init(dd);
+ if (rc) {
+ dd_dev_err(dd, "Unable to initialize netdev contexts\n");
goto alloc_fail;
}
- if (dd->vnic.num_ctxt != i) {
- dd->vnic.num_ctxt = i;
- hfi1_init_vnic_rsm(dd);
- }
+ hfi1_init_vnic_rsm(dd);
- dd->vnic.num_vports++;
+ dd->vnic_num_vports++;
hfi1_vnic_sdma_init(vinfo);
+
alloc_fail:
- if (!dd->vnic.num_vports)
+ if (!dd->vnic_num_vports)
hfi1_vnic_txreq_deinit(dd);
txreq_fail:
mutex_unlock(&hfi1_mutex);
@@ -733,20 +540,14 @@
static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
- int i;
mutex_lock(&hfi1_mutex);
- if (--dd->vnic.num_vports == 0) {
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
- hfi1_rcd_put(dd->vnic.ctxt[i]);
- dd->vnic.ctxt[i] = NULL;
- }
+ if (--dd->vnic_num_vports == 0) {
hfi1_deinit_vnic_rsm(dd);
- dd->vnic.num_ctxt = 0;
hfi1_vnic_txreq_deinit(dd);
}
mutex_unlock(&hfi1_mutex);
+ hfi1_netdev_rx_destroy(dd);
}
static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
@@ -804,7 +605,7 @@
struct rdma_netdev *rn;
int i, size, rc;
- if (!dd->num_vnic_contexts)
+ if (!dd->num_netdev_contexts)
return ERR_PTR(-ENOMEM);
if (!port_num || (port_num > dd->num_pports))
@@ -815,15 +616,16 @@
size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
- dd->num_sdma, dd->num_vnic_contexts);
+ chip_sdma_engines(dd),
+ dd->num_netdev_contexts);
if (!netdev)
return ERR_PTR(-ENOMEM);
rn = netdev_priv(netdev);
vinfo = opa_vnic_dev_priv(netdev);
vinfo->dd = dd;
- vinfo->num_tx_q = dd->num_sdma;
- vinfo->num_rx_q = dd->num_vnic_contexts;
+ vinfo->num_tx_q = chip_sdma_engines(dd);
+ vinfo->num_rx_q = dd->num_netdev_contexts;
vinfo->netdev = netdev;
rn->free_rdma_netdev = hfi1_vnic_free_rn;
rn->set_id = hfi1_vnic_set_vesw_id;
@@ -841,7 +643,6 @@
rxq->idx = i;
rxq->vinfo = vinfo;
rxq->netdev = netdev;
- netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
}
rc = hfi1_vnic_init(vinfo);