Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index 0653f4f..519866b 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -5,19 +5,19 @@
select MMU_NOTIFIER
select CRC32
select I2C_ALGOBIT
- ---help---
+ help
This is a low-level driver for Intel OPA Gen1 adapter.
config HFI1_DEBUG_SDMA_ORDER
bool "HFI1 SDMA Order debug"
depends on INFINIBAND_HFI1
default n
- ---help---
+ help
This is a debug flag to test for out of order
sdma completions for unit testing
config SDMA_VERBOSITY
bool "Config SDMA Verbosity"
depends on INFINIBAND_HFI1
default n
- ---help---
+ help
This is a configuration flag to enable verbose
SDMA debug
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 0405d26..2e89ec1 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -22,9 +22,13 @@
init.o \
intr.o \
iowait.o \
+ ipoib_main.o \
+ ipoib_rx.o \
+ ipoib_tx.o \
mad.o \
mmu_rb.o \
msix.o \
+ netdev_rx.o \
opfn.o \
pcie.o \
pio.o \
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 1aeea5d..04b1e8f 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -64,6 +64,7 @@
static const char * const irq_type_names[] = {
"SDMA",
"RCVCTXT",
+ "NETDEVCTXT",
"GENERAL",
"OTHER",
};
@@ -631,22 +632,11 @@
*/
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
- int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
int curr_cpu, possible, i, ret;
bool new_entry = false;
- /*
- * If the BIOS does not have the NUMA node information set, select
- * NUMA 0 so we get consistent performance.
- */
- if (node < 0) {
- dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
- node = 0;
- }
- dd->node = node;
-
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
@@ -659,7 +649,7 @@
* create an entry in the global affinity structure and initialize it.
*/
if (!entry) {
- entry = node_affinity_allocate(node);
+ entry = node_affinity_allocate(dd->node);
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
@@ -750,6 +740,7 @@
if (new_entry)
node_affinity_add_tail(entry);
+ dd->affinity_entry = entry;
mutex_unlock(&node_affinity.lock);
return 0;
@@ -765,10 +756,9 @@
{
struct hfi1_affinity_node *entry;
- if (dd->node < 0)
- return;
-
mutex_lock(&node_affinity.lock);
+ if (!dd->affinity_entry)
+ goto unlock;
entry = node_affinity_lookup(dd->node);
if (!entry)
goto unlock;
@@ -779,8 +769,8 @@
*/
_dev_comp_vect_cpu_mask_clean_up(dd, entry);
unlock:
+ dd->affinity_entry = NULL;
mutex_unlock(&node_affinity.lock);
- dd->node = NUMA_NO_NODE;
}
/*
@@ -915,6 +905,11 @@
set = &entry->rcv_intr;
scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
break;
+ case IRQ_NETDEVCTXT:
+ rcd = (struct hfi1_ctxtdata *)msix->arg;
+ set = &entry->def_intr;
+ scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
+ break;
default:
dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
return -EINVAL;
@@ -987,6 +982,10 @@
if (rcd->ctxt != HFI1_CTRL_CTXT)
set = &entry->rcv_intr;
break;
+ case IRQ_NETDEVCTXT:
+ rcd = (struct hfi1_ctxtdata *)msix->arg;
+ set = &entry->def_intr;
+ break;
default:
mutex_unlock(&node_affinity.lock);
return;
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 6a7e6ea..f94ed5d 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -52,6 +52,7 @@
enum irq_type {
IRQ_SDMA,
IRQ_RCVCTXT,
+ IRQ_NETDEVCTXT,
IRQ_GENERAL,
IRQ_OTHER
};
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 10924f1..88476a1 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -66,10 +66,7 @@
#include "affinity.h"
#include "debugfs.h"
#include "fault.h"
-
-uint kdeth_qp;
-module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
-MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
+#include "netdev.h"
uint num_vls = HFI1_MAX_VLS_SUPPORTED;
module_param(num_vls, uint, S_IRUGO);
@@ -128,13 +125,15 @@
/*
* RSM instance allocation
- * 0 - Verbs
- * 1 - User Fecn Handling
- * 2 - Vnic
+ * 0 - User Fecn Handling
+ * 1 - Vnic
+ * 2 - AIP
+ * 3 - Verbs
*/
-#define RSM_INS_VERBS 0
-#define RSM_INS_FECN 1
-#define RSM_INS_VNIC 2
+#define RSM_INS_FECN 0
+#define RSM_INS_VNIC 1
+#define RSM_INS_AIP 2
+#define RSM_INS_VERBS 3
/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT 39
@@ -175,6 +174,25 @@
/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
+/* RSM fields for AIP */
+/* LRH.BTH above is reused for this rule */
+
+/* BTH.DESTQP: QW 1, OFFSET 16 for match */
+#define BTH_DESTQP_QW 1ull
+#define BTH_DESTQP_BIT_OFFSET 16ull
+#define BTH_DESTQP_OFFSET(off) ((BTH_DESTQP_QW << QW_SHIFT) | (off))
+#define BTH_DESTQP_MATCH_OFFSET BTH_DESTQP_OFFSET(BTH_DESTQP_BIT_OFFSET)
+#define BTH_DESTQP_MASK 0xFFull
+#define BTH_DESTQP_VALUE 0x81ull
+
+/* DETH.SQPN: QW 1 Offset 56 for select */
+/* We use the 8 most significant source QPN bits as entropy for AIP */
+#define DETH_AIP_SQPN_QW 3ull
+#define DETH_AIP_SQPN_BIT_OFFSET 56ull
+#define DETH_AIP_SQPN_OFFSET(off) ((DETH_AIP_SQPN_QW << QW_SHIFT) | (off))
+#define DETH_AIP_SQPN_SELECT_OFFSET \
+ DETH_AIP_SQPN_OFFSET(DETH_AIP_SQPN_BIT_OFFSET)
+
/* RSM fields for Vnic */
/* L2_TYPE: QW 0, OFFSET 61 - for match */
#define L2_TYPE_QW 0ull
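The match/select macros above pack a quadword index (via QW_SHIFT) and a bit offset into one value; an RSM rule fires only when both masked match fields equal their programmed values, and the selected entropy bits index into the map table. A minimal software model of that lookup, for illustration only (struct and function names are hypothetical; the real evaluation happens in the receive hardware):

#include <stdint.h>

/* Hypothetical software model of one RSM rule. */
struct rsm_rule_model {
	uint8_t  offset;          /* base index into the RSM map table */
	uint64_t mask1, value1;   /* e.g. LRH_BTH_MASK / LRH_BTH_VALUE */
	uint64_t mask2, value2;   /* e.g. BTH_DESTQP_MASK / BTH_DESTQP_VALUE */
	uint8_t  index_width;     /* e.g. ilog2(NUM_NETDEV_MAP_ENTRIES) */
};

/* Return the map-table index for a matching packet, or -1 if the
 * rule does not fire. 'entropy' stands in for the selected header
 * bits (for AIP: the 8 most significant source QPN bits). */
static int rsm_lookup(const struct rsm_rule_model *r,
		      uint64_t field1, uint64_t field2, uint64_t entropy)
{
	if ((field1 & r->mask1) != r->value1 ||
	    (field2 & r->mask2) != r->value2)
		return -1;
	return r->offset + (int)(entropy & ((1u << r->index_width) - 1));
}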
@@ -6873,7 +6891,7 @@
}
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= rcd->rcvhdrtail_kvaddr ?
+ rcvmask |= hfi1_rcvhdrtail_kvaddr(rcd) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(dd, rcvmask, rcd);
hfi1_rcd_put(rcd);
@@ -7299,11 +7317,11 @@
case 1: return OPA_LINK_WIDTH_1X;
case 2: return OPA_LINK_WIDTH_2X;
case 3: return OPA_LINK_WIDTH_3X;
+ case 4: return OPA_LINK_WIDTH_4X;
default:
dd_dev_info(dd, "%s: invalid width %d, using 4\n",
__func__, width);
- /* fall through */
- case 4: return OPA_LINK_WIDTH_4X;
+ return OPA_LINK_WIDTH_4X;
}
}
@@ -7358,12 +7376,13 @@
case 0:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
break;
+ case 1:
+ dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
+ break;
default:
dd_dev_err(dd,
"%s: unexpected max rate %d, using 25Gb\n",
__func__, (int)max_rate);
- /* fall through */
- case 1:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
break;
}
@@ -8405,20 +8424,107 @@
static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
{
u32 tail;
- int present;
- if (!rcd->rcvhdrtail_kvaddr)
- present = (rcd->seq_cnt ==
- rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
- else /* is RDMA rtail */
- present = (rcd->head != get_rcvhdrtail(rcd));
-
- if (present)
+ if (hfi1_packet_present(rcd))
return 1;
/* fall back to a CSR read, correct independent of DMA_RTAIL */
tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
- return rcd->head != tail;
+ return hfi1_rcd_head(rcd) != tail;
+}
+
+/**
+ * receive_interrupt_common() - Common code for receive context
+ * interrupt handlers: update traces, increment the kernel IRQ
+ * counter and set up ASPM when needed.
+ */
+static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+
+ trace_hfi1_receive_interrupt(dd, rcd);
+ this_cpu_inc(*dd->int_counter);
+ aspm_ctx_disable(rcd);
+}
+
+/**
+ * __hfi1_rcd_eoi_intr() - Make the HW issue a receive interrupt
+ * when packets are present in the queue. When calling with
+ * interrupts enabled, use hfi1_rcd_eoi_intr() instead.
+ *
+ * @rcd: valid receive context
+ */
+static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ if (!rcd->rcvhdrq)
+ return;
+ clear_recv_intr(rcd);
+ if (check_packet_present(rcd))
+ force_recv_intr(rcd);
+}
+
+/**
+ * hfi1_rcd_eoi_intr() - End of Interrupt processing action
+ *
+ * @rcd: Ptr to hfi1_ctxtdata of receive context
+ *
+ * Hold IRQs so we can safely clear the interrupt and
+ * recheck for a packet that may have arrived after the previous
+ * check and the interrupt clear. If a packet arrived, force another
+ * interrupt. This routine can be called at the end of receive packet
+ * processing in interrupt service routines, interrupt service threads
+ * and softirqs.
+ */
+static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __hfi1_rcd_eoi_intr(rcd);
+ local_irq_restore(flags);
+}
+
+/**
+ * hfi1_netdev_rx_napi - napi poll function to move eoi inline
+ * @napi: pointer to napi object
+ * @budget: netdev budget
+ */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget)
+{
+ struct hfi1_netdev_rxq *rxq = container_of(napi,
+ struct hfi1_netdev_rxq, napi);
+ struct hfi1_ctxtdata *rcd = rxq->rcd;
+ int work_done = 0;
+
+ work_done = rcd->do_interrupt(rcd, budget);
+
+ if (work_done < budget) {
+ napi_complete_done(napi, work_done);
+ hfi1_rcd_eoi_intr(rcd);
+ }
+
+ return work_done;
+}
+
+/* Receive packet napi handler for netdevs VNIC and AIP */
+irqreturn_t receive_context_interrupt_napi(int irq, void *data)
+{
+ struct hfi1_ctxtdata *rcd = data;
+
+ receive_interrupt_common(rcd);
+
+ if (likely(rcd->napi)) {
+ if (likely(napi_schedule_prep(rcd->napi)))
+ __napi_schedule_irqoff(rcd->napi);
+ else
+ __hfi1_rcd_eoi_intr(rcd);
+ } else {
+ WARN_ONCE(1, "Napi IRQ handler without napi set up ctxt=%d\n",
+ rcd->ctxt);
+ __hfi1_rcd_eoi_intr(rcd);
+ }
+
+ return IRQ_HANDLED;
}
/*
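hfi1_netdev_rx_napi() above is a standard NAPI poll callback: it processes at most @budget packets and, when it finishes early, completes NAPI and re-arms the receive interrupt through hfi1_rcd_eoi_intr(). A sketch of the registration this implies — the actual call site lives in the new netdev_rx.c, outside this hunk, and the weight value here is an assumption:

	/* assumed registration site, for illustration only */
	netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
	napi_enable(&rxq->napi);
	rxq->rcd->napi = &rxq->napi;	/* lets the IRQ handler schedule it */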
@@ -8432,13 +8538,9 @@
irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- struct hfi1_devdata *dd = rcd->dd;
int disposition;
- int present;
- trace_hfi1_receive_interrupt(dd, rcd);
- this_cpu_inc(*dd->int_counter);
- aspm_ctx_disable(rcd);
+ receive_interrupt_common(rcd);
/* receive interrupt remains blocked while processing packets */
disposition = rcd->do_interrupt(rcd, 0);
@@ -8451,17 +8553,7 @@
if (disposition == RCV_PKT_LIMIT)
return IRQ_WAKE_THREAD;
- /*
- * The packet processor detected no more packets. Clear the receive
- * interrupt and recheck for a packet packet that may have arrived
- * after the previous check and interrupt clear. If a packet arrived,
- * force another interrupt.
- */
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
-
+ __hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -8472,24 +8564,11 @@
irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- int present;
/* receive interrupt is still blocked from the IRQ handler */
(void)rcd->do_interrupt(rcd, 1);
- /*
- * The packet processor will only return if it detected no more
- * packets. Hold IRQs here so we can safely clear the interrupt and
- * recheck for a packet that may have arrived after the previous
- * check and the interrupt clear. If a packet arrived, force another
- * interrupt.
- */
- local_irq_disable();
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
- local_irq_enable();
+ hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -10060,7 +10139,7 @@
* the first kernel context would have been allocated by now so
* we are guaranteed a valid value.
*/
- return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
+ return (get_hdrqentsize(dd->rcd[0]) - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
}
/*
@@ -10105,7 +10184,7 @@
thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
sc_mtu_to_threshold(dd->vld[i].sc,
dd->vld[i].mtu,
- dd->rcd[0]->rcvhdrqentsize));
+ get_hdrqentsize(dd->rcd[0])));
for (j = 0; j < INIT_SC_PER_VL; j++)
sc_set_cr_threshold(
pio_select_send_context_vl(dd, j, i),
@@ -11832,7 +11911,7 @@
head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
- if (rcd->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
tail = get_rcvhdrtail(rcd);
else
tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
@@ -11876,6 +11955,84 @@
return 0x1; /* if invalid, go with the minimum size */
}
+/**
+ * encode_rcv_header_entry_size - return chip specific encoding for size
+ * @size: size in dwords
+ *
+ * Convert a receive header entry size to the encoding used in the CSR.
+ *
+ * Return zero if the given size is invalid, otherwise the encoding.
+ */
+u8 encode_rcv_header_entry_size(u8 size)
+{
+ /* there are only 3 valid receive header entry sizes */
+ if (size == 2)
+ return 1;
+ if (size == 16)
+ return 2;
+ if (size == 32)
+ return 4;
+ return 0; /* invalid */
+}
+
+/**
+ * hfi1_validate_rcvhdrcnt - validate hdrcnt
+ * @dd: the device data
+ * @thecnt: the header count
+ */
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
+{
+ if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd, "Receive header queue count too small\n");
+ return -EINVAL;
+ }
+
+ if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
+ return -EINVAL;
+ }
+
+ if (thecnt % HDRQ_INCREMENT) {
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * set_hdrq_regs - set header queue registers for context
+ * @dd: the device data
+ * @ctxt: the context
+ * @entsize: the dword entry size
+ * @hdrcnt: the number of header entries
+ */
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt)
+{
+ u64 reg;
+
+ reg = (((u64)hdrcnt >> HDRQ_SIZE_SHIFT) & RCV_HDR_CNT_CNT_MASK) <<
+ RCV_HDR_CNT_CNT_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_CNT, reg);
+ reg = ((u64)encode_rcv_header_entry_size(entsize) &
+ RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) <<
+ RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_ENT_SIZE, reg);
+ reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) <<
+ RCV_HDR_SIZE_HDR_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_SIZE, reg);
+
+ /*
+ * Program dummy tail address for every receive context
+ * before enabling any receive context
+ */
+ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
+ dd->rcvhdrtail_dummy_dma);
+}
+
void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
struct hfi1_ctxtdata *rcd)
{
@@ -11897,13 +12054,13 @@
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
rcd->rcvhdrq_dma);
- if (rcd->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
rcd->rcvhdrqtailaddr_dma);
- rcd->seq_cnt = 1;
+ hfi1_set_seq_cnt(rcd, 1);
/* reset the cached receive header queue head value */
- rcd->head = 0;
+ hfi1_set_rcd_head(rcd, 0);
/*
* Zero the receive header queue so we don't get false
@@ -11983,7 +12140,7 @@
IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
}
- if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
+ if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && hfi1_rcvhdrtail_kvaddr(rcd))
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -12724,11 +12881,6 @@
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
{
switch (chip_lstate) {
- default:
- dd_dev_err(dd,
- "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
- chip_lstate);
- /* fall through */
case LSTATE_DOWN:
return IB_PORT_DOWN;
case LSTATE_INIT:
@@ -12737,6 +12889,11 @@
return IB_PORT_ARMED;
case LSTATE_ACTIVE:
return IB_PORT_ACTIVE;
+ default:
+ dd_dev_err(dd,
+ "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+ chip_lstate);
+ return IB_PORT_DOWN;
}
}
@@ -12744,10 +12901,6 @@
{
/* look at the HFI meta-states only */
switch (chip_pstate & 0xf0) {
- default:
- dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
- chip_pstate);
- /* fall through */
case PLS_DISABLED:
return IB_PORTPHYSSTATE_DISABLED;
case PLS_OFFLINE:
@@ -12760,6 +12913,10 @@
return IB_PORTPHYSSTATE_LINKUP;
case PLS_PHYTEST:
return IB_PORTPHYSSTATE_PHY_TEST;
+ default:
+ dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
+ chip_pstate);
+ return IB_PORTPHYSSTATE_DISABLED;
}
}
@@ -13237,13 +13394,12 @@
* in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
- * num_vnic_contexts - number of contexts reserved for VNIC
+ * num_netdev_contexts - number of contexts reserved for netdev
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
unsigned long num_kernel_contexts;
- u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT;
- int total_contexts;
+ u16 num_netdev_contexts;
int ret;
unsigned ngroups;
int rmt_count;
@@ -13280,13 +13436,6 @@
num_kernel_contexts = send_contexts - num_vls - 1;
}
- /* Accommodate VNIC contexts if possible */
- if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) {
- dd_dev_err(dd, "No receive contexts available for VNIC\n");
- num_vnic_contexts = 0;
- }
- total_contexts = num_kernel_contexts + num_vnic_contexts;
-
/*
* User contexts:
* - default to 1 user context per real (non-HT) CPU core if
@@ -13299,28 +13448,32 @@
/*
* Adjust the counts given a global max.
*/
- if (total_contexts + n_usr_ctxts > rcv_contexts) {
+ if (num_kernel_contexts + n_usr_ctxts > rcv_contexts) {
dd_dev_err(dd,
- "Reducing # user receive contexts to: %d, from %u\n",
- rcv_contexts - total_contexts,
+ "Reducing # user receive contexts to: %u, from %u\n",
+ (u32)(rcv_contexts - num_kernel_contexts),
n_usr_ctxts);
/* recalculate */
- n_usr_ctxts = rcv_contexts - total_contexts;
+ n_usr_ctxts = rcv_contexts - num_kernel_contexts;
}
+ num_netdev_contexts =
+ hfi1_num_netdev_contexts(dd, rcv_contexts -
+ (num_kernel_contexts + n_usr_ctxts),
+ &node_affinity.real_cpu_mask);
/*
* The RMT entries are currently allocated as shown below:
* 1. QOS (0 to 128 entries);
* 2. FECN (num_kernel_context - 1 + num_user_contexts +
- * num_vnic_contexts);
- * 3. VNIC (num_vnic_contexts).
- * It should be noted that FECN oversubscribe num_vnic_contexts
- * entries of RMT because both VNIC and PSM could allocate any receive
+ * num_netdev_contexts);
+ * 3. netdev (num_netdev_contexts).
+ * It should be noted that FECN oversubscribe num_netdev_contexts
+ * entries of RMT because both netdev and PSM could allocate any receive
* context between dd->first_dyn_alloc_ctxt and dd->num_rcv_contexts,
* and PSM FECN must reserve an RMT entry for each possible PSM receive
* context.
*/
- rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
+ rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2);
if (HFI1_CAP_IS_KSET(TID_RDMA))
rmt_count += num_kernel_contexts - 1;
if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
@@ -13333,21 +13486,20 @@
n_usr_ctxts = user_rmt_reduced;
}
- total_contexts += n_usr_ctxts;
-
- /* the first N are kernel contexts, the rest are user/vnic contexts */
- dd->num_rcv_contexts = total_contexts;
+ /* the first N are kernel contexts, the rest are user/netdev contexts */
+ dd->num_rcv_contexts =
+ num_kernel_contexts + n_usr_ctxts + num_netdev_contexts;
dd->n_krcv_queues = num_kernel_contexts;
dd->first_dyn_alloc_ctxt = num_kernel_contexts;
- dd->num_vnic_contexts = num_vnic_contexts;
+ dd->num_netdev_contexts = num_netdev_contexts;
dd->num_user_contexts = n_usr_ctxts;
dd->freectxts = n_usr_ctxts;
dd_dev_info(dd,
- "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
+ "rcv contexts: chip %d, used %d (kernel %d, netdev %u, user %u)\n",
rcv_contexts,
(int)dd->num_rcv_contexts,
(int)dd->n_krcv_queues,
- dd->num_vnic_contexts,
+ dd->num_netdev_contexts,
dd->num_user_contexts);
/*
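To make the reworked accounting concrete (all numbers assumed): with rcv_contexts = 160, num_kernel_contexts = 17 and n_usr_ctxts = 128, hfi1_num_netdev_contexts() is offered at most 160 - (17 + 128) = 15 contexts. If it claims 8, then dd->num_rcv_contexts = 17 + 128 + 8 = 153, and the RMT budget check charges qos_rmt_entries() + 8 * 2 = 16 extra entries (plus 17 - 1 = 16 more when TID_RDMA is set) against the NUM_MAP_ENTRIES = 256 limit.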
@@ -14026,21 +14178,12 @@
static void init_kdeth_qp(struct hfi1_devdata *dd)
{
- /* user changed the KDETH_QP */
- if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
- /* out of range or illegal value */
- dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
- kdeth_qp = 0;
- }
- if (kdeth_qp == 0) /* not set, or failed range check */
- kdeth_qp = DEFAULT_KDETH_QP;
-
write_csr(dd, SEND_BTH_QP,
- (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+ (RVT_KDETH_QP_PREFIX & SEND_BTH_QP_KDETH_QP_MASK) <<
SEND_BTH_QP_KDETH_QP_SHIFT);
write_csr(dd, RCV_BTH_QP,
- (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+ (RVT_KDETH_QP_PREFIX & RCV_BTH_QP_KDETH_QP_MASK) <<
RCV_BTH_QP_KDETH_QP_SHIFT);
}
@@ -14156,6 +14299,12 @@
}
}
+/* Return true if a receive side mapping rule is programmed at this index */
+static bool has_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ return read_csr(dd, RCV_RSM_CFG + (8 * rule_index)) != 0;
+}
+
/*
* Add a receive side mapping rule.
*/
@@ -14392,77 +14541,138 @@
rmt->used += total_cnt;
}
-/* Initialize RSM for VNIC */
-void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+static inline bool hfi1_is_rmt_full(int start, int spare)
+{
+ return (start + spare) > NUM_MAP_ENTRIES;
+}
+
+static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd)
{
u8 i, j;
u8 ctx_id = 0;
u64 reg;
u32 regoff;
- struct rsm_rule_data rrd;
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ int ctxt_count = hfi1_netdev_ctxt_count(dd);
- if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
- dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
- dd->vnic.rmt_start);
- return;
+ /* We already have contexts mapped in RMT */
+ if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) {
+ dd_dev_info(dd, "Contexts are already mapped in RMT\n");
+ return true;
}
- dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
- dd->vnic.rmt_start,
- dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
+ if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) {
+ dd_dev_err(dd, "Not enough RMT entries used = %d\n",
+ rmt_start);
+ return false;
+ }
+
+ dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n",
+ rmt_start,
+ rmt_start + NUM_NETDEV_MAP_ENTRIES);
/* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
- regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
+ regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8;
reg = read_csr(dd, regoff);
- for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
- /* Update map register with vnic context */
- j = (dd->vnic.rmt_start + i) % 8;
+ for (i = 0; i < NUM_NETDEV_MAP_ENTRIES; i++) {
+ /* Update map register with netdev context */
+ j = (rmt_start + i) % 8;
reg &= ~(0xffllu << (j * 8));
- reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
- /* Wrap up vnic ctx index */
- ctx_id %= dd->vnic.num_ctxt;
+ reg |= (u64)hfi1_netdev_get_ctxt(dd, ctx_id++)->ctxt << (j * 8);
+ /* Wrap up netdev ctx index */
+ ctx_id %= ctxt_count;
/* Write back map register */
- if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
+ if (j == 7 || ((i + 1) == NUM_NETDEV_MAP_ENTRIES)) {
dev_dbg(&(dd)->pcidev->dev,
- "Vnic rsm map reg[%d] =0x%llx\n",
+ "RMT[%d] =0x%llx\n",
regoff - RCV_RSM_MAP_TABLE, reg);
write_csr(dd, regoff, reg);
regoff += 8;
- if (i < (NUM_VNIC_MAP_ENTRIES - 1))
+ if (i < (NUM_NETDEV_MAP_ENTRIES - 1))
reg = read_csr(dd, regoff);
}
}
- /* Add rule for vnic */
- rrd.offset = dd->vnic.rmt_start;
- rrd.pkt_type = 4;
- /* Match 16B packets */
- rrd.field1_off = L2_TYPE_MATCH_OFFSET;
- rrd.mask1 = L2_TYPE_MASK;
- rrd.value1 = L2_16B_VALUE;
- /* Match ETH L4 packets */
- rrd.field2_off = L4_TYPE_MATCH_OFFSET;
- rrd.mask2 = L4_16B_TYPE_MASK;
- rrd.value2 = L4_16B_ETH_VALUE;
- /* Calc context from veswid and entropy */
- rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
- rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
- rrd.index2_off = L2_16B_ENTROPY_OFFSET;
- rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
- add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+ return true;
+}
- /* Enable RSM if not already enabled */
+static void hfi1_enable_rsm_rule(struct hfi1_devdata *dd,
+ int rule, struct rsm_rule_data *rrd)
+{
+ if (!hfi1_netdev_update_rmt(dd)) {
+ dd_dev_err(dd, "Failed to update RMT for RSM%d rule\n", rule);
+ return;
+ }
+
+ add_rsm_rule(dd, rule, rrd);
add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
}
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd)
+{
+ /*
+ * go through with the initialisation only if this rule actually doesn't
+ * exist yet
+ */
+ if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) {
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ struct rsm_rule_data rrd = {
+ .offset = rmt_start,
+ .pkt_type = IB_PACKET_TYPE,
+ .field1_off = LRH_BTH_MATCH_OFFSET,
+ .mask1 = LRH_BTH_MASK,
+ .value1 = LRH_BTH_VALUE,
+ .field2_off = BTH_DESTQP_MATCH_OFFSET,
+ .mask2 = BTH_DESTQP_MASK,
+ .value2 = BTH_DESTQP_VALUE,
+ .index1_off = DETH_AIP_SQPN_SELECT_OFFSET +
+ ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index2_off = DETH_AIP_SQPN_SELECT_OFFSET,
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+ };
+
+ hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd);
+ }
+}
+
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ struct rsm_rule_data rrd = {
+ /* Add rule for vnic */
+ .offset = rmt_start,
+ .pkt_type = 4,
+ /* Match 16B packets */
+ .field1_off = L2_TYPE_MATCH_OFFSET,
+ .mask1 = L2_TYPE_MASK,
+ .value1 = L2_16B_VALUE,
+ /* Match ETH L4 packets */
+ .field2_off = L4_TYPE_MATCH_OFFSET,
+ .mask2 = L4_16B_TYPE_MASK,
+ .value2 = L4_16B_ETH_VALUE,
+ /* Calc context from veswid and entropy */
+ .index1_off = L4_16B_HDR_VESWID_OFFSET,
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index2_off = L2_16B_ENTROPY_OFFSET,
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+ };
+
+ hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+}
+
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
{
clear_rsm_rule(dd, RSM_INS_VNIC);
+}
- /* Disable RSM if used only by vnic */
- if (dd->vnic.rmt_start == 0)
- clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd)
+{
+ /* only actually clear the rule if it's the last user asking to do so */
+ if (atomic_fetch_add_unless(&dd->ipoib_rsm_usr_num, -1, 0) == 1)
+ clear_rsm_rule(dd, RSM_INS_AIP);
}
static int init_rxe(struct hfi1_devdata *dd)
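hfi1_init_aip_rsm()/hfi1_deinit_aip_rsm() above share one RSM rule among all IPoIB users via dd->ipoib_rsm_usr_num: the first init installs the rule, the last deinit clears it, and atomic_fetch_add_unless() makes an unbalanced deinit a no-op because the counter never drops below zero. The same pattern in isolation (install_rule()/remove_rule() are hypothetical stand-ins):

static void install_rule(void);		/* hypothetical stand-ins */
static void remove_rule(void);
static atomic_t users = ATOMIC_INIT(0);

static void get_rule(void)
{
	if (atomic_fetch_inc(&users) == 0)	/* 0 -> 1: first user */
		install_rule();
}

static void put_rule(void)
{
	/* decrement unless already 0; returns the old value */
	if (atomic_fetch_add_unless(&users, -1, 0) == 1)
		remove_rule();			/* 1 -> 0: last user */
}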
@@ -14481,8 +14691,8 @@
init_qos(dd, rmt);
init_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
- /* record number of used rsm map entries for vnic */
- dd->vnic.rmt_start = rmt->used;
+ /* record number of used rsm map entries for netdev */
+ hfi1_netdev_set_free_rmt_idx(dd, rmt->used);
kfree(rmt);
/*
@@ -15036,6 +15246,11 @@
(dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK);
+ /* alloc netdev data */
+ ret = hfi1_netdev_alloc(dd);
+ if (ret)
+ goto bail_cleanup;
+
ret = set_up_context_variables(dd);
if (ret)
goto bail_cleanup;
@@ -15136,6 +15351,7 @@
hfi1_comp_vectors_clean_up(dd);
msix_clean_up_interrupts(dd);
bail_cleanup:
+ hfi1_netdev_free(dd);
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index af00619..2c6f2de 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -358,6 +358,8 @@
#define MAX_EAGER_BUFFER (256 * 1024)
#define MAX_EAGER_BUFFER_TOTAL (64 * (1 << 20)) /* max per ctxt 64MB */
#define MAX_EXPECTED_BUFFER (2048 * 1024)
+#define HFI1_MIN_HDRQ_EGRBUF_CNT 32
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
/*
* Receive expected base and count and eager base and count increment -
@@ -699,6 +701,10 @@
return read_csr(dd, RCV_ARRAY_CNT);
}
+u8 encode_rcv_header_entry_size(u8 size);
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt);
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt);
+
u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
u32 dw_len);
@@ -1441,6 +1447,7 @@
irqreturn_t sdma_interrupt(int irq, void *data);
irqreturn_t receive_context_interrupt(int irq, void *data);
irqreturn_t receive_context_thread(int irq, void *data);
+irqreturn_t receive_context_interrupt_napi(int irq, void *data);
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
void init_qsfp_int(struct hfi1_devdata *dd);
@@ -1449,6 +1456,8 @@
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
void reset_interrupts(struct hfi1_devdata *dd);
u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx);
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd);
/*
* Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index d47da7b..ff423e5 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -72,13 +72,6 @@
* compilation unit
*/
-/*
- * If a packet's QP[23:16] bits match this value, then it is
- * a PSM packet and the hardware will expect a KDETH header
- * following the BTH.
- */
-#define DEFAULT_KDETH_QP 0x80
-
/* driver/hw feature set bitmask */
#define HFI1_CAP_USER_SHIFT 24
#define HFI1_CAP_MASK ((1UL << HFI1_CAP_USER_SHIFT) - 1)
@@ -149,7 +142,8 @@
HFI1_CAP_NO_INTEGRITY | \
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_TID_RDMA | \
- HFI1_CAP_OPFN) << \
+ HFI1_CAP_OPFN | \
+ HFI1_CAP_AIP) << \
HFI1_CAP_USER_SHIFT)
/*
* Set of capabilities that need to be enabled for kernel context in
@@ -166,6 +160,7 @@
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_EXTENDED_PSN | \
+ HFI1_CAP_AIP | \
((HFI1_CAP_HDRSUPP | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_STATIC_RATE_CTRL | \
@@ -323,6 +318,9 @@
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
+/* MAX RcvSEQ */
+#define RHF_MAX_SEQ 13
+
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index c29da2f..2ced236 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -379,7 +379,7 @@
struct hfi1_devdata *dd = dd_from_dev(ibd);
++*pos;
- if (!dd->rcd || *pos >= dd->n_krcv_queues)
+ if (!dd->rcd || *pos >= dd->num_rcv_contexts)
return NULL;
return pos;
}
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 941b465..8085718 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2018 Intel Corporation.
+ * Copyright(c) 2015-2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
#include <linux/module.h>
#include <linux/prefetch.h>
#include <rdma/ib_verbs.h>
+#include <linux/etherdevice.h>
#include "hfi.h"
#include "trace.h"
@@ -63,6 +64,9 @@
#include "vnic.h"
#include "fault.h"
+#include "ipoib.h"
+#include "netdev.h"
+
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -411,14 +415,14 @@
static inline void init_packet(struct hfi1_ctxtdata *rcd,
struct hfi1_packet *packet)
{
- packet->rsize = rcd->rcvhdrqentsize; /* words */
- packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
+ packet->rsize = get_hdrqentsize(rcd); /* words */
+ packet->maxcnt = get_hdrq_cnt(rcd) * packet->rsize; /* words */
packet->rcd = rcd;
packet->updegr = 0;
packet->etail = -1;
packet->rhf_addr = get_rhf_addr(rcd);
packet->rhf = rhf_to_cpu(packet->rhf_addr);
- packet->rhqoff = rcd->head;
+ packet->rhqoff = hfi1_rcd_head(rcd);
packet->numpkt = 0;
}
@@ -551,22 +555,22 @@
mdata->maxcnt = packet->maxcnt;
mdata->ps_head = packet->rhqoff;
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+ if (get_dma_rtail_setting(rcd)) {
mdata->ps_tail = get_rcvhdrtail(rcd);
if (rcd->ctxt == HFI1_CTRL_CTXT)
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
else
mdata->ps_seq = 0; /* not used with DMA_RTAIL */
} else {
mdata->ps_tail = 0; /* used only with DMA_RTAIL*/
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
}
}
static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
struct hfi1_ctxtdata *rcd)
{
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+ if (get_dma_rtail_setting(rcd))
return mdata->ps_head == mdata->ps_tail;
return mdata->ps_seq != rhf_rcv_seq(rhf);
}
@@ -592,11 +596,9 @@
mdata->ps_head = 0;
/* Control context must do seq counting */
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
- (rcd->ctxt == HFI1_CTRL_CTXT)) {
- if (++mdata->ps_seq > 13)
- mdata->ps_seq = 1;
- }
+ if (!get_dma_rtail_setting(rcd) ||
+ rcd->ctxt == HFI1_CTRL_CTXT)
+ mdata->ps_seq = hfi1_seq_incr_wrap(mdata->ps_seq);
}
/*
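The open-coded "if (++seq > 13) seq = 1" is replaced by hfi1_seq_incr_wrap() here. The helper itself is added to hfi.h outside this hunk; its presumed shape, using the new RHF_MAX_SEQ constant from common.h:

static inline u32 hfi1_seq_incr_wrap(u32 seq)
{
	/* RHF sequence numbers run 1..RHF_MAX_SEQ (13), then wrap to 1 */
	if (++seq > RHF_MAX_SEQ)
		seq = 1;
	return seq;
}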
@@ -750,6 +752,39 @@
return ret;
}
+static void process_rcv_packet_napi(struct hfi1_packet *packet)
+{
+ packet->etype = rhf_rcv_type(packet->rhf);
+
+ /* total length */
+ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
+ /* retrieve eager buffer details */
+ packet->etail = rhf_egr_index(packet->rhf);
+ packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
+ &packet->updegr);
+ /*
+ * Prefetch the contents of the eager buffer. It is
+ * OK to send a negative length to prefetch_range().
+ * The +2 is the size of the RHF.
+ */
+ prefetch_range(packet->ebuf,
+ packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ (rhf_hdrq_offset(packet->rhf)
+ + 2)) * 4));
+
+ packet->rcd->rhf_rcv_function_map[packet->etype](packet);
+ packet->numpkt++;
+
+ /* Set up for the next packet */
+ packet->rhqoff += packet->rsize;
+ if (packet->rhqoff >= packet->maxcnt)
+ packet->rhqoff = 0;
+
+ packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
+ packet->rcd->rhf_offset;
+ packet->rhf = rhf_to_cpu(packet->rhf_addr);
+}
+
static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret;
@@ -770,7 +805,7 @@
* The +2 is the size of the RHF.
*/
prefetch_range(packet->ebuf,
- packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ packet->tlen - ((get_hdrqentsize(packet->rcd) -
(rhf_hdrq_offset(packet->rhf)
+ 2)) * 4));
}
@@ -824,22 +859,50 @@
* The only thing we need to do is a final update and call for an
* interrupt
*/
- update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
+ update_usrhead(packet->rcd, hfi1_rcd_head(packet->rcd), packet->updegr,
packet->etail, rcv_intr_dynamic, packet->numpkt);
}
/*
+ * handle_receive_interrupt_napi_fp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for receive interrupt.
+ * This is the fast path interrupt handler
+ * used when executing in the napi softirq environment.
+ */
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget)
+{
+ struct hfi1_packet packet;
+
+ init_packet(rcd, &packet);
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+ goto bail;
+
+ while (packet.numpkt < budget) {
+ process_rcv_packet_napi(&packet);
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+ break;
+
+ process_rcv_update(0, &packet);
+ }
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
+bail:
+ finish_packet(&packet);
+ return packet.numpkt;
+}
+
+/*
* Handle receive interrupts when using the no dma rtail option.
*/
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
- u32 seq;
int last = RCV_PKT_OK;
struct hfi1_packet packet;
init_packet(rcd, &packet);
- seq = rhf_rcv_seq(packet.rhf);
- if (seq != rcd->seq_cnt) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
@@ -848,15 +911,12 @@
while (last == RCV_PKT_OK) {
last = process_rcv_packet(&packet, thread);
- seq = rhf_rcv_seq(packet.rhf);
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
@@ -885,15 +945,14 @@
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
}
-static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
+static void set_all_fastpath(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd;
u16 i;
/*
@@ -901,50 +960,17 @@
* interrupt handler only for that context. Otherwise, switch
* interrupt handler for all statically allocated kernel contexts.
*/
- if (ctxt >= dd->first_dyn_alloc_ctxt) {
- rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
- if (rcd) {
- rcd->do_interrupt =
- &handle_receive_interrupt_nodma_rtail;
- hfi1_rcd_put(rcd);
- }
- return;
- }
-
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- rcd->do_interrupt =
- &handle_receive_interrupt_nodma_rtail;
+ if (rcd->ctxt >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic) {
+ hfi1_rcd_get(rcd);
+ hfi1_set_fast(rcd);
hfi1_rcd_put(rcd);
- }
-}
-
-static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt)
-{
- struct hfi1_ctxtdata *rcd;
- u16 i;
-
- /*
- * For dynamically allocated kernel contexts (like vnic) switch
- * interrupt handler only for that context. Otherwise, switch
- * interrupt handler for all statically allocated kernel contexts.
- */
- if (ctxt >= dd->first_dyn_alloc_ctxt) {
- rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
- if (rcd) {
- rcd->do_interrupt =
- &handle_receive_interrupt_dma_rtail;
- hfi1_rcd_put(rcd);
- }
return;
}
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- rcd->do_interrupt =
- &handle_receive_interrupt_dma_rtail;
+ if (rcd && (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic))
+ hfi1_set_fast(rcd);
hfi1_rcd_put(rcd);
}
}
@@ -960,17 +986,14 @@
if (!rcd)
continue;
if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
- rcd->do_interrupt = &handle_receive_interrupt;
+ rcd->do_interrupt = rcd->slow_handler;
hfi1_rcd_put(rcd);
}
}
-static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet *packet,
- struct hfi1_devdata *dd)
+static bool __set_armed_to_active(struct hfi1_packet *packet)
{
- struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
u8 etype = rhf_rcv_type(packet->rhf);
u8 sc = SC15_PACKET;
@@ -985,19 +1008,34 @@
sc = hfi1_16B_get_sc(hdr);
}
if (sc != SC15_PACKET) {
- int hwstate = driver_lstate(rcd->ppd);
+ int hwstate = driver_lstate(packet->rcd->ppd);
+ struct work_struct *lsaw =
+ &packet->rcd->ppd->linkstate_active_work;
if (hwstate != IB_PORT_ACTIVE) {
- dd_dev_info(dd,
+ dd_dev_info(packet->rcd->dd,
"Unexpected link state %s\n",
opa_lstate_name(hwstate));
- return 0;
+ return false;
}
- queue_work(rcd->ppd->link_wq, lsaw);
- return 1;
+ queue_work(packet->rcd->ppd->link_wq, lsaw);
+ return true;
}
- return 0;
+ return false;
+}
+
+/**
+ * set_armed_to_active - the fast path for armed to active
+ * @packet: the packet structure
+ *
+ * Return true if packet processing needs to bail.
+ */
+static bool set_armed_to_active(struct hfi1_packet *packet)
+{
+ if (likely(packet->rcd->ppd->host_link_state != HLS_UP_ARMED))
+ return false;
+ return __set_armed_to_active(packet);
}
/*
@@ -1015,15 +1053,15 @@
struct hfi1_packet packet;
int skip_pkt = 0;
+ if (!rcd->rcvhdrq)
+ return RCV_PKT_OK;
/* Control context will always use the slow path interrupt handler */
needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
init_packet(rcd, &packet);
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt) {
+ if (!get_dma_rtail_setting(rcd)) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
@@ -1040,22 +1078,15 @@
* Control context can potentially receive an invalid
* rhf. Drop such packets.
*/
- if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt)
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
skip_pkt = 1;
- }
}
prescan_rxq(rcd, &packet);
while (last == RCV_PKT_OK) {
- if (unlikely(dd->do_drop &&
- atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
- DROP_PACKET_ON)) {
- dd->do_drop = 0;
-
+ if (hfi1_need_drop(dd)) {
/* On to the next packet */
packet.rhqoff += packet.rsize;
packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
@@ -1067,26 +1098,14 @@
last = skip_rcv_packet(&packet, thread);
skip_pkt = 0;
} else {
- /* Auto activate link on non-SC15 packet receive */
- if (unlikely(rcd->ppd->host_link_state ==
- HLS_UP_ARMED) &&
- set_armed_to_active(rcd, &packet, dd))
+ if (set_armed_to_active(&packet))
goto bail;
last = process_rcv_packet(&packet, thread);
}
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (!get_dma_rtail_setting(rcd)) {
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
- if (needset) {
- dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_nodma_rtail(dd, rcd->ctxt);
- needset = 0;
- }
} else {
if (packet.rhqoff == hdrqtail)
last = RCV_PKT_DONE;
@@ -1095,27 +1114,24 @@
* rhf. Drop such packets.
*/
if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
+ bool lseq;
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (!last && (seq != rcd->seq_cnt))
+ lseq = hfi1_seq_incr(rcd,
+ rhf_rcv_seq(packet.rhf));
+ if (!last && lseq)
skip_pkt = 1;
}
-
- if (needset) {
- dd_dev_info(dd,
- "Switching to DMA_RTAIL\n");
- set_dma_rtail(dd, rcd->ctxt);
- needset = 0;
- }
}
+ if (needset) {
+ needset = false;
+ set_all_fastpath(dd, rcd);
+ }
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
/*
@@ -1127,6 +1143,63 @@
}
/*
+ * handle_receive_interrupt_napi_sp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for errors or receive interrupt.
+ * This is the slow path interrupt handler
+ * used when executing in the napi softirq environment.
+ */
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ int last = RCV_PKT_OK;
+ bool needset = true;
+ struct hfi1_packet packet;
+
+ init_packet(rcd, &packet);
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+ goto bail;
+
+ while (last != RCV_PKT_DONE && packet.numpkt < budget) {
+ if (hfi1_need_drop(dd)) {
+ /* On to the next packet */
+ packet.rhqoff += packet.rsize;
+ packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
+ packet.rhqoff +
+ rcd->rhf_offset;
+ packet.rhf = rhf_to_cpu(packet.rhf_addr);
+
+ } else {
+ if (set_armed_to_active(&packet))
+ goto bail;
+ process_rcv_packet_napi(&packet);
+ }
+
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+ last = RCV_PKT_DONE;
+
+ if (needset) {
+ needset = false;
+ set_all_fastpath(dd, rcd);
+ }
+
+ process_rcv_update(last, &packet);
+ }
+
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
+
+bail:
+ /*
+ * Always write head at end, and setup rcv interrupt, even
+ * if no packets were processed.
+ */
+ finish_packet(&packet);
+ return packet.numpkt;
+}
+
+/*
* We may discover in the interrupt that the hardware link state has
* changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
* and we need to update the driver's notion of the link state. We cannot
@@ -1603,59 +1676,116 @@
show_eflags_errs(packet);
}
+static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ibport *ibp;
+ struct net_device *netdev;
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct napi_struct *napi = rcd->napi;
+ struct sk_buff *skb;
+ struct hfi1_netdev_rxq *rxq = container_of(napi,
+ struct hfi1_netdev_rxq, napi);
+ u32 extra_bytes;
+ u32 tlen, qpnum;
+ bool do_work, do_cnp;
+ struct hfi1_ipoib_dev_priv *priv;
+
+ trace_hfi1_rcvhdr(packet);
+
+ hfi1_setup_ib_header(packet);
+
+ packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
+ packet->grh = NULL;
+
+ if (unlikely(rhf_err_flags(packet->rhf))) {
+ handle_eflags(packet);
+ return;
+ }
+
+ qpnum = ib_bth_get_qpn(packet->ohdr);
+ netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
+ if (!netdev)
+ goto drop_no_nd;
+
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+ trace_ctxt_rsm_hist(rcd->ctxt);
+
+ /* handle congestion notifications */
+ do_work = hfi1_may_ecn(packet);
+ if (unlikely(do_work)) {
+ do_cnp = (packet->opcode != IB_OPCODE_CNP);
+ (void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
+ packet, do_cnp);
+ }
+
+ /*
+ * The hardware split point is after the last byte of the DETH,
+ * so strip the padding and the CRC/ICRC. tlen is the whole
+ * packet length, so the header size must be subtracted as well.
+ */
+ tlen = packet->tlen;
+ extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
+ packet->hlen;
+ if (unlikely(tlen < extra_bytes))
+ goto drop;
+
+ tlen -= extra_bytes;
+
+ skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
+ if (unlikely(!skb))
+ goto drop;
+
+ priv = hfi1_ipoib_priv(netdev);
+ hfi1_ipoib_update_rx_netstats(priv, 1, skb->len);
+
+ skb->dev = netdev;
+ skb->pkt_type = PACKET_HOST;
+ netif_receive_skb(skb);
+
+ return;
+
+drop:
+ ++netdev->stats.rx_dropped;
+drop_no_nd:
+ ibp = rcd_to_iport(packet->rcd);
+ ++ibp->rvp.n_pkt_drops;
+}
+
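A worked example of the trim in hfi1_ipoib_ib_rcv() above, with assumed sizes: for a 9B packet with tlen = 322 bytes, hlen = 44 bytes of LRH/BTH/DETH, 2 pad bytes and a 4-byte ICRC (SIZE_OF_CRC << 2, assuming SIZE_OF_CRC is one dword), extra_bytes = 2 + 4 + 44 = 50, so the skb handed to netif_receive_skb() carries 322 - 50 = 272 payload bytes.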
/*
* The following functions are called by the interrupt handler. They are type
* specific handlers for each packet type.
*/
-static int process_receive_ib(struct hfi1_packet *packet)
+static void process_receive_ib(struct hfi1_packet *packet)
{
if (hfi1_setup_9B_packet(packet))
- return RHF_RCV_CONTINUE;
+ return;
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_ib_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
-{
- /* Packet received in VNIC context via RSM */
- if (packet->rcd->is_vnic)
- return true;
-
- if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) &&
- (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR))
- return true;
-
- return false;
-}
-
-static int process_receive_bypass(struct hfi1_packet *packet)
+static void process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
- if (hfi1_is_vnic_packet(packet)) {
- hfi1_vnic_bypass_rcv(packet);
- return RHF_RCV_CONTINUE;
- }
-
if (hfi1_setup_bypass_packet(packet))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
if (hfi1_16B_get_l2(packet->hdr) == 0x2) {
@@ -1678,17 +1808,16 @@
(OPA_EI_STATUS_SMASK | BAD_L2_ERR);
}
}
- return RHF_RCV_CONTINUE;
}
-static int process_receive_error(struct hfi1_packet *packet)
+static void process_receive_error(struct hfi1_packet *packet)
{
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
(rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR ||
packet->rhf & RHF_DC_ERR)))
- return RHF_RCV_CONTINUE;
+ return;
hfi1_setup_ib_header(packet);
handle_eflags(packet);
@@ -1696,32 +1825,29 @@
if (unlikely(rhf_err_flags(packet->rhf)))
dd_dev_err(packet->rcd->dd,
"Unhandled error packet received. Dropping.\n");
-
- return RHF_RCV_CONTINUE;
}
-static int kdeth_process_expected(struct hfi1_packet *packet)
+static void kdeth_process_expected(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_kdeth_expected_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static int kdeth_process_eager(struct hfi1_packet *packet)
+static void kdeth_process_eager(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
@@ -1729,37 +1855,41 @@
show_eflags_errs(packet);
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_kdeth_eager_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static int process_receive_invalid(struct hfi1_packet *packet)
+static void process_receive_invalid(struct hfi1_packet *packet)
{
dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
rhf_rcv_type(packet->rhf));
- return RHF_RCV_CONTINUE;
}
+#define HFI1_RCVHDR_DUMP_MAX 5
+
void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
{
struct hfi1_packet packet;
struct ps_mdata mdata;
+ int i;
- seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n",
- rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize,
- HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
+ seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s ctrl 0x%08llx status 0x%08llx, head %llu tail %llu sw head %u\n",
+ rcd->ctxt, get_hdrq_cnt(rcd), get_hdrqentsize(rcd),
+ get_dma_rtail_setting(rcd) ?
"dma_rtail" : "nodma_rtail",
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_CTRL),
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_STATUS),
read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) &
RCV_HDR_HEAD_HEAD_MASK,
- read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL));
+ read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL),
+ rcd->head);
init_packet(rcd, &packet);
init_ps_mdata(&mdata, &packet);
- while (1) {
+ for (i = 0; i < HFI1_RCVHDR_DUMP_MAX; i++) {
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
rcd->rhf_offset;
struct ib_header *hdr;
@@ -1811,3 +1941,14 @@
[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
};
+
+const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
+ [RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
+ [RHF_RCV_TYPE_EAGER] = process_receive_invalid,
+ [RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv,
+ [RHF_RCV_TYPE_ERROR] = process_receive_error,
+ [RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv,
+ [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
+};
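netdev_rhf_rcv_functions mirrors normal_rhf_rcv_functions but routes IB packets to the new IPoIB handler and treats the expected/eager KDETH types as invalid, since those never arrive on netdev contexts. Wiring a netdev receive context then reduces to a table swap, presumably done in the context setup code outside this file:

	/* assumed setup-path assignment, for illustration */
	rcd->rhf_rcv_function_map = netdev_rhf_rcv_functions;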
diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index d106d23..c22ab7b 100644
--- a/drivers/infiniband/hw/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
@@ -78,7 +78,7 @@
*size = 0;
*return_data = NULL;
- if (!efi_enabled(EFI_RUNTIME_SERVICES))
+ if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
return -EOPNOTSUPP;
uni_name = kcalloc(strlen(name) + 1, sizeof(efi_char16_t), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
index 986c121..0dfbcfb 100644
--- a/drivers/infiniband/hw/hfi1/fault.c
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -222,11 +222,11 @@
while (bit < bitsize) {
zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
if (zero - 1 != bit)
- size += snprintf(data + size,
+ size += scnprintf(data + size,
datalen - size - 1,
"0x%lx-0x%lx,", bit, zero - 1);
else
- size += snprintf(data + size,
+ size += scnprintf(data + size,
datalen - size - 1, "0x%lx,",
bit);
bit = find_next_bit(fault->opcodes, bitsize, zero);
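The snprintf()-to-scnprintf() switch above fixes a latent overflow: snprintf() returns the length that would have been written given unlimited space, so the chained "size +=" can push size past datalen and turn the next "datalen - size - 1" into a bogus size. scnprintf() returns the number of characters actually stored. A minimal illustration (8-byte buffer assumed):

	char buf[8];
	/* "0x0-0xff," needs 9 characters */
	int a = snprintf(buf, sizeof(buf), "0x%lx-0x%lx,", 0UL, 0xffUL);  /* a == 9 */
	int b = scnprintf(buf, sizeof(buf), "0x%lx-0x%lx,", 0UL, 0xffUL); /* b == 7 */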
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 89e1dfd..329ee4f 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,5 +1,6 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -206,10 +207,7 @@
spin_lock_init(&fd->tid_lock);
spin_lock_init(&fd->invalid_lock);
fd->rec_cpu_num = -1; /* no cpu affinity by default */
- fd->mm = current->mm;
- mmgrab(fd->mm);
fd->dd = dd;
- kobject_get(&fd->dd->kobj);
fp->private_data = fd;
return 0;
nomem:
@@ -516,12 +514,12 @@
ret = -EINVAL;
goto done;
}
- if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) {
+ if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) {
ret = -EPERM;
goto done;
}
memlen = PAGE_SIZE;
- memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
+ memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt);
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -712,8 +710,6 @@
deallocate_ctxt(uctxt);
done:
- mmdrop(fdata->mm);
- kobject_put(&dd->kobj);
if (atomic_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
@@ -1102,7 +1098,7 @@
* don't have to wait to be sure the DMA update has happened
* (chip resets head/tail to 0 on transition to enable).
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
/* Setup J_KEY before enabling the context */
@@ -1150,7 +1146,7 @@
HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
/* adjust flag if this fd is not able to cache */
- if (!fd->handler)
+ if (!fd->use_mn)
cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
cinfo.num_active = hfi1_count_active_units();
@@ -1166,8 +1162,8 @@
cinfo.send_ctxt = uctxt->sc->hw_context;
cinfo.egrtids = uctxt->egrbufs.alloced;
- cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
+ cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt);
+ cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2;
cinfo.sdma_ring_size = fd->cq->nentries;
cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
@@ -1266,7 +1262,7 @@
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
binfo.sw_version = HFI1_KERN_SWVERSION;
- binfo.bthqp = kdeth_qp;
+ binfo.bthqp = RVT_KDETH_QP_PREFIX;
binfo.jkey = uctxt->jkey;
/*
* If more than 64 contexts are enabled the allocated credit
@@ -1555,7 +1551,7 @@
* always resets it's tail register back to 0 on a
* transition from disabled to enabled.
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
} else {
@@ -1696,7 +1692,7 @@
snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
&dd->user_cdev, &dd->user_device,
- true, &dd->kobj);
+ true, &dd->verbs_dev.rdi.ibdev.dev.kobj);
if (ret)
user_remove(dd);
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index c090807..2cf102b 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1868,11 +1868,8 @@
2;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
pcfgcache->config_tables[table_type].num_table =
table_length_dwords;
@@ -1890,15 +1887,10 @@
/* metadata table */
switch (table_type) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
@@ -2028,15 +2020,10 @@
switch (table) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
if (field && field < platform_config_table_limits[table])
src_ptr =
@@ -2139,11 +2126,8 @@
pcfgcache->config_tables[table_type].table;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
src_ptr = pcfgcache->config_tables[table_type].table;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index b79931c..2a9a040 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,8 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
- * Copyright(c) 2015-2018 Intel Corporation.
+ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -197,7 +198,9 @@
u32 count;
};
-typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
+struct hfi1_ctxtdata;
+typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data);
+typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
struct tid_queue {
struct list_head queue_head;
@@ -226,7 +229,13 @@
* be valid. Worst case is we process an extra interrupt and up to 64
* packets with the wrong interrupt handler.
*/
- int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+ intr_handler do_interrupt;
+	/** fast handler used after autoactivation */
+ intr_handler fast_handler;
+ /** slow handler */
+ intr_handler slow_handler;
+	/* napi pointer associated with netdev */
+ struct napi_struct *napi;
/* verbs rx_stats per rcd */
struct hfi1_opcode_stats_perctx *opstats;
/* clear interrupt mask */
@@ -377,11 +386,11 @@
u32 rhqoff;
u32 dlid;
u32 slid;
+ int numpkt;
u16 tlen;
s16 etail;
u16 pkey;
u8 hlen;
- u8 numpkt;
u8 rsize;
u8 updegr;
u8 etype;
@@ -979,7 +988,7 @@
struct hfi1_pkt_state *ps,
struct rvt_swqe *wqe);
extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
-
+extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];
/* return values for the RHF receive functions */
#define RHF_RCV_CONTINUE 0 /* keep going */
@@ -1039,23 +1048,10 @@
#define NUM_MAP_ENTRIES 256
#define NUM_MAP_REGS 32
-/*
- * Number of VNIC contexts used. Ensure it is less than or equal to
- * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
- */
-#define HFI1_NUM_VNIC_CTXT 8
-
-/* Number of VNIC RSM entries */
-#define NUM_VNIC_MAP_ENTRIES 8
-
/* Virtual NIC information */
struct hfi1_vnic_data {
- struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
struct kmem_cache *txreq_cache;
- struct xarray vesws;
u8 num_vports;
- u8 rmt_start;
- u8 num_ctxt;
};
struct hfi1_vnic_vport_info;
@@ -1161,8 +1157,8 @@
u64 z_send_schedule;
u64 __percpu *send_schedule;
- /* number of reserved contexts for VNIC usage */
- u16 num_vnic_contexts;
+ /* number of reserved contexts for netdev usage */
+ u16 num_netdev_contexts;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
/* number of pio send contexts in use by the driver */
@@ -1312,7 +1308,7 @@
struct err_info_constraint err_info_xmit_constraint;
atomic_t drop_packet;
- u8 do_drop;
+ bool do_drop;
u8 err_info_uncorrectable;
u8 err_info_fmconfig;
@@ -1407,18 +1403,17 @@
bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable *sdma_rht;
- struct kobject kobj;
-
/* vnic data */
struct hfi1_vnic_data vnic;
/* Lock to protect IRQ SRC register access */
spinlock_t irq_src_lock;
-};
+ int vnic_num_vports;
+ struct net_device *dummy_netdev;
+ struct hfi1_affinity_node *affinity_entry;
-static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
-{
- return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES;
-}
+ /* Keeps track of IPoIB RSM rule users */
+ atomic_t ipoib_rsm_usr_num;
+};
/* 8051 firmware version helper */
#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
@@ -1449,7 +1444,7 @@
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
- struct mmu_rb_handler *handler;
+ bool use_mn;
struct tid_rb_node **entry_to_rb;
spinlock_t tid_lock; /* protect tid_[limit,used] counters */
u32 tid_limit;
@@ -1458,7 +1453,6 @@
u32 invalid_tid_idx;
/* protect invalid_tids array and invalid_tid_idx */
spinlock_t invalid_lock;
- struct mm_struct *mm;
};
extern struct xarray hfi1_dev_table;
@@ -1496,6 +1490,8 @@
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget);
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget);
void set_all_slowpath(struct hfi1_devdata *dd);
extern const struct pci_device_id hfi1_pci_tbl[];
@@ -1512,12 +1508,148 @@
#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */
#define RCV_PKT_DONE 0x2 /* stop, no more packets detected */
+/**
+ * hfi1_rcd_head - read accessor for rcd head
+ * @rcd: the context
+ */
+static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->head;
+}
+
+/**
+ * hfi1_set_rcd_head - write accessor for rcd head
+ * @rcd: the context
+ * @head: the new head
+ */
+static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head)
+{
+ rcd->head = head;
+}
+
/* calculate the current RHF address */
static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
{
return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
}
+/* return DMA_RTAIL configuration */
+static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd)
+{
+ return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL);
+}
+
+/**
+ * hfi1_seq_incr_wrap - wrapping increment for sequence
+ * @seq: the current sequence number
+ *
+ * Returns: the incremented seq
+ */
+static inline u8 hfi1_seq_incr_wrap(u8 seq)
+{
+ if (++seq > RHF_MAX_SEQ)
+ seq = 1;
+ return seq;
+}
+
+/**
+ * hfi1_seq_cnt - return seq_cnt member
+ * @rcd: the receive context
+ *
+ * Return seq_cnt member
+ */
+static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->seq_cnt;
+}
+
+/**
+ * hfi1_set_seq_cnt - set seq_cnt member
+ * @rcd: the receive context
+ * @cnt: the new seq_cnt value
+ *
+ * Set the seq_cnt member
+ */
+static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt)
+{
+ rcd->seq_cnt = cnt;
+}
+
+/**
+ * last_rcv_seq - detect the last packet
+ * @rcd: the receive context
+ * @seq: the packet's RHF sequence number
+ *
+ * Return true if @seq does not match the context's expected sequence,
+ * i.e. this was the last packet
+ */
+static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ return seq != rcd->seq_cnt;
+}
+
+/**
+ * hfi1_seq_incr - increment context sequence number
+ * @rcd: the receive context
+ * @seq: the current sequence number
+ *
+ * Returns: true if this was the last packet
+ */
+static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt);
+ return last_rcv_seq(rcd, seq);
+}
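(Editorial sketch, not part of the patch: the wrap helper above cycles valid
sequence numbers 1..RHF_MAX_SEQ and never yields 0. The stand-alone C below
stubs RHF_MAX_SEQ to 13, an assumed value mirroring the driver's rhf.h.)

#include <stdio.h>

#define RHF_MAX_SEQ 13	/* assumed; the real constant lives in rhf.h */

/* mirrors hfi1_seq_incr_wrap(): valid sequences cycle 1..RHF_MAX_SEQ */
static unsigned char seq_incr_wrap(unsigned char seq)
{
	if (++seq > RHF_MAX_SEQ)
		seq = 1;
	return seq;
}

int main(void)
{
	unsigned char seq = RHF_MAX_SEQ;

	seq = seq_incr_wrap(seq);
	printf("%u\n", seq);	/* prints 1: wrapped past the max, never 0 */
	return 0;
}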
+
+/**
+ * get_hdrqentsize - return hdrq entry size
+ * @rcd: the receive context
+ */
+static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->rcvhdrqentsize;
+}
+
+/**
+ * get_hdrq_cnt - return hdrq count
+ * @rcd: the receive context
+ */
+static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->rcvhdrq_cnt;
+}
+
+/**
+ * hfi1_is_slowpath - check if this context is slow path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->do_interrupt == rcd->slow_handler;
+}
+
+/**
+ * hfi1_is_fastpath - check if this context is fast path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd)
+{
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ return false;
+
+ return rcd->do_interrupt == rcd->fast_handler;
+}
+
+/**
+ * hfi1_set_fast - change to the fast handler
+ * @rcd: the receive context
+ */
+static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd)
+{
+ if (unlikely(!rcd))
+ return;
+ if (unlikely(!hfi1_is_fastpath(rcd)))
+ rcd->do_interrupt = rcd->fast_handler;
+}
+
int hfi1_reset_device(int);
void receive_interrupt_work(struct work_struct *work);
@@ -2020,9 +2152,21 @@
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty);
+/**
+ * hfi1_rcvhdrtail_kvaddr - return tail kvaddr
+ * @rcd: the receive context
+ */
+static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd)
+{
+ return (__le64 *)rcd->rcvhdrtail_kvaddr;
+}
+
static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
- *((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
+ u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd);
+
+ if (kv)
+ *kv = 0ULL;
}
static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -2031,7 +2175,17 @@
* volatile because it's a DMA target from the chip, routine is
* inlined, and don't want register caching or reordering.
*/
- return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+ return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd));
+}
+
+static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd)
+{
+ if (likely(!rcd->rcvhdrtail_kvaddr)) {
+ u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));
+
+ return !last_rcv_seq(rcd, seq);
+ }
+ return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd);
}
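(Editorial sketch, not part of the patch: hfi1_packet_present() picks the
right availability test per context, RHF sequence tracking when no tail is
DMA'd, head/tail comparison otherwise. A consumer loop might use it as
below; process_one_pkt() is a hypothetical stand-in for the real RHF
dispatch.)

static int example_rx_poll(struct hfi1_ctxtdata *rcd, int budget)
{
	int work = 0;

	while (work < budget && hfi1_packet_present(rcd)) {
		process_one_pkt(rcd);	/* hypothetical dispatch helper */
		work++;
	}
	return work;
}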
/*
@@ -2088,7 +2242,6 @@
extern unsigned long n_krcvqs;
extern uint krcvqs[];
extern int krcvqsset;
-extern uint kdeth_qp;
extern uint loopback;
extern uint quick_linkup;
extern uint rcv_intr_timeout;
@@ -2303,6 +2456,25 @@
return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
}
+/**
+ * hfi1_need_drop - detect need for drop
+ * @dd: the device
+ *
+ * In some cases, the first packet needs to be dropped.
+ *
+ * Return true if the current packet needs to be dropped and false otherwise.
+ */
+static inline bool hfi1_need_drop(struct hfi1_devdata *dd)
+{
+ if (unlikely(dd->do_drop &&
+ atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+ DROP_PACKET_ON)) {
+ dd->do_drop = false;
+ return true;
+ }
+ return false;
+}
+
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 1256dbd..fa2cd76 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -69,6 +69,7 @@
#include "affinity.h"
#include "vnic.h"
#include "exp_rcv.h"
+#include "netdev.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -78,8 +79,6 @@
*/
#define HFI1_MIN_USER_CTXT_BUFCNT 7
-#define HFI1_MIN_HDRQ_EGRBUF_CNT 2
-#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
@@ -122,8 +121,6 @@
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
-static inline u64 encode_rcv_header_entry_size(u16 size);
-
DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
static int hfi1_create_kctxt(struct hfi1_devdata *dd,
@@ -154,7 +151,11 @@
/* Control context must use DMA_RTAIL */
if (rcd->ctxt == HFI1_CTRL_CTXT)
rcd->flags |= HFI1_CAP_DMA_RTAIL;
- rcd->seq_cnt = 1;
+ rcd->fast_handler = get_dma_rtail_setting(rcd) ?
+ handle_receive_interrupt_dma_rtail :
+ handle_receive_interrupt_nodma_rtail;
+
+ hfi1_set_seq_cnt(rcd, 1);
rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
if (!rcd->sc) {
@@ -373,6 +374,9 @@
rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
+ rcd->slow_handler = handle_receive_interrupt;
+ rcd->do_interrupt = rcd->slow_handler;
+ rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
mutex_init(&rcd->exp_mutex);
spin_lock_init(&rcd->exp_lock);
@@ -511,23 +515,6 @@
}
/*
- * Convert a receive header entry size that to the encoding used in the CSR.
- *
- * Return a zero if the given size is invalid.
- */
-static inline u64 encode_rcv_header_entry_size(u16 size)
-{
- /* there are only 3 valid receive header entry sizes */
- if (size == 2)
- return 1;
- if (size == 16)
- return 2;
- else if (size == 32)
- return 4;
- return 0; /* invalid */
-}
-
-/*
* Select the largest ccti value over all SLs to determine the intra-
* packet gap for the link.
*
@@ -910,10 +897,10 @@
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
- dd->do_drop = 1;
+ dd->do_drop = true;
} else {
atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
- dd->do_drop = 0;
+ dd->do_drop = false;
}
/* make sure the link is not "up" */
@@ -929,18 +916,6 @@
if (ret)
goto done;
- /* allocate dummy tail memory for all receive contexts */
- dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
- sizeof(u64),
- &dd->rcvhdrtail_dummy_dma,
- GFP_KERNEL);
-
- if (!dd->rcvhdrtail_dummy_kvaddr) {
- dd_dev_err(dd, "cannot allocate dummy tail memory\n");
- ret = -ENOMEM;
- goto done;
- }
-
/* dd->rcd can be NULL if early initialization failed */
for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
@@ -953,8 +928,6 @@
if (!rcd)
continue;
- rcd->do_interrupt = &handle_receive_interrupt;
-
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
@@ -1162,9 +1135,9 @@
dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
- if (rcd->rcvhdrtail_kvaddr) {
+ if (hfi1_rcvhdrtail_kvaddr(rcd)) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *)rcd->rcvhdrtail_kvaddr,
+ (void *)hfi1_rcvhdrtail_kvaddr(rcd),
rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
@@ -1175,7 +1148,7 @@
rcd->egrbufs.rcvtids = NULL;
for (e = 0; e < rcd->egrbufs.alloced; e++) {
- if (rcd->egrbufs.buffers[e].dma)
+ if (rcd->egrbufs.buffers[e].addr)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
@@ -1227,13 +1200,13 @@
}
/**
- * hfi1_clean_devdata - cleans up per-unit data structure
+ * hfi1_free_devdata - cleans up and frees per-unit data structure
* @dd: pointer to a valid devdata structure
*
- * It cleans up all data structures set up by
+ * It cleans up and frees all data structures set up by
* by hfi1_alloc_devdata().
*/
-static void hfi1_clean_devdata(struct hfi1_devdata *dd)
+void hfi1_free_devdata(struct hfi1_devdata *dd)
{
struct hfi1_asic_data *ad;
unsigned long flags;
@@ -1256,27 +1229,15 @@
dd->tx_opstats = NULL;
kfree(dd->comp_vect);
dd->comp_vect = NULL;
+ if (dd->rcvhdrtail_dummy_kvaddr)
+ dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
+ (void *)dd->rcvhdrtail_dummy_kvaddr,
+ dd->rcvhdrtail_dummy_dma);
+ dd->rcvhdrtail_dummy_kvaddr = NULL;
sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
-static void __hfi1_free_devdata(struct kobject *kobj)
-{
- struct hfi1_devdata *dd =
- container_of(kobj, struct hfi1_devdata, kobj);
-
- hfi1_clean_devdata(dd);
-}
-
-static struct kobj_type hfi1_devdata_type = {
- .release = __hfi1_free_devdata,
-};
-
-void hfi1_free_devdata(struct hfi1_devdata *dd)
-{
- kobject_put(&dd->kobj);
-}
-
/**
* hfi1_alloc_devdata - Allocate our primary per-unit data structure.
* @pdev: Valid PCI device
@@ -1303,7 +1264,6 @@
dd->pport = (struct hfi1_pportdata *)(dd + 1);
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
- dd->node = NUMA_NO_NODE;
ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
GFP_KERNEL);
@@ -1313,6 +1273,15 @@
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+ /*
+ * If the BIOS does not have the NUMA node information set, select
+ * NUMA 0 so we get consistent performance.
+ */
+ dd->node = pcibus_to_node(pdev->bus);
+ if (dd->node == NUMA_NO_NODE) {
+ dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+ dd->node = 0;
+ }
/*
* Initialize all locks for the device. This needs to be as early as
@@ -1362,11 +1331,20 @@
goto bail;
}
- kobject_init(&dd->kobj, &hfi1_devdata_type);
+ /* allocate dummy tail memory for all receive contexts */
+ dd->rcvhdrtail_dummy_kvaddr =
+ dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
+ &dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
+ if (!dd->rcvhdrtail_dummy_kvaddr) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ atomic_set(&dd->ipoib_rsm_usr_num, 0);
return dd;
bail:
- hfi1_clean_devdata(dd);
+ hfi1_free_devdata(dd);
return ERR_PTR(ret);
}
@@ -1569,13 +1547,6 @@
free_credit_return(dd);
- if (dd->rcvhdrtail_dummy_kvaddr) {
- dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
- (void *)dd->rcvhdrtail_dummy_kvaddr,
- dd->rcvhdrtail_dummy_dma);
- dd->rcvhdrtail_dummy_kvaddr = NULL;
- }
-
/*
* Free any resources still in use (usually just kernel contexts)
* at unload; we do for ctxtcnt, because that's what we allocate.
@@ -1624,29 +1595,6 @@
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
-{
- if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd, "Receive header queue count too small\n");
- return -EINVAL;
- }
-
- if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
- return -EINVAL;
- }
-
- if (thecnt % HDRQ_INCREMENT) {
- dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
- return -EINVAL;
- }
-
- return 0;
-}
-
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret = 0, j, pidx, initfail;
@@ -1674,7 +1622,7 @@
}
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
+ ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
@@ -1733,9 +1681,6 @@
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
- /* setup vnic */
- hfi1_vnic_setup(dd);
-
ret = hfi1_register_ib_device(dd);
/*
@@ -1774,7 +1719,6 @@
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
- hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1819,8 +1763,8 @@
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
- /* cleanup vnic */
- hfi1_vnic_cleanup(dd);
+ /* free netdev data */
+ hfi1_netdev_free(dd);
/*
* Disable the IB link, disable interrupts on the device,
@@ -1856,7 +1800,6 @@
int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
unsigned amt;
- u64 reg;
if (!rcd->rcvhdrq) {
gfp_t gfp_flags;
@@ -1888,30 +1831,9 @@
goto bail_free;
}
}
- /*
- * These values are per-context:
- * RcvHdrCnt
- * RcvHdrEntSize
- * RcvHdrSize
- */
- reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT)
- & RCV_HDR_CNT_CNT_MASK)
- << RCV_HDR_CNT_CNT_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg);
- reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize)
- & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
- << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
- reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
- << RCV_HDR_SIZE_HDR_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
- /*
- * Program dummy tail address for every receive context
- * before enabling any receive context
- */
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_dma);
+ set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
+ rcd->rcvhdrq_cnt);
return 0;
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 07847cb..d580aa1 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -399,7 +399,7 @@
* @wait_head: the wait queue
*
* This function is called to insert an iowait struct into a
- * wait queue after a resource (eg, sdma decriptor or pio
+ * wait queue after a resource (eg, sdma descriptor or pio
* buffer) is run out.
*/
static inline void iowait_queue(bool pkts_sent, struct iowait *w,
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
new file mode 100644
index 0000000..1ee361c
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB functionality
+ */
+
+#ifndef HFI1_IPOIB_H
+#define HFI1_IPOIB_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/atomic.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/if_infiniband.h>
+
+#include "hfi.h"
+#include "iowait.h"
+#include "netdev.h"
+
+#include <rdma/ib_verbs.h>
+
+#define HFI1_IPOIB_ENTROPY_SHIFT 24
+
+#define HFI1_IPOIB_TXREQ_NAME_LEN 32
+
+#define HFI1_IPOIB_PSEUDO_LEN 20
+#define HFI1_IPOIB_ENCAP_LEN 4
+
+struct hfi1_ipoib_dev_priv;
+
+union hfi1_ipoib_flow {
+ u16 as_int;
+ struct {
+ u8 tx_queue;
+ u8 sc5;
+ } __attribute__((__packed__));
+};
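(Editorial sketch, not part of the patch: overlaying the tx_queue/sc5 pair
with a u16 lets a flow-change test collapse to one integer compare, which is
how the tx path uses it later in ipoib_tx.c.)

static bool example_flow_changed(union hfi1_ipoib_flow cur,
				 union hfi1_ipoib_flow next)
{
	/* one compare covers both tx_queue and sc5 */
	return cur.as_int != next.as_int;
}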
+
+/**
+ * struct hfi1_ipoib_circ_buf - List of items to be processed
+ * @items: ring of items
+ * @head: ring head
+ * @tail: ring tail
+ * @max_items: one more than the max number of items the ring can contain
+ * @producer_lock: producer sync lock
+ * @consumer_lock: consumer sync lock
+ */
+struct ipoib_txreq;
+struct hfi1_ipoib_circ_buf {
+ struct ipoib_txreq **items;
+ unsigned long head;
+ unsigned long tail;
+ unsigned long max_items;
+ spinlock_t producer_lock; /* head sync lock */
+ spinlock_t consumer_lock; /* tail sync lock */
+};
+
+/**
+ * struct hfi1_ipoib_txq - IPOIB per Tx queue information
+ * @priv: private pointer
+ * @sde: sdma engine
+ * @tx_list: tx request list
+ * @sent_txreqs: count of txreqs posted to sdma
+ * @stops: count of stops of queue
+ * @ring_full: ring has been filled
+ * @no_desc: descriptor shortage seen
+ * @flow: tracks when list needs to be flushed for a flow change
+ * @q_idx: ipoib Tx queue index
+ * @pkts_sent: indicator packets have been sent from this queue
+ * @wait: iowait structure
+ * @complete_txreqs: count of txreqs completed by sdma
+ * @napi: pointer to tx napi interface
+ * @tx_ring: ring of ipoib txreqs to be reaped by napi callback
+ */
+struct hfi1_ipoib_txq {
+ struct hfi1_ipoib_dev_priv *priv;
+ struct sdma_engine *sde;
+ struct list_head tx_list;
+ u64 sent_txreqs;
+ atomic_t stops;
+ atomic_t ring_full;
+ atomic_t no_desc;
+ union hfi1_ipoib_flow flow;
+ u8 q_idx;
+ bool pkts_sent;
+ struct iowait wait;
+
+ atomic64_t ____cacheline_aligned_in_smp complete_txreqs;
+ struct napi_struct *napi;
+ struct hfi1_ipoib_circ_buf tx_ring;
+};
+
+struct hfi1_ipoib_dev_priv {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ struct ib_device *device;
+ struct hfi1_ipoib_txq *txqs;
+ struct kmem_cache *txreq_cache;
+ struct napi_struct *tx_napis;
+ u16 pkey;
+ u16 pkey_index;
+ u32 qkey;
+ u8 port_num;
+
+ const struct net_device_ops *netdev_ops;
+ struct rvt_qp *qp;
+ struct pcpu_sw_netstats __percpu *netstats;
+};
+
+/* hfi1 ipoib rdma netdev's private data structure */
+struct hfi1_ipoib_rdma_netdev {
+ struct rdma_netdev rn; /* keep this first */
+ /* followed by device private data */
+ struct hfi1_ipoib_dev_priv dev_priv;
+};
+
+static inline struct hfi1_ipoib_dev_priv *
+hfi1_ipoib_priv(const struct net_device *dev)
+{
+ return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
+}
+
+static inline void
+hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv,
+ u64 packets,
+ u64 bytes)
+{
+ struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
+
+ u64_stats_update_begin(&netstats->syncp);
+ netstats->rx_packets += packets;
+ netstats->rx_bytes += bytes;
+ u64_stats_update_end(&netstats->syncp);
+}
+
+static inline void
+hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
+ u64 packets,
+ u64 bytes)
+{
+ struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
+
+ u64_stats_update_begin(&netstats->syncp);
+ netstats->tx_packets += packets;
+ netstats->tx_bytes += bytes;
+ u64_stats_update_end(&netstats->syncp);
+}
+
+int hfi1_ipoib_send_dma(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn);
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv);
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
+
+int hfi1_ipoib_rxq_init(struct net_device *dev);
+void hfi1_ipoib_rxq_deinit(struct net_device *dev);
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev);
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+ int size, void *data);
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params);
+
+#endif /* HFI1_IPOIB_H */
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
new file mode 100644
index 0000000..22299b0
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for ipoib functionality
+ */
+
+#include "ipoib.h"
+#include "hfi.h"
+
+static u32 qpn_from_mac(u8 *mac_arr)
+{
+ return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3];
+}
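(Editorial sketch, not part of the patch: an IPoIB hardware address keeps
the 24-bit QPN in bytes 1..3; byte 0 is the link-layer flags byte. A
stand-alone check of that arithmetic, with a made-up address:)

#include <assert.h>

int main(void)
{
	unsigned char mac[4] = { 0x80, 0x00, 0x04, 0xd2 };	/* hypothetical */
	unsigned int qpn = (unsigned int)mac[1] << 16 | mac[2] << 8 | mac[3];

	assert(qpn == 0x4d2);	/* QPN 1234; the flags byte is ignored */
	return 0;
}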
+
+static int hfi1_ipoib_dev_init(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int ret;
+
+ priv->netstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+
+ ret = priv->netdev_ops->ndo_init(dev);
+ if (ret)
+ return ret;
+
+ ret = hfi1_netdev_add_data(priv->dd,
+ qpn_from_mac(priv->netdev->dev_addr),
+ dev);
+ if (ret < 0) {
+ priv->netdev_ops->ndo_uninit(dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void hfi1_ipoib_dev_uninit(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
+
+ priv->netdev_ops->ndo_uninit(dev);
+}
+
+static int hfi1_ipoib_dev_open(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int ret;
+
+ ret = priv->netdev_ops->ndo_open(dev);
+ if (!ret) {
+ struct hfi1_ibport *ibp = to_iport(priv->device,
+ priv->port_num);
+ struct rvt_qp *qp;
+ u32 qpn = qpn_from_mac(priv->netdev->dev_addr);
+
+ rcu_read_lock();
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (!qp) {
+ rcu_read_unlock();
+ priv->netdev_ops->ndo_stop(dev);
+ return -EINVAL;
+ }
+ rvt_get_qp(qp);
+ priv->qp = qp;
+ rcu_read_unlock();
+
+ hfi1_netdev_enable_queues(priv->dd);
+ hfi1_ipoib_napi_tx_enable(dev);
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_dev_stop(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ if (!priv->qp)
+ return 0;
+
+ hfi1_ipoib_napi_tx_disable(dev);
+ hfi1_netdev_disable_queues(priv->dd);
+
+ rvt_put_qp(priv->qp);
+ priv->qp = NULL;
+
+ return priv->netdev_ops->ndo_stop(dev);
+}
+
+static void hfi1_ipoib_dev_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *storage)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ netdev_stats_to_stats64(storage, &dev->stats);
+ dev_fetch_sw_netstats(storage, priv->netstats);
+}
+
+static const struct net_device_ops hfi1_ipoib_netdev_ops = {
+ .ndo_init = hfi1_ipoib_dev_init,
+ .ndo_uninit = hfi1_ipoib_dev_uninit,
+ .ndo_open = hfi1_ipoib_dev_open,
+ .ndo_stop = hfi1_ipoib_dev_stop,
+ .ndo_get_stats64 = hfi1_ipoib_dev_get_stats64,
+};
+
+static int hfi1_ipoib_send(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn)
+{
+ return hfi1_ipoib_send_dma(dev, skb, address, dqpn);
+}
+
+static int hfi1_ipoib_mcast_attach(struct net_device *dev,
+ struct ib_device *device,
+ union ib_gid *mgid,
+ u16 mlid,
+ int set_qkey,
+ u32 qkey)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+ struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+ struct rvt_qp *qp;
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (qp) {
+ rvt_get_qp(qp);
+ rcu_read_unlock();
+ if (set_qkey)
+ priv->qkey = qkey;
+
+ /* attach QP to multicast group */
+ ret = ib_attach_mcast(&qp->ibqp, mgid, mlid);
+ rvt_put_qp(qp);
+ } else {
+ rcu_read_unlock();
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_mcast_detach(struct net_device *dev,
+ struct ib_device *device,
+ union ib_gid *mgid,
+ u16 mlid)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+ struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+ struct rvt_qp *qp;
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (qp) {
+ rvt_get_qp(qp);
+ rcu_read_unlock();
+ ret = ib_detach_mcast(&qp->ibqp, mgid, mlid);
+ rvt_put_qp(qp);
+ } else {
+ rcu_read_unlock();
+ }
+ return ret;
+}
+
+static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ hfi1_ipoib_txreq_deinit(priv);
+ hfi1_ipoib_rxq_deinit(priv->netdev);
+
+ free_percpu(priv->netstats);
+}
+
+static void hfi1_ipoib_set_id(struct net_device *dev, int id)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ priv->pkey_index = (u16)id;
+ ib_query_pkey(priv->device,
+ priv->port_num,
+ priv->pkey_index,
+ &priv->pkey);
+}
+
+static int hfi1_ipoib_setup_rn(struct ib_device *device,
+ u8 port_num,
+ struct net_device *netdev,
+ void *param)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct rdma_netdev *rn = netdev_priv(netdev);
+ struct hfi1_ipoib_dev_priv *priv;
+ int rc;
+
+ rn->send = hfi1_ipoib_send;
+ rn->attach_mcast = hfi1_ipoib_mcast_attach;
+ rn->detach_mcast = hfi1_ipoib_mcast_detach;
+ rn->set_id = hfi1_ipoib_set_id;
+ rn->hca = device;
+ rn->port_num = port_num;
+ rn->mtu = netdev->mtu;
+
+ priv = hfi1_ipoib_priv(netdev);
+ priv->dd = dd;
+ priv->netdev = netdev;
+ priv->device = device;
+ priv->port_num = port_num;
+ priv->netdev_ops = netdev->netdev_ops;
+
+ ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
+
+ rc = hfi1_ipoib_txreq_init(priv);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
+ return rc;
+ }
+
+ rc = hfi1_ipoib_rxq_init(netdev);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
+ hfi1_ipoib_txreq_deinit(priv);
+ return rc;
+ }
+
+ netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+
+ netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
+ netdev->needs_free_netdev = true;
+
+ return 0;
+}
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+
+ if (type != RDMA_NETDEV_IPOIB)
+ return -EOPNOTSUPP;
+
+ if (!HFI1_CAP_IS_KSET(AIP) || !dd->num_netdev_contexts)
+ return -EOPNOTSUPP;
+
+ if (!port_num || port_num > dd->num_pports)
+ return -EINVAL;
+
+ params->sizeof_priv = sizeof(struct hfi1_ipoib_rdma_netdev);
+ params->txqs = dd->num_sdma;
+ params->rxqs = dd->num_netdev_contexts;
+ params->param = NULL;
+ params->initialize_rdma_netdev = hfi1_ipoib_setup_rn;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c
new file mode 100644
index 0000000..3afa754
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#include "netdev.h"
+#include "ipoib.h"
+
+#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))
+
+static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
+{
+ void *dst_data;
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = *((__be16 *)data);
+
+ dst_data = skb_put(skb, size);
+ memcpy(dst_data, data, size);
+ skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
+ skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
+}
+
+static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
+{
+ struct sk_buff *skb;
+ int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
+ void *frag;
+
+ skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ skb_size = SKB_DATA_ALIGN(skb_size);
+ frag = napi_alloc_frag(skb_size);
+
+ if (unlikely(!frag))
+ return napi_alloc_skb(napi, size);
+
+ skb = build_skb(frag, skb_size);
+
+ if (unlikely(!skb)) {
+ skb_free_frag(frag);
+ return NULL;
+ }
+
+ skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
+ return skb;
+}
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+ int size, void *data)
+{
+ struct napi_struct *napi = &rxq->napi;
+ int skb_size = size + HFI1_IPOIB_ENCAP_LEN;
+ struct sk_buff *skb;
+
+ /*
+	 * For smaller (4k + skb overhead) allocations use the napi skb
+	 * cache. Otherwise fall back to the napi frag cache.
+ */
+ if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE))
+ skb = napi_alloc_skb(napi, skb_size);
+ else
+ skb = prepare_frag_skb(napi, skb_size);
+
+ if (unlikely(!skb))
+ return NULL;
+
+ copy_ipoib_buf(skb, data, size);
+
+ return skb;
+}
+
+int hfi1_ipoib_rxq_init(struct net_device *netdev)
+{
+ struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+ struct hfi1_devdata *dd = ipoib_priv->dd;
+ int ret;
+
+ ret = hfi1_netdev_rx_init(dd);
+ if (ret)
+ return ret;
+
+ hfi1_init_aip_rsm(dd);
+
+ return ret;
+}
+
+void hfi1_ipoib_rxq_deinit(struct net_device *netdev)
+{
+ struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+ struct hfi1_devdata *dd = ipoib_priv->dd;
+
+ hfi1_deinit_aip_rsm(dd);
+ hfi1_netdev_rx_destroy(dd);
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
new file mode 100644
index 0000000..ab1eeff
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -0,0 +1,856 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB SDMA functionality
+ */
+
+#include <linux/log2.h>
+#include <linux/circ_buf.h>
+
+#include "sdma.h"
+#include "verbs.h"
+#include "trace_ibhdrs.h"
+#include "ipoib.h"
+
+/* Add a convenience helper */
+#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
+#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
+#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
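(Editorial note, not part of the patch: the mask form only wraps correctly
when size is a power of two, which hfi1_ipoib_txreq_init() guarantees via
roundup_pow_of_two(). Worked case for size = 8, mask = 7:)

/*
 *	CIRC_ADD(7, 1, 8) == (7 + 1) & 7 == 0	wraps to the start
 *	CIRC_PREV(0, 8)   == (0 - 1) & 7 == 7	wraps to the end
 */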
+
+/**
+ * struct ipoib_txreq - IPOIB transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma_hdr: 9b ib headers
+ * @sdma_status: status returned by sdma engine
+ * @priv: ipoib netdev private data
+ * @txq: txq on which skb was output
+ * @skb: skb to send
+ */
+struct ipoib_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_sdma_header sdma_hdr;
+ int sdma_status;
+ struct hfi1_ipoib_dev_priv *priv;
+ struct hfi1_ipoib_txq *txq;
+ struct sk_buff *skb;
+};
+
+struct ipoib_txparms {
+ struct hfi1_devdata *dd;
+ struct rdma_ah_attr *ah_attr;
+ struct hfi1_ibport *ibp;
+ struct hfi1_ipoib_txq *txq;
+ union hfi1_ipoib_flow flow;
+ u32 dqpn;
+ u8 hdr_dwords;
+ u8 entropy;
+};
+
+static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
+{
+ return sent - completed;
+}
+
+static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
+{
+ return hfi1_ipoib_txreqs(txq->sent_txreqs,
+ atomic64_read(&txq->complete_txreqs));
+}
+
+static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
+{
+ if (atomic_inc_return(&txq->stops) == 1)
+ netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
+{
+ if (atomic_dec_and_test(&txq->stops))
+ netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items - 1);
+}
+
+static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items) >> 1;
+}
+
+static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
+{
+ ++txq->sent_txreqs;
+ if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
+ !atomic_xchg(&txq->ring_full, 1))
+ hfi1_ipoib_stop_txq(txq);
+}
+
+static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
+{
+ struct net_device *dev = txq->priv->netdev;
+
+ /* If shutting down just return as queue state is irrelevant */
+ if (unlikely(dev->reg_state != NETREG_REGISTERED))
+ return;
+
+ /*
+ * When the queue has been drained to less than half full it will be
+ * restarted.
+ * The size of the txreq ring is fixed at initialization.
+ * The tx queue len can be adjusted upward while the interface is
+ * running.
+ * The tx queue len can be large enough to overflow the txreq_ring.
+ * Use the minimum of the current tx_queue_len or the rings max txreqs
+ * to protect against ring overflow.
+ */
+ if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
+ atomic_xchg(&txq->ring_full, 0))
+ hfi1_ipoib_wake_txq(txq);
+}
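(Editorial note with illustrative numbers, not part of the patch:)

/*
 *	tx_queue_len = 1000, tx_ring.max_items = 2048:
 *	hwat = min(1000, 2048 - 1)  = 1000	stop the subqueue here
 *	lwat = min(1000, 2048) >> 1 = 500	restart only below this
 * The gap between the two watermarks keeps stop/wake events from
 * ping-ponging on every packet.
 */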
+
+static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
+{
+ struct hfi1_ipoib_dev_priv *priv = tx->priv;
+
+ if (likely(!tx->sdma_status)) {
+ hfi1_ipoib_update_tx_netstats(priv, 1, tx->skb->len);
+ } else {
+ ++priv->netdev->stats.tx_errors;
+ dd_dev_warn(priv->dd,
+ "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
+ __func__, tx->sdma_status,
+ le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx,
+ tx->txq->sde->this_idx);
+ }
+
+ napi_consume_skb(tx->skb, budget);
+ sdma_txclean(priv->dd, &tx->txreq);
+ kmem_cache_free(priv->txreq_cache, tx);
+}
+
+static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget)
+{
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ unsigned long head;
+ unsigned long tail;
+ unsigned int max_tx;
+ int work_done;
+ int tx_count;
+
+ spin_lock_bh(&tx_ring->consumer_lock);
+
+ /* Read index before reading contents at that index. */
+ head = smp_load_acquire(&tx_ring->head);
+ tail = tx_ring->tail;
+ max_tx = tx_ring->max_items;
+
+ work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget);
+
+ for (tx_count = work_done; tx_count; tx_count--) {
+ hfi1_ipoib_free_tx(tx_ring->items[tail], budget);
+ tail = CIRC_NEXT(tail, max_tx);
+ }
+
+ atomic64_add(work_done, &txq->complete_txreqs);
+
+ /* Finished freeing tx items so store the tail value. */
+ smp_store_release(&tx_ring->tail, tail);
+
+ spin_unlock_bh(&tx_ring->consumer_lock);
+
+ hfi1_ipoib_check_queue_stopped(txq);
+
+ return work_done;
+}
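(Editorial note on the barrier pairing above, not part of the patch:)

/*
 * The consumer's smp_load_acquire(&tx_ring->head) pairs with the
 * producer's smp_store_release(&tx_ring->head, ...) in
 * hfi1_ipoib_add_tx(), so a completed slot is always observed fully
 * written; likewise the consumer's smp_store_release(&tx_ring->tail, ...)
 * pairs with the producer's READ_ONCE(tx_ring->tail) when it checks for
 * free space.
 */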
+
+static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev);
+ struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis];
+
+ int work_done = hfi1_ipoib_drain_tx_ring(txq, budget);
+
+ if (work_done < budget)
+ napi_complete_done(napi, work_done);
+
+ return work_done;
+}
+
+static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx)
+{
+ struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring;
+ unsigned long head;
+ unsigned long tail;
+ size_t max_tx;
+
+ spin_lock(&tx_ring->producer_lock);
+
+ head = tx_ring->head;
+ tail = READ_ONCE(tx_ring->tail);
+ max_tx = tx_ring->max_items;
+
+ if (likely(CIRC_SPACE(head, tail, max_tx))) {
+ tx_ring->items[head] = tx;
+
+ /* Finish storing txreq before incrementing head. */
+ smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx));
+ napi_schedule(tx->txq->napi);
+ } else {
+ struct hfi1_ipoib_txq *txq = tx->txq;
+ struct hfi1_ipoib_dev_priv *priv = tx->priv;
+
+ /* Ring was full */
+ hfi1_ipoib_free_tx(tx, 0);
+ atomic64_inc(&txq->complete_txreqs);
+ dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx);
+ }
+
+ spin_unlock(&tx_ring->producer_lock);
+}
+
+static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
+{
+ struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);
+
+ tx->sdma_status = status;
+
+ hfi1_ipoib_add_tx(tx);
+}
+
+static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_devdata *dd = txp->dd;
+ struct sdma_txreq *txreq = &tx->txreq;
+ struct sk_buff *skb = tx->skb;
+ int ret = 0;
+ int i;
+
+ if (skb_headlen(skb)) {
+ ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb));
+ if (unlikely(ret))
+ return ret;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ ret = sdma_txadd_page(dd,
+ txreq,
+ skb_frag_page(frag),
+ frag->bv_offset,
+ skb_frag_size(frag));
+ if (unlikely(ret))
+ break;
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_devdata *dd = txp->dd;
+ struct sdma_txreq *txreq = &tx->txreq;
+ struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+ u16 pkt_bytes =
+ sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
+ int ret;
+
+ ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete);
+ if (unlikely(ret))
+ return ret;
+
+ /* add pbc + headers */
+ ret = sdma_txadd_kvaddr(dd,
+ txreq,
+ sdma_hdr,
+ sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2));
+ if (unlikely(ret))
+ return ret;
+
+ /* add the ulp payload */
+ return hfi1_ipoib_build_ulp_payload(tx, txp);
+}
+
+static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = tx->priv;
+ struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
+ struct sk_buff *skb = tx->skb;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
+ struct rdma_ah_attr *ah_attr = txp->ah_attr;
+ struct ib_other_headers *ohdr;
+ struct ib_grh *grh;
+ u16 dwords;
+ u16 slid;
+ u16 dlid;
+ u16 lrh0;
+ u32 bth0;
+ u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 |
+ priv->netdev->dev_addr[2] << 8 |
+ priv->netdev->dev_addr[3]);
+ u16 payload_dwords;
+ u8 pad_cnt;
+
+ pad_cnt = -skb->len & 3;
+
+ /* Includes ICRC */
+ payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC;
+
+ /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
+ txp->hdr_dwords = 7;
+
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ grh = &sdma_hdr->hdr.ibh.u.l.grh;
+ txp->hdr_dwords +=
+ hfi1_make_grh(txp->ibp,
+ grh,
+ rdma_ah_read_grh(ah_attr),
+ txp->hdr_dwords - LRH_9B_DWORDS,
+ payload_dwords);
+ lrh0 = HFI1_LRH_GRH;
+ ohdr = &sdma_hdr->hdr.ibh.u.l.oth;
+ } else {
+ lrh0 = HFI1_LRH_BTH;
+ ohdr = &sdma_hdr->hdr.ibh.u.oth;
+ }
+
+ lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
+ lrh0 |= (txp->flow.sc5 & 0xf) << 12;
+
+ dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
+ if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ } else {
+ u16 lid = (u16)ppd->lid;
+
+ if (lid) {
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
+ slid = lid;
+ } else {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ }
+ }
+
+ /* Includes ICRC */
+ dwords = txp->hdr_dwords + payload_dwords;
+
+ /* Build the lrh */
+ sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B;
+ hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid);
+
+ /* Build the bth */
+ bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey;
+
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(txp->dqpn);
+ ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs));
+
+ /* Build the deth */
+ ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
+ ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy <<
+ HFI1_IPOIB_ENTROPY_SHIFT) | sqpn);
+
+ /* Construct the pbc. */
+ sdma_hdr->pbc =
+ cpu_to_le64(create_pbc(ppd,
+ ib_is_sc5(txp->flow.sc5) <<
+ PBC_DC_INFO_SHIFT,
+ 0,
+ sc_to_vlt(priv->dd, txp->flow.sc5),
+ dwords - SIZE_OF_CRC +
+ (sizeof(sdma_hdr->pbc) >> 2)));
+}
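(Editorial worked example for the length math above, assuming SIZE_OF_CRC
is one dword as the driver's verbs header defines it:)

/*
 *	skb->len = 57, no GRH:
 *	pad_cnt        = -57 & 3 = 3		pad to a dword multiple
 *	payload_dwords = (57 + 3) / 4 + 1 = 16	ICRC included
 *	hdr_dwords     = 7			LRH+BTH+DETH = (8+12+8)/4
 *	dwords         = 7 + 16 = 23		total 9B packet length
 */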
+
+static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct ipoib_txreq *tx;
+ int ret;
+
+ tx = kmem_cache_alloc_node(priv->txreq_cache,
+ GFP_ATOMIC,
+ priv->dd->node);
+ if (unlikely(!tx))
+ return ERR_PTR(-ENOMEM);
+
+ /* so that we can test if the sdma descriptors are there */
+ tx->txreq.num_desc = 0;
+ tx->priv = priv;
+ tx->txq = txp->txq;
+ tx->skb = skb;
+ INIT_LIST_HEAD(&tx->txreq.list);
+
+ hfi1_ipoib_build_ib_tx_headers(tx, txp);
+
+ ret = hfi1_ipoib_build_tx_desc(tx, txp);
+ if (likely(!ret)) {
+ if (txp->txq->flow.as_int != txp->flow.as_int) {
+ txp->txq->flow.tx_queue = txp->flow.tx_queue;
+ txp->txq->flow.sc5 = txp->flow.sc5;
+ txp->txq->sde =
+ sdma_select_engine_sc(priv->dd,
+ txp->flow.tx_queue,
+ txp->flow.sc5);
+ }
+
+ return tx;
+ }
+
+ sdma_txclean(priv->dd, &tx->txreq);
+ kmem_cache_free(priv->txreq_cache, tx);
+
+ return ERR_PTR(ret);
+}
+
+static int hfi1_ipoib_submit_tx_list(struct net_device *dev,
+ struct hfi1_ipoib_txq *txq)
+{
+ int ret;
+ u16 count_out;
+
+ ret = sdma_send_txlist(txq->sde,
+ iowait_get_ib_work(&txq->wait),
+ &txq->tx_list,
+ &count_out);
+ if (likely(!ret) || ret == -EBUSY || ret == -ECOMM)
+ return ret;
+
+ dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret);
+
+ return ret;
+}
+
+static int hfi1_ipoib_flush_tx_list(struct net_device *dev,
+ struct hfi1_ipoib_txq *txq)
+{
+ int ret = 0;
+
+ if (!list_empty(&txq->tx_list)) {
+ /* Flush the current list */
+ ret = hfi1_ipoib_submit_tx_list(dev, txq);
+
+ if (unlikely(ret))
+ if (ret != -EBUSY)
+ ++dev->stats.tx_carrier_errors;
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq,
+ struct ipoib_txreq *tx)
+{
+ int ret;
+
+ ret = sdma_send_txreq(txq->sde,
+ iowait_get_ib_work(&txq->wait),
+ &tx->txreq,
+ txq->pkts_sent);
+ if (likely(!ret)) {
+ txq->pkts_sent = true;
+ iowait_starve_clear(txq->pkts_sent, &txq->wait);
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_send_dma_single(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct ipoib_txreq *tx;
+ int ret;
+
+ tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+ if (IS_ERR(tx)) {
+ int ret = PTR_ERR(tx);
+
+ dev_kfree_skb_any(skb);
+
+ if (ret == -ENOMEM)
+ ++dev->stats.tx_errors;
+ else
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+ }
+
+ ret = hfi1_ipoib_submit_tx(txq, tx);
+ if (likely(!ret)) {
+tx_ok:
+ trace_sdma_output_ibhdr(tx->priv->dd,
+ &tx->sdma_hdr.hdr,
+ ib_is_sc5(txp->flow.sc5));
+ hfi1_ipoib_check_queue_depth(txq);
+ return NETDEV_TX_OK;
+ }
+
+ txq->pkts_sent = false;
+
+ if (ret == -EBUSY || ret == -ECOMM)
+ goto tx_ok;
+
+ sdma_txclean(priv->dd, &tx->txreq);
+ dev_kfree_skb_any(skb);
+ kmem_cache_free(priv->txreq_cache, tx);
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+}
+
+static int hfi1_ipoib_send_dma_list(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct ipoib_txreq *tx;
+
+	/* Has the flow changed? */
+ if (txq->flow.as_int != txp->flow.as_int) {
+ int ret;
+
+ ret = hfi1_ipoib_flush_tx_list(dev, txq);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY)
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+ }
+ tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+ if (IS_ERR(tx)) {
+ int ret = PTR_ERR(tx);
+
+ dev_kfree_skb_any(skb);
+
+ if (ret == -ENOMEM)
+ ++dev->stats.tx_errors;
+ else
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+ }
+
+ list_add_tail(&tx->txreq.list, &txq->tx_list);
+
+ hfi1_ipoib_check_queue_depth(txq);
+
+ trace_sdma_output_ibhdr(tx->priv->dd,
+ &tx->sdma_hdr.hdr,
+ ib_is_sc5(txp->flow.sc5));
+
+ if (!netdev_xmit_more())
+ (void)hfi1_ipoib_flush_tx_list(dev, txq);
+
+ return NETDEV_TX_OK;
+}
+
+static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb)
+{
+ if (skb_transport_header_was_set(skb)) {
+ u8 *hdr = (u8 *)skb_transport_header(skb);
+
+ return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]);
+ }
+
+ return (u8)skb_get_queue_mapping(skb);
+}
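(Editorial worked example, not part of the patch: for TCP/UDP the first
four transport-header bytes are the source and destination ports, so the
XOR above varies per flow.)

/*
 *	sport 0x1234, dport 0x14e9 (illustrative values):
 *	entropy = 0x12 ^ 0x34 ^ 0x14 ^ 0xe9 = 0xdb
 */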
+
+int hfi1_ipoib_send_dma(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct ipoib_txparms txp;
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) {
+ dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n",
+ skb->len,
+ rn->mtu + HFI1_IPOIB_ENCAP_LEN);
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ txp.dd = priv->dd;
+ txp.ah_attr = &ibah_to_rvtah(address)->attr;
+ txp.ibp = to_iport(priv->device, priv->port_num);
+ txp.txq = &priv->txqs[skb_get_queue_mapping(skb)];
+ txp.dqpn = dqpn;
+ txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)];
+ txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb);
+ txp.entropy = hfi1_ipoib_calc_entropy(skb);
+
+ if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list))
+ return hfi1_ipoib_send_dma_list(dev, skb, &txp);
+
+ return hfi1_ipoib_send_dma_single(dev, skb, &txp);
+}
+
+/*
+ * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds the Tx queue's wait
+ * structure to the sdma engine's dmawait list to be woken up when descriptors
+ * become available.
+ */
+static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *txreq,
+ uint seq,
+ bool pkts_sent)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait->iow, struct hfi1_ipoib_txq, wait);
+
+ write_seqlock(&sde->waitlock);
+
+ if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) {
+ if (sdma_progress(sde, seq, txreq)) {
+ write_sequnlock(&sde->waitlock);
+ return -EAGAIN;
+ }
+
+ if (list_empty(&txreq->list))
+ /* came from non-list submit */
+ list_add_tail(&txreq->list, &txq->tx_list);
+ if (list_empty(&txq->wait.list)) {
+ if (!atomic_xchg(&txq->no_desc, 1))
+ hfi1_ipoib_stop_txq(txq);
+ iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+ }
+
+ write_sequnlock(&sde->waitlock);
+ return -EBUSY;
+ }
+
+ write_sequnlock(&sde->waitlock);
+ return -EINVAL;
+}
+
+/*
+ * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors become available and the Tx
+ * queue's wait structure was previously added to the sdma engine's dmawait list.
+ */
+static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait, struct hfi1_ipoib_txq, wait);
+
+ if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED))
+ iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND);
+}
+
+static void hfi1_ipoib_flush_txq(struct work_struct *work)
+{
+ struct iowait_work *ioww =
+ container_of(work, struct iowait_work, iowork);
+ struct iowait *wait = iowait_ioww_to_iow(ioww);
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait, struct hfi1_ipoib_txq, wait);
+ struct net_device *dev = txq->priv->netdev;
+
+ if (likely(dev->reg_state == NETREG_REGISTERED) &&
+ likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
+ if (atomic_xchg(&txq->no_desc, 0))
+ hfi1_ipoib_wake_txq(txq);
+}
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
+{
+ struct net_device *dev = priv->netdev;
+ char buf[HFI1_IPOIB_TXREQ_NAME_LEN];
+ unsigned long tx_ring_size;
+ int i;
+
+ /*
+ * Ring holds 1 less than tx_ring_size
+ * Round up to next power of 2 in order to hold at least tx_queue_len
+ */
+ tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1);
+
+ snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit);
+ priv->txreq_cache = kmem_cache_create(buf,
+ sizeof(struct ipoib_txreq),
+ 0,
+ 0,
+ NULL);
+ if (!priv->txreq_cache)
+ return -ENOMEM;
+
+ priv->tx_napis = kcalloc_node(dev->num_tx_queues,
+ sizeof(struct napi_struct),
+ GFP_KERNEL,
+ priv->dd->node);
+ if (!priv->tx_napis)
+ goto free_txreq_cache;
+
+ priv->txqs = kcalloc_node(dev->num_tx_queues,
+ sizeof(struct hfi1_ipoib_txq),
+ GFP_KERNEL,
+ priv->dd->node);
+ if (!priv->txqs)
+ goto free_tx_napis;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ iowait_init(&txq->wait,
+ 0,
+ hfi1_ipoib_flush_txq,
+ NULL,
+ hfi1_ipoib_sdma_sleep,
+ hfi1_ipoib_sdma_wakeup,
+ NULL,
+ NULL);
+ txq->priv = priv;
+ txq->sde = NULL;
+ INIT_LIST_HEAD(&txq->tx_list);
+ atomic64_set(&txq->complete_txreqs, 0);
+ atomic_set(&txq->stops, 0);
+ atomic_set(&txq->ring_full, 0);
+ atomic_set(&txq->no_desc, 0);
+ txq->q_idx = i;
+ txq->flow.tx_queue = 0xff;
+ txq->flow.sc5 = 0xff;
+ txq->pkts_sent = false;
+
+ netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+ priv->dd->node);
+
+ txq->tx_ring.items =
+ kcalloc_node(tx_ring_size,
+ sizeof(struct ipoib_txreq *),
+ GFP_KERNEL, priv->dd->node);
+ if (!txq->tx_ring.items)
+ goto free_txqs;
+
+ spin_lock_init(&txq->tx_ring.producer_lock);
+ spin_lock_init(&txq->tx_ring.consumer_lock);
+ txq->tx_ring.max_items = tx_ring_size;
+
+ txq->napi = &priv->tx_napis[i];
+ netif_tx_napi_add(dev, txq->napi,
+ hfi1_ipoib_process_tx_ring,
+ NAPI_POLL_WEIGHT);
+ }
+
+ return 0;
+
+free_txqs:
+ for (i--; i >= 0; i--) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ netif_napi_del(txq->napi);
+ kfree(txq->tx_ring.items);
+ }
+
+ kfree(priv->txqs);
+ priv->txqs = NULL;
+
+free_tx_napis:
+ kfree(priv->tx_napis);
+ priv->tx_napis = NULL;
+
+free_txreq_cache:
+ kmem_cache_destroy(priv->txreq_cache);
+ priv->txreq_cache = NULL;
+ return -ENOMEM;
+}
+
+static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
+{
+ struct sdma_txreq *txreq;
+ struct sdma_txreq *txreq_tmp;
+ atomic64_t *complete_txreqs = &txq->complete_txreqs;
+
+ list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
+ struct ipoib_txreq *tx =
+ container_of(txreq, struct ipoib_txreq, txreq);
+
+ list_del(&txreq->list);
+ sdma_txclean(txq->priv->dd, &tx->txreq);
+ dev_kfree_skb_any(tx->skb);
+ kmem_cache_free(txq->priv->txreq_cache, tx);
+ atomic64_inc(complete_txreqs);
+ }
+
+ if (hfi1_ipoib_used(txq))
+ dd_dev_warn(txq->priv->dd,
+ "txq %d not empty found %llu requests\n",
+ txq->q_idx,
+ hfi1_ipoib_txreqs(txq->sent_txreqs,
+ atomic64_read(complete_txreqs)));
+}
+
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->netdev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ iowait_cancel_work(&txq->wait);
+ iowait_sdma_drain(&txq->wait);
+ hfi1_ipoib_drain_tx_list(txq);
+ netif_napi_del(txq->napi);
+ (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
+ kfree(txq->tx_ring.items);
+ }
+
+ kfree(priv->txqs);
+ priv->txqs = NULL;
+
+ kfree(priv->tx_napis);
+ priv->tx_napis = NULL;
+
+ kmem_cache_destroy(priv->txreq_cache);
+ priv->txreq_cache = NULL;
+}
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ napi_enable(txq->napi);
+ }
+}
+
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ napi_disable(txq->napi);
+ (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
+ }
+}
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index d8ff063..3222e3a 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -721,7 +721,7 @@
/* Bad mkey not a violation below level 2 */
if (ibp->rvp.mkeyprot < 2)
break;
- /* fall through */
+ fallthrough;
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_TRAP_REPRESS:
if (ibp->rvp.mkey_violations != 0xFFFF)
@@ -1272,7 +1272,7 @@
case IB_PORT_NOP:
if (phys_state == IB_PORTPHYSSTATE_NOP)
break;
- /* FALLTHROUGH */
+ fallthrough;
case IB_PORT_DOWN:
if (phys_state == IB_PORTPHYSSTATE_NOP) {
link_state = HLS_DN_DOWNDEF;
@@ -2300,7 +2300,6 @@
* can be changed from the default values
*/
case OPA_VLARB_PREEMPT_ELEMENTS:
- /* FALLTHROUGH */
case OPA_VLARB_PREEMPT_MATRIX:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
break;
@@ -2381,7 +2380,7 @@
__be64 port_vl_rcv_bubble;
__be64 port_vl_mark_fecn;
__be64 port_vl_xmit_discards;
- } vls[0]; /* real array size defined by # bits set in vl_select_mask */
+ } vls[]; /* real array size defined by # bits set in vl_select_mask */
};
enum counter_selects {
@@ -2423,7 +2422,7 @@
__be16 attr_id;
__be16 err_reqlength; /* 1 bit, 8 res, 7 bit */
__be32 attr_mod;
- u8 data[0];
+ u8 data[];
};
#define MSK_LLI 0x000000f0
@@ -4170,7 +4169,7 @@
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -4240,7 +4239,7 @@
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -4915,16 +4914,11 @@
*/
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
- switch (in_mad->base_version) {
+ switch (in_mad->mad_hdr.base_version) {
case OPA_MGMT_BASE_VERSION:
- if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
- dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
- return IB_MAD_RESULT_FAILURE;
- }
return hfi1_process_opa_mad(ibdev, mad_flags, port,
in_wc, in_grh,
(struct opa_mad *)in_mad,
@@ -4932,10 +4926,8 @@
out_mad_size,
out_mad_pkey_index);
case IB_MGMT_BASE_VERSION:
- return hfi1_process_ib_mad(ibdev, mad_flags, port,
- in_wc, in_grh,
- (const struct ib_mad *)in_mad,
- (struct ib_mad *)out_mad);
+ return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
+ in_grh, in_mad, out_mad);
default:
break;
}
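The hunks above replace free-form /* FALLTHROUGH */ comments with the fallthrough pseudo-keyword, which expands to __attribute__((__fallthrough__)) on compilers that support it, so -Wimplicit-fallthrough can verify that every fall-through is intentional. A minimal sketch of the pattern; the function itself is illustrative.

#include <linux/compiler_attributes.h>

static int accumulate(int op)
{
	int score = 0;

	switch (op) {
	case 2:
		score += 10;
		fallthrough;	/* deliberate: case 2 also does case 1 work */
	case 1:
		score += 1;
		break;
	default:
		break;
	}
	return score;
}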
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 2f48e69..889e63d 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -165,7 +165,7 @@
} __packed ntc_2048;
};
- u8 class_data[0];
+ u8 class_data[];
};
#define IB_VLARB_LOWPRI_0_31 1
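The vls[0], data[0], and class_data[0] conversions above adopt C99 flexible array members, which let the compiler and runtime checkers see the true object bounds. Allocation of such structures typically pairs with struct_size(); a hedged sketch with an illustrative structure:

#include <linux/overflow.h>
#include <linux/slab.h>

struct example_map {		/* illustrative, not a driver structure */
	u32 mask;
	u32 entries[];		/* flexible array member, formerly [0] */
};

static struct example_map *example_map_alloc(unsigned int nelems)
{
	struct example_map *map;

	/* struct_size() guards the size computation against overflow */
	map = kzalloc(struct_size(map, entries, nelems), GFP_KERNEL);
	return map;
}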
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 14d2a90..d213f65 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -1,4 +1,5 @@
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2016 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -48,23 +49,11 @@
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
+#include <linux/sched/mm.h>
#include "mmu_rb.h"
#include "trace.h"
-struct mmu_rb_handler {
- struct mmu_notifier mn;
- struct rb_root_cached root;
- void *ops_arg;
- spinlock_t lock; /* protect the RB tree */
- struct mmu_rb_ops *ops;
- struct mm_struct *mm;
- struct list_head lru_list;
- struct work_struct del_work;
- struct list_head del_list;
- struct workqueue_struct *wq;
-};
-
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
@@ -92,37 +81,36 @@
return PAGE_ALIGN(node->addr + node->len) - 1;
}
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+int hfi1_mmu_rb_register(void *ops_arg,
struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler)
{
- struct mmu_rb_handler *handlr;
+ struct mmu_rb_handler *h;
int ret;
- handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
- if (!handlr)
+ h = kzalloc(sizeof(*h), GFP_KERNEL);
+ if (!h)
return -ENOMEM;
- handlr->root = RB_ROOT_CACHED;
- handlr->ops = ops;
- handlr->ops_arg = ops_arg;
- INIT_HLIST_NODE(&handlr->mn.hlist);
- spin_lock_init(&handlr->lock);
- handlr->mn.ops = &mn_opts;
- handlr->mm = mm;
- INIT_WORK(&handlr->del_work, handle_remove);
- INIT_LIST_HEAD(&handlr->del_list);
- INIT_LIST_HEAD(&handlr->lru_list);
- handlr->wq = wq;
+ h->root = RB_ROOT_CACHED;
+ h->ops = ops;
+ h->ops_arg = ops_arg;
+ INIT_HLIST_NODE(&h->mn.hlist);
+ spin_lock_init(&h->lock);
+ h->mn.ops = &mn_opts;
+ INIT_WORK(&h->del_work, handle_remove);
+ INIT_LIST_HEAD(&h->del_list);
+ INIT_LIST_HEAD(&h->lru_list);
+ h->wq = wq;
- ret = mmu_notifier_register(&handlr->mn, handlr->mm);
+ ret = mmu_notifier_register(&h->mn, current->mm);
if (ret) {
- kfree(handlr);
+ kfree(h);
return ret;
}
- *handler = handlr;
+ *handler = h;
return 0;
}
@@ -134,7 +122,7 @@
struct list_head del_list;
/* Unregister first so we don't get any more notifications. */
- mmu_notifier_unregister(&handler->mn, handler->mm);
+ mmu_notifier_unregister(&handler->mn, handler->mn.mm);
/*
* Make sure the wq delete handler is finished running. It will not
@@ -166,6 +154,10 @@
int ret = 0;
trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
+
+ if (current->mm != handler->mn.mm)
+ return -EPERM;
+
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, mnode->addr, mnode->len);
if (node) {
@@ -180,6 +172,7 @@
__mmu_int_rb_remove(mnode, &handler->root);
list_del(&mnode->list); /* remove from LRU list */
}
+ mnode->handler = handler;
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
return ret;
@@ -217,6 +210,9 @@
unsigned long flags;
bool ret = false;
+ if (current->mm != handler->mn.mm)
+ return ret;
+
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, addr, len);
if (node) {
@@ -239,6 +235,9 @@
unsigned long flags;
bool stop = false;
+ if (current->mm != handler->mn.mm)
+ return;
+
INIT_LIST_HEAD(&del_list);
spin_lock_irqsave(&handler->lock, flags);
@@ -272,6 +271,9 @@
{
unsigned long flags;
+ if (current->mm != handler->mn.mm)
+ return;
+
/* Validity of handler and node pointers has been checked by caller. */
trace_hfi1_mmu_rb_remove(node->addr, node->len);
spin_lock_irqsave(&handler->lock, flags);
@@ -333,7 +335,7 @@
/*
* Work queue function to remove all nodes that have been queued up to
- * be removed. The key feature is that mm->mmap_sem is not being held
+ * be removed. The key feature is that mm->mmap_lock is not being held
* and the remove callback can sleep while taking it, if needed.
*/
static void handle_remove(struct work_struct *work)
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index f04cec1..423aacc 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -1,4 +1,5 @@
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2016 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -54,6 +55,7 @@
unsigned long len;
unsigned long __last;
struct rb_node node;
+ struct mmu_rb_handler *handler;
struct list_head list;
};
@@ -71,7 +73,19 @@
void *evict_arg, bool *stop);
};
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+struct mmu_rb_handler {
+ struct mmu_notifier mn;
+ struct rb_root_cached root;
+ void *ops_arg;
+ spinlock_t lock; /* protect the RB tree */
+ struct mmu_rb_ops *ops;
+ struct list_head lru_list;
+ struct work_struct del_work;
+ struct list_head del_list;
+ struct workqueue_struct *wq;
+};
+
+int hfi1_mmu_rb_register(void *ops_arg,
struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler);
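With the mm argument dropped from hfi1_mmu_rb_register(), the handler is bound to the registering process via mmu_notifier_register(&h->mn, current->mm), and every cache operation now bails out when invoked from a different mm. A small sketch of that ownership guard; the helper name is hypothetical:

#include <linux/sched.h>
#include "mmu_rb.h"

/* Hypothetical helper: true only in the process that registered the
 * handler; mn.mm was recorded by mmu_notifier_register(). */
static bool mmu_rb_owned_by_current(struct mmu_rb_handler *handler)
{
	return current->mm == handler->mn.mm;
}

Insert, evict, and remove then reduce to early returns such as "if (!mmu_rb_owned_by_current(handler)) return -EPERM;", as in the hunks above.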
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
index d920b16..d61ee85 100644
--- a/drivers/infiniband/hw/hfi1/msix.c
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
+#include "netdev.h"
/**
* msix_initialize() - Calculate, request and configure MSIx IRQs
@@ -69,7 +70,7 @@
* one for each VNIC context
* ...any new IRQs should be added here.
*/
- total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_netdev_contexts;
if (total >= CCE_NUM_MSIX_VECTORS)
return -EINVAL;
@@ -115,13 +116,11 @@
*/
static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
irq_handler_t handler, irq_handler_t thread,
- u32 idx, enum irq_type type)
+ enum irq_type type, const char *name)
{
unsigned long nr;
int irq;
int ret;
- const char *err_info;
- char name[MAX_NAME_SIZE];
struct hfi1_msix_entry *me;
/* Allocate an MSIx vector */
@@ -135,43 +134,15 @@
if (nr == dd->msix_info.max_requested)
return -ENOSPC;
- /* Specific verification and determine the name */
- switch (type) {
- case IRQ_GENERAL:
- /* general interrupt must be MSIx vector 0 */
- if (nr) {
- spin_lock(&dd->msix_info.msix_lock);
- __clear_bit(nr, dd->msix_info.in_use_msix);
- spin_unlock(&dd->msix_info.msix_lock);
- dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
- nr);
- return -EINVAL;
- }
- snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
- err_info = "general";
- break;
- case IRQ_SDMA:
- snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
- dd->unit, idx);
- err_info = "sdma";
- break;
- case IRQ_RCVCTXT:
- snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
- dd->unit, idx);
- err_info = "receive context";
- break;
- case IRQ_OTHER:
- default:
+ if (type < IRQ_SDMA || type >= IRQ_OTHER)
return -EINVAL;
- }
- name[sizeof(name) - 1] = 0;
irq = pci_irq_vector(dd->pcidev, nr);
ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
if (ret) {
dd_dev_err(dd,
- "%s: request for IRQ %d failed, MSIx %d, err %d\n",
- err_info, irq, idx, ret);
+ "%s: request for IRQ %d failed, MSIx %lx, err %d\n",
+ name, irq, nr, ret);
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
@@ -190,22 +161,19 @@
/* This is a request, so a failure is not fatal */
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
- dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
+ dd_dev_err(dd, "%s: unable to pin IRQ %d\n", name, ret);
return nr;
}
-/**
- * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
- * @rcd: valid rcd context
- *
- */
-int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd,
+ irq_handler_t handler,
+ irq_handler_t thread,
+ const char *name)
{
- int nr;
-
- nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
- receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
+ int nr = msix_request_irq(rcd->dd, rcd, handler, thread,
+ rcd->is_vnic ? IRQ_NETDEVCTXT : IRQ_RCVCTXT,
+ name);
if (nr < 0)
return nr;
@@ -222,6 +190,37 @@
}
/**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+ rcd->dd->unit, rcd->ctxt);
+
+ return msix_request_rcd_irq_common(rcd, receive_context_interrupt,
+ receive_context_thread, name);
+}
+
+/**
+ * msix_netdev_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * for netdev contexts
+ * @rcd: valid netdev context
+ */
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d nd kctxt%d",
+ rcd->dd->unit, rcd->ctxt);
+ return msix_request_rcd_irq_common(rcd, receive_context_interrupt_napi,
+ NULL, name);
+}
+
+/**
* msix_request_smda_ira() - Helper for getting SDMA IRQ resources
* @sde: valid sdma engine
*
@@ -229,9 +228,12 @@
int msix_request_sdma_irq(struct sdma_engine *sde)
{
int nr;
+ char name[MAX_NAME_SIZE];
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+ sde->dd->unit, sde->this_idx);
nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
- sde->this_idx, IRQ_SDMA);
+ IRQ_SDMA, name);
if (nr < 0)
return nr;
sde->msix_intr = nr;
@@ -241,6 +243,32 @@
}
/**
+ * msix_request_general_irq() - Helper for getting general IRQ
+ * resources
+ * @dd: valid device data
+ */
+int msix_request_general_irq(struct hfi1_devdata *dd)
+{
+ int nr;
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+ nr = msix_request_irq(dd, dd, general_interrupt, NULL, IRQ_GENERAL,
+ name);
+ if (nr < 0)
+ return nr;
+
+ /* general interrupt must be MSIx vector 0 */
+ if (nr) {
+ msix_free_irq(dd, (u8)nr);
+ dd_dev_err(dd, "Invalid index %d for GENERAL IRQ\n", nr);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
* enable_sdma_src() - Helper to enable SDMA IRQ srcs
* @dd: valid devdata structure
* @i: index of SDMA engine
@@ -265,10 +293,9 @@
int msix_request_irqs(struct hfi1_devdata *dd)
{
int i;
- int ret;
+ int ret = msix_request_general_irq(dd);
- ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
- if (ret < 0)
+ if (ret)
return ret;
for (i = 0; i < dd->num_sdma; i++) {
@@ -345,15 +372,16 @@
}
/**
- * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize
+ * msix_netdev_synchronize_irq() - netdev IRQ synchronize
* @dd: valid devdata
*/
-void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd)
{
int i;
+ int ctxt_count = hfi1_netdev_ctxt_count(dd);
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ for (i = 0; i < ctxt_count; i++) {
+ struct hfi1_ctxtdata *rcd = hfi1_netdev_get_ctxt(dd, i);
struct hfi1_msix_entry *me;
me = &dd->msix_info.msix_entries[rcd->msix_intr];
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
index a514881..e63e944 100644
--- a/drivers/infiniband/hw/hfi1/msix.h
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,11 +54,13 @@
int msix_initialize(struct hfi1_devdata *dd);
int msix_request_irqs(struct hfi1_devdata *dd);
void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_general_irq(struct hfi1_devdata *dd);
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
int msix_request_sdma_irq(struct sdma_engine *sde);
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
-/* VNIC interface */
-void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
+/* Netdev interface */
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd);
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd);
#endif
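After this refactor msix_request_irq() no longer synthesizes the IRQ name from a type/index pair; each caller formats the name and passes it down to pci_request_irq(). A hedged sketch of the caller-side pattern using only the generic PCI API; the handler and name format are placeholders:

#include <linux/interrupt.h>
#include <linux/pci.h>

static irqreturn_t example_handler(int irq, void *arg)
{
	return IRQ_HANDLED;
}

static int request_named_vector(struct pci_dev *pdev, unsigned int nr,
				void *arg, int unit, int idx)
{
	/* pci_request_irq() accepts a printf-style device name */
	return pci_request_irq(pdev, nr, example_handler, NULL, arg,
			       "hfi1_%d example%d", unit, idx);
}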
diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h
new file mode 100644
index 0000000..947543a
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#ifndef HFI1_NETDEV_H
+#define HFI1_NETDEV_H
+
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+
+/**
+ * struct hfi1_netdev_rxq - Receive Queue for HFI
+ * dummy netdev. Both the IPoIB and VNIC netdevices run on
+ * top of this device.
+ * @napi: napi object
+ * @priv: ptr to netdev_priv
+ * @rcd: ptr to receive context data
+ */
+struct hfi1_netdev_rxq {
+ struct napi_struct napi;
+ struct hfi1_netdev_priv *priv;
+ struct hfi1_ctxtdata *rcd;
+};
+
+/*
+ * Number of netdev contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_MAX_NETDEV_CTXTS 8
+
+/* Number of NETDEV RSM entries */
+#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS
+
+/**
+ * struct hfi1_netdev_priv - data required to setup and run HFI netdev.
+ * @dd: hfi1_devdata
+ * @rxq: pointer to dummy netdev receive queues.
+ * @num_rx_q: number of receive queues
+ * @rmt_start: first free index in RMT Array
+ * @dev_tbl: netdev table of unique identifiers for VNIC and IPoIB VLANs.
+ * @enabled: atomic counter of netdevs enabling receive queues.
+ * When 0 NAPI will be disabled.
+ * @netdevs: atomic counter of netdevs using dummy netdev.
+ * When 0 receive queues will be freed.
+ */
+struct hfi1_netdev_priv {
+ struct hfi1_devdata *dd;
+ struct hfi1_netdev_rxq *rxq;
+ int num_rx_q;
+ int rmt_start;
+ struct xarray dev_tbl;
+ /* count of enabled napi polls */
+ atomic_t enabled;
+ /* count of netdevs on top */
+ atomic_t netdevs;
+};
+
+static inline
+struct hfi1_netdev_priv *hfi1_netdev_priv(struct net_device *dev)
+{
+ return (struct hfi1_netdev_priv *)&dev[1];
+}
+
+static inline
+int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return priv->num_rx_q;
+}
+
+static inline
+struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return priv->rxq[ctxt].rcd;
+}
+
+static inline
+int hfi1_netdev_get_free_rmt_idx(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return priv->rmt_start;
+}
+
+static inline
+void hfi1_netdev_set_free_rmt_idx(struct hfi1_devdata *dd, int rmt_idx)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ priv->rmt_start = rmt_idx;
+}
+
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+ struct cpumask *cpu_mask);
+
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd);
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd);
+int hfi1_netdev_alloc(struct hfi1_devdata *dd);
+void hfi1_netdev_free(struct hfi1_devdata *dd);
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data);
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id);
+
+/* chip.c */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget);
+
+#endif /* HFI1_NETDEV_H */
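hfi1_netdev_priv() relies on the private area being co-allocated immediately after the struct net_device, which is why hfi1_netdev_alloc() sizes the dummy netdev as sizeof(*dd->dummy_netdev) + sizeof(struct hfi1_netdev_priv). A standalone sketch of that layout pairing, with an illustrative priv type:

#include <linux/netdevice.h>
#include <linux/slab.h>

struct example_priv {		/* stand-in for hfi1_netdev_priv */
	int num_rx_q;
};

static struct net_device *example_alloc_dummy_netdev(void)
{
	/* one allocation: net_device followed by its private data */
	return kzalloc(sizeof(struct net_device) +
		       sizeof(struct example_priv), GFP_KERNEL);
}

static struct example_priv *example_priv(struct net_device *dev)
{
	return (struct example_priv *)&dev[1];	/* area after dev */
}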
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
new file mode 100644
index 0000000..ea95baa
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for netdev RX functionality
+ */
+
+#include "sdma.h"
+#include "verbs.h"
+#include "netdev.h"
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <rdma/ib_verbs.h>
+
+static int hfi1_netdev_setup_ctxt(struct hfi1_netdev_priv *priv,
+ struct hfi1_ctxtdata *uctxt)
+{
+ unsigned int rcvctrl_ops;
+ struct hfi1_devdata *dd = priv->dd;
+ int ret;
+
+ uctxt->rhf_rcv_function_map = netdev_rhf_rcv_functions;
+ uctxt->do_interrupt = &handle_receive_interrupt_napi_sp;
+
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
+
+ ret = hfi1_setup_eagerbufs(uctxt);
+ if (ret)
+ goto done;
+
+ clear_rcvhdrtail(uctxt);
+
+ rcvctrl_ops = HFI1_RCVCTRL_CTXT_DIS;
+ rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_DIS;
+
+ if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
+done:
+ return ret;
+}
+
+static int hfi1_netdev_allocate_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **ctxt)
+{
+ struct hfi1_ctxtdata *uctxt;
+ int ret;
+
+ if (dd->flags & HFI1_FROZEN)
+ return -EIO;
+
+ ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
+ if (ret < 0) {
+ dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+ return -ENOMEM;
+ }
+
+ uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+ /* Netdev contexts are always NO_RDMA_RTAIL */
+ uctxt->fast_handler = handle_receive_interrupt_napi_fp;
+ uctxt->slow_handler = handle_receive_interrupt_napi_sp;
+ hfi1_set_seq_cnt(uctxt, 1);
+ uctxt->is_vnic = true;
+
+ hfi1_stats.sps_ctxts++;
+
+ dd_dev_info(dd, "created netdev context %d\n", uctxt->ctxt);
+ *ctxt = uctxt;
+
+ return 0;
+}
+
+static void hfi1_netdev_deallocate_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ flush_wc();
+
+ /*
+ * Disable receive context and interrupt available, reset all
+ * RcvCtxtCtrl bits to default values.
+ */
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_TIDFLOW_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+ HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+
+ if (uctxt->msix_intr != CCE_NUM_MSIX_VECTORS)
+ msix_free_irq(dd, uctxt->msix_intr);
+
+ uctxt->msix_intr = CCE_NUM_MSIX_VECTORS;
+ uctxt->event_flags = 0;
+
+ hfi1_clear_tids(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt);
+
+ hfi1_stats.sps_ctxts--;
+
+ hfi1_free_ctxt(uctxt);
+}
+
+static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_priv *priv,
+ struct hfi1_ctxtdata **ctxt)
+{
+ int rc;
+ struct hfi1_devdata *dd = priv->dd;
+
+ rc = hfi1_netdev_allocate_ctxt(dd, ctxt);
+ if (rc) {
+ dd_dev_err(dd, "netdev ctxt alloc failed %d\n", rc);
+ return rc;
+ }
+
+ rc = hfi1_netdev_setup_ctxt(priv, *ctxt);
+ if (rc) {
+ dd_dev_err(dd, "netdev ctxt setup failed %d\n", rc);
+ hfi1_netdev_deallocate_ctxt(dd, *ctxt);
+ *ctxt = NULL;
+ }
+
+ return rc;
+}
+
+/**
+ * hfi1_num_netdev_contexts - Count of netdev recv contexts to use.
+ * @dd: device on which to allocate netdev contexts
+ * @available_contexts: count of available receive contexts
+ * @cpu_mask: mask of possible cpus to include for contexts
+ *
+ * Return: count of physical cores on a node or the remaining available recv
+ * contexts for netdev recv context usage up to the maximum of
+ * HFI1_MAX_NETDEV_CTXTS.
+ * A value of 0 can be returned when acceleration is explicitly turned off,
+ * a memory allocation error occurs, or there are no available contexts.
+ *
+ */
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+ struct cpumask *cpu_mask)
+{
+ cpumask_var_t node_cpu_mask;
+ unsigned int available_cpus;
+
+ if (!HFI1_CAP_IS_KSET(AIP))
+ return 0;
+
+ /* Always give user contexts priority over netdev contexts */
+ if (available_contexts == 0) {
+ dd_dev_info(dd, "No receive contexts available for netdevs.\n");
+ return 0;
+ }
+
+ if (!zalloc_cpumask_var(&node_cpu_mask, GFP_KERNEL)) {
+ dd_dev_err(dd, "Unable to allocate cpu_mask for netdevs.\n");
+ return 0;
+ }
+
+ cpumask_and(node_cpu_mask, cpu_mask, cpumask_of_node(dd->node));
+
+ available_cpus = cpumask_weight(node_cpu_mask);
+
+ free_cpumask_var(node_cpu_mask);
+
+ return min3(available_cpus, available_contexts,
+ (u32)HFI1_MAX_NETDEV_CTXTS);
+}
+
+static int hfi1_netdev_rxq_init(struct net_device *dev)
+{
+ int i;
+ int rc;
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev);
+ struct hfi1_devdata *dd = priv->dd;
+
+ priv->num_rx_q = dd->num_netdev_contexts;
+ priv->rxq = kcalloc_node(priv->num_rx_q, sizeof(struct hfi1_netdev_rxq),
+ GFP_KERNEL, dd->node);
+
+ if (!priv->rxq) {
+ dd_dev_err(dd, "Unable to allocate netdev queue data\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < priv->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ rc = hfi1_netdev_allot_ctxt(priv, &rxq->rcd);
+ if (rc)
+ goto bail_context_irq_failure;
+
+ hfi1_rcd_get(rxq->rcd);
+ rxq->priv = priv;
+ rxq->rcd->napi = &rxq->napi;
+ dd_dev_info(dd, "Setting rcv queue %d napi to context %d\n",
+ i, rxq->rcd->ctxt);
+ /*
+ * Disable BUSY_POLL on this NAPI as this is not supported
+ * right now.
+ */
+ set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
+ netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
+ rc = msix_netdev_request_rcd_irq(rxq->rcd);
+ if (rc)
+ goto bail_context_irq_failure;
+ }
+
+ return 0;
+
+bail_context_irq_failure:
+ dd_dev_err(dd, "Unable to allot receive context\n");
+ for (; i >= 0; i--) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ if (rxq->rcd) {
+ hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+ hfi1_rcd_put(rxq->rcd);
+ rxq->rcd = NULL;
+ }
+ }
+ kfree(priv->rxq);
+ priv->rxq = NULL;
+
+ return rc;
+}
+
+static void hfi1_netdev_rxq_deinit(struct net_device *dev)
+{
+ int i;
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dev);
+ struct hfi1_devdata *dd = priv->dd;
+
+ for (i = 0; i < priv->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ netif_napi_del(&rxq->napi);
+ hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+ hfi1_rcd_put(rxq->rcd);
+ rxq->rcd = NULL;
+ }
+
+ kfree(priv->rxq);
+ priv->rxq = NULL;
+ priv->num_rx_q = 0;
+}
+
+static void enable_queues(struct hfi1_netdev_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ dd_dev_info(priv->dd, "enabling queue %d on context %d\n", i,
+ rxq->rcd->ctxt);
+ napi_enable(&rxq->napi);
+ hfi1_rcvctrl(priv->dd,
+ HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB,
+ rxq->rcd);
+ }
+}
+
+static void disable_queues(struct hfi1_netdev_priv *priv)
+{
+ int i;
+
+ msix_netdev_synchronize_irq(priv->dd);
+
+ for (i = 0; i < priv->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &priv->rxq[i];
+
+ dd_dev_info(priv->dd, "disabling queue %d on context %d\n", i,
+ rxq->rcd->ctxt);
+
+ /* wait for napi if it was scheduled */
+ hfi1_rcvctrl(priv->dd,
+ HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS,
+ rxq->rcd);
+ napi_synchronize(&rxq->napi);
+ napi_disable(&rxq->napi);
+ }
+}
+
+/**
+ * hfi1_netdev_rx_init - Increments the netdevs counter. On the first call,
+ * it allocates the receive queue data and calls netif_napi_add
+ * for each queue.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+ int res;
+
+ if (atomic_fetch_inc(&priv->netdevs))
+ return 0;
+
+ mutex_lock(&hfi1_mutex);
+ init_dummy_netdev(dd->dummy_netdev);
+ res = hfi1_netdev_rxq_init(dd->dummy_netdev);
+ mutex_unlock(&hfi1_mutex);
+ return res;
+}
+
+/**
+ * hfi1_netdev_rx_destroy - Decrements the netdevs counter; when it reaches 0,
+ * the NAPI objects are deleted and the receive queue memory is freed.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ /* destroy the RX queues only if it is the last netdev going away */
+ if (atomic_fetch_add_unless(&priv->netdevs, -1, 0) == 1) {
+ mutex_lock(&hfi1_mutex);
+ hfi1_netdev_rxq_deinit(dd->dummy_netdev);
+ mutex_unlock(&hfi1_mutex);
+ }
+
+ return 0;
+}
+
+/**
+ * hfi1_netdev_alloc - Allocates the dummy netdev and private data. It is
+ * required because the RMT index and MSI-X interrupt can be set only
+ * during driver initialization.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_alloc(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv;
+ const int netdev_size = sizeof(*dd->dummy_netdev) +
+ sizeof(struct hfi1_netdev_priv);
+
+ dd_dev_info(dd, "allocating netdev size %d\n", netdev_size);
+ dd->dummy_netdev = kcalloc_node(1, netdev_size, GFP_KERNEL, dd->node);
+
+ if (!dd->dummy_netdev)
+ return -ENOMEM;
+
+ priv = hfi1_netdev_priv(dd->dummy_netdev);
+ priv->dd = dd;
+ xa_init(&priv->dev_tbl);
+ atomic_set(&priv->enabled, 0);
+ atomic_set(&priv->netdevs, 0);
+
+ return 0;
+}
+
+void hfi1_netdev_free(struct hfi1_devdata *dd)
+{
+ if (dd->dummy_netdev) {
+ dd_dev_info(dd, "hfi1 netdev freed\n");
+ kfree(dd->dummy_netdev);
+ dd->dummy_netdev = NULL;
+ }
+}
+
+/**
+ * hfi1_netdev_enable_queues - NAPI enable function.
+ * It enables the NAPI objects associated with the receive queues.
+ * Each caller increments an atomic counter; the disable function
+ * decrements it and, when the counter reaches 0, calls napi_disable
+ * for every queue.
+ *
+ * @dd: hfi1 dev data
+ */
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv;
+
+ if (!dd->dummy_netdev)
+ return;
+
+ priv = hfi1_netdev_priv(dd->dummy_netdev);
+ if (atomic_fetch_inc(&priv->enabled))
+ return;
+
+ mutex_lock(&hfi1_mutex);
+ enable_queues(priv);
+ mutex_unlock(&hfi1_mutex);
+}
+
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_priv *priv;
+
+ if (!dd->dummy_netdev)
+ return;
+
+ priv = hfi1_netdev_priv(dd->dummy_netdev);
+ if (atomic_dec_if_positive(&priv->enabled))
+ return;
+
+ mutex_lock(&hfi1_mutex);
+ disable_queues(priv);
+ mutex_unlock(&hfi1_mutex);
+}
+
+/**
+ * hfi1_netdev_add_data - Registers data with a unique identifier
+ * to be requested later; this is needed for the VNIC and IPoIB VLAN
+ * implementations.
+ * Synchronization is provided by the xarray's internal locking.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ * @data: data to be associated with index
+ */
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return xa_insert(&priv->dev_tbl, id, data, GFP_NOWAIT);
+}
+
+/**
+ * hfi1_netdev_remove_data - Removes data with previously given id.
+ * Returns the reference to removed entry.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return xa_erase(&priv->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_data - Gets data with given id
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+
+ return xa_load(&priv->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_first_data - Gets the first entry with an id greater
+ * than or equal to *start_id.
+ *
+ * @dd: hfi1 dev data
+ * @start_id: requested integer id up to INT_MAX; updated to the id found
+ */
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id)
+{
+ struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
+ unsigned long index = *start_id;
+ void *ret;
+
+ ret = xa_find(&priv->dev_tbl, &index, UINT_MAX, XA_PRESENT);
+ *start_id = (int)index;
+ return ret;
+}
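The dev_tbl above is a plain xarray keyed by caller-chosen ids, so add/remove/get map directly onto xa_insert()/xa_erase()/xa_load(), and the get-first helper onto xa_find(). A self-contained sketch of the same usage; the table and ids are illustrative:

#include <linux/xarray.h>

static DEFINE_XARRAY(example_tbl);

static int example_add(int id, void *data)
{
	/* xa_insert() fails with -EBUSY if the id is already present */
	return xa_insert(&example_tbl, id, data, GFP_KERNEL);
}

static void *example_get_first(int *start_id)
{
	unsigned long index = *start_id;
	void *entry = xa_find(&example_tbl, &index, ULONG_MAX, XA_PRESENT);

	*start_id = (int)index;
	return entry;
}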
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 61362bd..18d32f0 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -161,7 +161,7 @@
return -EINVAL;
}
- dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY);
+ dd->kregbase1 = ioremap(addr, RCV_ARRAY);
if (!dd->kregbase1) {
dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
return -ENOMEM;
@@ -179,7 +179,7 @@
dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count);
dd->base2_start = RCV_ARRAY + rcv_array_count * 8;
- dd->kregbase2 = ioremap_nocache(
+ dd->kregbase2 = ioremap(
addr + dd->base2_start,
TXE_PIO_SEND - dd->base2_start);
if (!dd->kregbase2) {
@@ -306,7 +306,7 @@
ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) {
@@ -334,10 +334,14 @@
return 0;
}
-/* restore command and BARs after a reset has wiped them out */
+/**
+ * restore_pci_variables() - Restore command and BARs after a reset has
+ * wiped them out
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int restore_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
if (ret)
@@ -386,13 +390,17 @@
error:
dd_dev_err(dd, "Unable to write to PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
-/* Save BARs and command to rewrite after device reset */
+/**
+ * save_pci_variables() - Save BARs and command to rewrite after device reset
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int save_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
&dd->pcibar0);
@@ -441,7 +449,7 @@
error:
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
/*
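PCI config accessors return PCIBIOS_* status codes rather than errnos, so the hunks above route failures through pcibios_err_to_errno() before returning to callers that expect negative errno values. A short sketch; the register offset is arbitrary:

#include <linux/pci.h>

static int read_example_reg(struct pci_dev *pdev, u32 *val)
{
	int ret = pci_read_config_dword(pdev, 0x40, val);

	/* translate a PCIBIOS_* status into a negative errno */
	return ret ? pcibios_err_to_errno(ret) : 0;
}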
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 79126b2..1cd8f80 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -86,7 +86,7 @@
switch (op) {
case PSC_GLOBAL_ENABLE:
reg |= SEND_CTRL_SEND_ENABLE_SMASK;
- /* Fall through */
+ fallthrough;
case PSC_DATA_VL_ENABLE:
mask = 0;
for (i = 0; i < ARRAY_SIZE(dd->vld); i++)
@@ -920,6 +920,7 @@
{
u64 reg;
struct pio_buf *pbuf;
+ LIST_HEAD(wake_list);
if (!sc)
return;
@@ -954,19 +955,21 @@
spin_unlock(&sc->release_lock);
write_seqlock(&sc->waitlock);
- while (!list_empty(&sc->piowait)) {
+ if (!list_empty(&sc->piowait))
+ list_move(&sc->piowait, &wake_list);
+ write_sequnlock(&sc->waitlock);
+ while (!list_empty(&wake_list)) {
struct iowait *wait;
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
- wait = list_first_entry(&sc->piowait, struct iowait, list);
+ wait = list_first_entry(&wake_list, struct iowait, list);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
}
- write_sequnlock(&sc->waitlock);
spin_unlock_irq(&sc->alloc_lock);
}
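The sc_wait loop change above is the classic detach-then-process pattern: move the waiters onto a private list while the seqlock is held, then wake them with no lock held, so hfi1_qp_wakeup() never runs under sc->waitlock. A generic sketch of the pattern, with placeholder item and wake types:

#include <linux/list.h>
#include <linux/spinlock.h>

static void drain_waiters(struct list_head *shared, spinlock_t *lock,
			  void (*wake)(struct list_head *entry))
{
	LIST_HEAD(local);

	spin_lock(lock);
	list_splice_init(shared, &local);	/* detach under the lock */
	spin_unlock(lock);

	while (!list_empty(&local)) {		/* wake with no lock held */
		struct list_head *entry = local.next;

		list_del_init(entry);
		wake(entry);
	}
}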
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index c9a58b6..0102262 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -243,7 +243,7 @@
*/
struct pio_map_elem {
u32 mask;
- struct send_context *ksc[0];
+ struct send_context *ksc[];
};
/*
@@ -263,7 +263,7 @@
u32 mask;
u8 actual_vls;
u8 vls;
- struct pio_map_elem *map[0];
+ struct pio_map_elem *map[];
};
int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls,
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 03024ce..4a4ec23 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -191,25 +191,24 @@
switch (n) {
case 7:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 6:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 5:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 4:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 3:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 2:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 1:
*dest++ = *src++;
- /* fall through */
}
}
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index cbf7faa..4642d6c 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -634,7 +634,7 @@
u32 config_data, const char *message)
{
u8 i;
- int ret = HCMD_SUCCESS;
+ int ret;
for (i = 0; i < 4; i++) {
ret = load_8051_config(ppd->dd, field_id, i, config_data);
@@ -668,8 +668,8 @@
/* active optical cables only */
switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
/* active AOC */
power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
@@ -899,8 +899,8 @@
*ptr_tuning_method = OPA_PASSIVE_TUNING;
break;
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
ptr_total_atten);
@@ -909,7 +909,7 @@
*ptr_tuning_method = OPA_ACTIVE_TUNING;
break;
- case 0xD: /* fallthrough */
+ case 0xD: fallthrough;
case 0xF:
default:
dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index acd4400..356518e 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2019 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -186,15 +186,6 @@
write_sequnlock_irqrestore(lock, flags);
}
-static inline int opa_mtu_enum_to_int(int mtu)
-{
- switch (mtu) {
- case OPA_MTU_8192: return 8192;
- case OPA_MTU_10240: return 10240;
- default: return -1;
- }
-}
-
/**
* This function is what we would push to the core layer if we wanted to be a
* "first class citizen". Instead we hide this here and rely on Verbs ULPs
@@ -202,15 +193,10 @@
*/
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
- int val;
-
/* Constraining 10KB packets to 8KB packets */
if (mtu == (enum ib_mtu)OPA_MTU_10240)
- mtu = OPA_MTU_8192;
- val = opa_mtu_enum_to_int((int)mtu);
- if (val > 0)
- return val;
- return ib_mtu_enum_to_int(mtu);
+ mtu = (enum ib_mtu)OPA_MTU_8192;
+ return opa_mtu_enum_to_int((enum opa_mtu)mtu);
}
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
@@ -326,7 +312,7 @@
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
hfi1_setup_tid_rdma_wqe(qp, wqe);
- /* fall through */
+ fallthrough;
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b670321..b0d053d 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -113,20 +113,6 @@
}
/**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
-
-/**
* hfi1_qp_wakeup - wake up on the indicated event
* @qp: the QP
* @flag: flag the qp on which the qp is stalled
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index b596699..8386c84 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -231,7 +231,7 @@
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
@@ -279,7 +279,7 @@
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 1a3c647..1bb5f57 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -141,7 +141,7 @@
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
release_rdma_sge_mr(e);
- /* FALLTHROUGH */
+ fallthrough;
case OP(ATOMIC_ACKNOWLEDGE):
/*
* We can increment the tail pointer now that the last
@@ -160,7 +160,7 @@
qp->s_acked_ack_queue = next;
qp->s_tail_ack_queue = next;
trace_hfi1_rsp_make_rc_ack(qp, e->psn);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_ONLY):
case OP(ACKNOWLEDGE):
/* Check for no next entry in the queue. */
@@ -267,7 +267,7 @@
case OP(RDMA_READ_RESPONSE_FIRST):
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_READ_RESPONSE_MIDDLE):
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
@@ -881,8 +881,7 @@
goto bail;
}
qp->s_num_rd_atomic++;
-
- /* FALLTHROUGH */
+ fallthrough;
case IB_WR_OPFN:
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
@@ -946,10 +945,10 @@
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -991,10 +990,10 @@
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -2599,7 +2598,7 @@
* to be sent before sending this one.
*/
e = NULL;
- old_req = 1;
+ old_req = true;
ibp->rvp.n_rc_dupreq++;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -2901,7 +2900,7 @@
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
case OP(RDMA_WRITE_MIDDLE):
send_middle:
@@ -2941,7 +2940,7 @@
goto no_immediate_data;
if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
goto send_last_inv;
- /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */
+ fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
@@ -2957,7 +2956,7 @@
goto send_last;
case OP(RDMA_WRITE_LAST):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(SEND_LAST):
no_immediate_data:
wc.wc_flags = 0;
@@ -3010,7 +3009,7 @@
case OP(RDMA_WRITE_ONLY):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 248be21..0b73dc7 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -232,11 +232,11 @@
static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
-static void sdma_hw_clean_up_task(unsigned long);
+static void sdma_hw_clean_up_task(struct tasklet_struct *);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
@@ -545,9 +545,10 @@
schedule_work(&sde->err_halt_worker);
}
-static void sdma_hw_clean_up_task(unsigned long opaque)
+static void sdma_hw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t,
+ sdma_hw_clean_up_task);
u64 statuscsr;
while (1) {
@@ -604,9 +605,9 @@
sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task);
unsigned long flags;
spin_lock_irqsave(&sde->tail_lock, flags);
@@ -833,7 +834,7 @@
struct sdma_rht_map_elem {
u32 mask;
u8 ctr;
- struct sdma_engine *sde[0];
+ struct sdma_engine *sde[];
};
struct sdma_rht_node {
@@ -848,7 +849,7 @@
.nelem_hint = NR_CPUS_HINT,
.head_offset = offsetof(struct sdma_rht_node, node),
.key_offset = offsetof(struct sdma_rht_node, cpu_id),
- .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .key_len = sizeof_field(struct sdma_rht_node, cpu_id),
.max_size = NR_CPUS,
.min_size = 8,
.automatic_shrinking = true,
@@ -879,10 +880,10 @@
if (current->nr_cpus_allowed != 1)
goto out;
- cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
- sdma_rht_params);
+ cpu_id = smp_processor_id();
+ rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
struct sdma_rht_map_elem *map = rht_node->map[vl];
@@ -1454,11 +1455,10 @@
sde->tail_csr =
get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
- tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
- (unsigned long)sde);
-
- tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
- (unsigned long)sde);
+ tasklet_setup(&sde->sdma_hw_clean_up_task,
+ sdma_hw_clean_up_task);
+ tasklet_setup(&sde->sdma_sw_clean_up_task,
+ sdma_sw_clean_up_task);
INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
INIT_WORK(&sde->flush_worker, sdma_field_flush);
@@ -2584,7 +2584,7 @@
* 7220, e.g.
*/
ss->go_s99_running = 1;
- /* fall through -- and start dma engine */
+ fallthrough; /* and start dma engine */
case sdma_event_e10_go_hw_start:
/* This reference means the state machine is started */
sdma_get(&sde->state);
@@ -2726,7 +2726,6 @@
case sdma_event_e70_go_idle:
break;
case sdma_event_e85_link_down:
- /* fall through */
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3007,7 +3006,7 @@
case sdma_event_e60_hw_halted:
need_progress = 1;
sdma_err_progress_check_schedule(sde);
- /* fall through */
+ fallthrough;
case sdma_event_e90_sw_halted:
/*
* SW initiated halt does not perform engines
@@ -3021,7 +3020,7 @@
break;
case sdma_event_e85_link_down:
ss->go_s99_running = 0;
- /* fall through */
+ fallthrough;
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3251,7 +3250,7 @@
tx->num_desc++;
tx->descs[2].qw[0] = 0;
tx->descs[2].qw[1] = 0;
- /* FALLTHROUGH */
+ fallthrough;
case SDMA_AHG_APPLY_UPDATE2:
tx->num_desc++;
tx->descs[1].qw[0] = 0;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 1e2e40f..7a85119 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1002,7 +1002,7 @@
*/
struct sdma_map_elem {
u32 mask;
- struct sdma_engine *sde[0];
+ struct sdma_engine *sde[];
};
/**
@@ -1024,7 +1024,7 @@
u32 mask;
u8 actual_vls;
u8 vls;
- struct sdma_map_elem *map[0];
+ struct sdma_map_elem *map[];
};
int sdma_map_init(
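The sdma clean-up tasklets above move from the old tasklet_init(..., unsigned long) calling convention to tasklet_setup(), whose callback receives the tasklet pointer and recovers its container with from_tasklet(). A minimal sketch with an illustrative engine structure:

#include <linux/interrupt.h>

struct example_engine {
	struct tasklet_struct clean_up_task;
	int state;
};

static void example_clean_up(struct tasklet_struct *t)
{
	/* from_tasklet() is container_of() keyed on the member name */
	struct example_engine *eng = from_tasklet(eng, t, clean_up_task);

	eng->state = 0;
}

static void example_engine_init(struct example_engine *eng)
{
	tasklet_setup(&eng->clean_up_task, example_clean_up);
}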
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index c018fc6..73d197e 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
*
*/
@@ -194,7 +194,7 @@
{
struct hfi1_qp_priv *priv = qp->priv;
- p->qp = (kdeth_qp << 16) | priv->rcd->ctxt;
+ p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt;
p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
p->jkey = priv->rcd->jkey;
p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
@@ -3228,7 +3228,7 @@
case IB_WR_RDMA_READ:
if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
break;
- /* fall through */
+ fallthrough;
case IB_WR_TID_RDMA_READ:
switch (prev->wr.opcode) {
case IB_WR_RDMA_READ:
@@ -5068,7 +5068,7 @@
if (priv->s_state == TID_OP(WRITE_REQ))
hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
priv->s_state = TID_OP(WRITE_DATA);
- /* fall through */
+ fallthrough;
case TID_OP(WRITE_DATA):
/*
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 9a3d236..b219ea9 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -47,6 +47,7 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "exp_rcv.h"
+#include "ipoib.h"
static u8 __get_ib_hdr_len(struct ib_header *hdr)
{
@@ -126,6 +127,7 @@
#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
+#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x"
#define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
@@ -444,6 +446,12 @@
break;
/* deth */
case OP(UD, SEND_ONLY):
+ trace_seq_printf(p, DETH_ENTROPY_PRN,
+ be32_to_cpu(eh->ud.deth[0]),
+ be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK,
+ be32_to_cpu(eh->ud.deth[1]) >>
+ HFI1_IPOIB_ENTROPY_SHIFT);
+ break;
case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, DETH_PRN,
be32_to_cpu(eh->ud.deth[0]),
@@ -512,6 +520,38 @@
return EXP_TID_GET(ent, IDX);
}
+struct hfi1_ctxt_hist {
+ atomic_t count;
+ atomic_t data[255];
+};
+
+static struct hfi1_ctxt_hist hist = {
+ .count = ATOMIC_INIT(0)
+};
+
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt)
+{
+ int i, len = ARRAY_SIZE(hist.data);
+ const char *ret = trace_seq_buffer_ptr(p);
+ unsigned long packet_count = atomic_fetch_inc(&hist.count);
+
+ trace_seq_printf(p, "packet[%lu]", packet_count);
+ for (i = 0; i < len; ++i) {
+ unsigned long val;
+ atomic_t *count = &hist.data[i];
+
+ if (ctxt == i)
+ val = atomic_fetch_inc(count);
+ else
+ val = atomic_read(count);
+
+ if (val)
+ trace_seq_printf(p, "(%d:%lu)", i, val);
+ }
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index e00c8a7..d8c168d 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -80,7 +80,7 @@
__entry->credits = uctxt->sc->credits;
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
- __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+ __entry->rcvhdrq_cnt = get_hdrq_cnt(uctxt);
__entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
__entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
@@ -138,6 +138,15 @@
)
);
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt);
+TRACE_EVENT(ctxt_rsm_hist,
+ TP_PROTO(unsigned int ctxt),
+ TP_ARGS(ctxt),
+ TP_STRUCT__entry(__field(unsigned int, ctxt)),
+ TP_fast_assign(__entry->ctxt = ctxt;),
+ TP_printk("%s", hfi1_trace_print_rsm_hist(p, __entry->ctxt))
+);
+
#endif /* __HFI1_TRACE_CTXTS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 3cec960..168079e 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -106,19 +106,8 @@
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = rcd->ctxt;
- if (rcd->do_interrupt ==
- &handle_receive_interrupt) {
- __entry->slow_path = 1;
- __entry->dma_rtail = 0xFF;
- } else if (rcd->do_interrupt ==
- &handle_receive_interrupt_dma_rtail){
- __entry->dma_rtail = 1;
- __entry->slow_path = 0;
- } else if (rcd->do_interrupt ==
- &handle_receive_interrupt_nodma_rtail) {
- __entry->dma_rtail = 0;
- __entry->slow_path = 0;
- }
+ __entry->slow_path = hfi1_is_slowpath(rcd);
+ __entry->dma_rtail = get_dma_rtail_setting(rcd);
),
TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
__get_str(dev),
diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h
index 343fb98..985ffa9 100644
--- a/drivers/infiniband/hw/hfi1/trace_tid.h
+++ b/drivers/infiniband/hw/hfi1/trace_tid.h
@@ -138,10 +138,10 @@
TP_ARGS(dd, index, type, pa, order),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd)
- __field(unsigned long, pa);
- __field(u32, index);
- __field(u32, type);
- __field(u16, order);
+ __field(unsigned long, pa)
+ __field(u32, index)
+ __field(u32, type)
+ __field(u16, order)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd);
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 09eb0c9..769e5e4 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -588,7 +588,7 @@
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
TP_ARGS(dd, ctxt, subctxt, i),
TP_STRUCT__entry(
- DD_DEV_ENTRY(dd);
+ DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u8, subctxt)
__field(u8, ver_opcode)
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 0c77f18..1fb9183 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -216,7 +216,7 @@
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -241,7 +241,7 @@
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -414,7 +414,7 @@
goto no_immediate_data;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
/*
@@ -515,7 +515,7 @@
wc.ex.imm_data = ohdr->u.rc.imm_data;
goto rdma_last_imm;
}
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4)))
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 4d73235..b94fc7f 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,4 +1,5 @@
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -59,11 +60,11 @@
struct tid_user_buf *tbuf,
u32 rcventry, struct tid_group *grp,
u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
struct tid_group *grp,
unsigned int start, u16 count,
@@ -73,10 +74,8 @@
struct tid_group **grp);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
-static struct mmu_rb_ops tid_rb_ops = {
- .insert = tid_rb_insert,
- .remove = tid_rb_remove,
- .invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+ .invalidate = tid_rb_invalidate,
};
/*
@@ -87,7 +86,6 @@
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
fd->entry_to_rb = kcalloc(uctxt->expected_count,
@@ -106,20 +104,7 @@
fd->entry_to_rb = NULL;
return -ENOMEM;
}
-
- /*
- * Register MMU notifier callbacks. If the registration
- * fails, continue without TID caching for this context.
- */
- ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
- dd->pport->hfi1_wq,
- &fd->handler);
- if (ret) {
- dd_dev_info(dd,
- "Failed MMU notifier registration %d\n",
- ret);
- ret = 0;
- }
+ fd->use_mn = true;
}
/*
@@ -136,7 +121,7 @@
* init.
*/
spin_lock(&fd->tid_lock);
- if (uctxt->subctxt_cnt && fd->handler) {
+ if (uctxt->subctxt_cnt && fd->use_mn) {
u16 remainder;
fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -155,20 +140,12 @@
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- /*
- * The notifier would have been removed when the process'es mm
- * was freed.
- */
- if (fd->handler) {
- hfi1_mmu_rb_unregister(fd->handler);
- } else {
- mutex_lock(&uctxt->exp_mutex);
- if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
- unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
- if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
- unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
- mutex_unlock(&uctxt->exp_mutex);
- }
+ mutex_lock(&uctxt->exp_mutex);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+ mutex_unlock(&uctxt->exp_mutex);
kfree(fd->invalid_tids);
fd->invalid_tids = NULL;
@@ -197,15 +174,18 @@
{
struct page **pages;
struct hfi1_devdata *dd = fd->uctxt->dd;
+ struct mm_struct *mm;
if (mapped) {
pci_unmap_single(dd->pcidev, node->dma_addr,
- node->mmu.len, PCI_DMA_FROMDEVICE);
+ node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
pages = &node->pages[idx];
+ mm = mm_from_tid_node(node);
} else {
pages = &tidbuf->pages[idx];
+ mm = current->mm;
}
- hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+ hfi1_release_user_pages(mm, pages, npages, mapped);
fd->tid_n_pinned -= npages;
}
@@ -230,13 +210,6 @@
return -EINVAL;
}
- /* Verify that access is OK for the user buffer */
- if (!access_ok((void __user *)vaddr,
- npages * PAGE_SIZE)) {
- dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
- (void *)vaddr, npages);
- return -EFAULT;
- }
/* Allocate the array of struct page pointers needed for pinning */
pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
if (!pages)
@@ -247,12 +220,12 @@
* pages, accept the amount pinned so far and program only that.
* User space knows how to deal with partially programmed buffers.
*/
- if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+ if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
kfree(pages);
return -ENOMEM;
}
- pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+ pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
if (pinned <= 0) {
kfree(pages);
return pinned;
@@ -776,8 +749,7 @@
return -EFAULT;
}
- node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
- node->mmu.len = npages * PAGE_SIZE;
+ node->fdata = fd;
node->phys = page_to_phys(pages[0]);
node->npages = npages;
node->rcventry = rcventry;
@@ -786,23 +758,35 @@
node->freed = false;
memcpy(node->pages, pages, sizeof(struct page *) * npages);
- if (!fd->handler)
- ret = tid_rb_insert(fd, &node->mmu);
- else
- ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
- if (ret) {
- hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
- node->rcventry, node->mmu.addr, node->phys, ret);
- pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
- kfree(node);
- return -EFAULT;
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+ &node->notifier, current->mm,
+ tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+ &tid_mn_ops);
+ if (ret)
+ goto out_unmap;
+ /*
+ * FIXME: This is in the wrong order, the notifier should be
+ * established before the pages are pinned by pin_rcv_pages.
+ */
+ mmu_interval_read_begin(&node->notifier);
}
+ fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
- node->mmu.addr, node->phys, phys);
+ node->notifier.interval_tree.start, node->phys,
+ phys);
return 0;
+
+out_unmap:
+ hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+ node->rcventry, node->notifier.interval_tree.start,
+ node->phys, ret);
+ pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ kfree(node);
+ return -EFAULT;
}
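The FIXME above records the canonical mmu_interval ordering: establish the notifier first, then pin and program, then confirm that no invalidation raced in between. A sketch of that ordering, with hypothetical pin_and_map()/unpin_and_unmap() helpers standing in for the driver's pin_rcv_pages()/hfi1_put_tid() work; the driver lock that must serialize the retry check against the invalidate callback is elided:

	#include <linux/mmu_notifier.h>

	static int pin_and_map(void);		/* hypothetical helper */
	static void unpin_and_unmap(void);	/* hypothetical helper */

	static int setup_range(struct mmu_interval_notifier *mni)
	{
		unsigned long seq;

		for (;;) {
			seq = mmu_interval_read_begin(mni);
			if (pin_and_map())
				return -EFAULT;
			if (!mmu_interval_read_retry(mni, seq))
				return 0;	/* no invalidation raced us */
			unpin_and_unmap();	/* raced: roll back, retry */
		}
	}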
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
@@ -832,10 +816,9 @@
if (grp)
*grp = node->grp;
- if (!fd->handler)
- cacheless_tid_rb_remove(fd, node);
- else
- hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&node->notifier);
+ cacheless_tid_rb_remove(fd, node);
return 0;
}
@@ -846,7 +829,8 @@
struct hfi1_devdata *dd = uctxt->dd;
trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
- node->npages, node->mmu.addr, node->phys,
+ node->npages,
+ node->notifier.interval_tree.start, node->phys,
node->dma_addr);
/*
@@ -893,30 +877,29 @@
if (!node || node->rcventry != rcventry)
continue;
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(
+ &node->notifier);
cacheless_tid_rb_remove(fd, node);
}
}
}
}
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct hfi1_filedata *fdata = arg;
- struct hfi1_ctxtdata *uctxt = fdata->uctxt;
struct tid_rb_node *node =
- container_of(mnode, struct tid_rb_node, mmu);
+ container_of(mni, struct tid_rb_node, notifier);
+ struct hfi1_filedata *fdata = node->fdata;
+ struct hfi1_ctxtdata *uctxt = fdata->uctxt;
if (node->freed)
- return 0;
+ return true;
- trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+ node->notifier.interval_tree.start,
node->rcventry, node->npages, node->dma_addr);
node->freed = true;
@@ -945,18 +928,7 @@
fdata->invalid_tid_idx++;
}
spin_unlock(&fdata->invalid_lock);
- return 0;
-}
-
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
- u32 base = fdata->uctxt->expected_base;
-
- fdata->entry_to_rb[tnode->rcventry - base] = tnode;
- return 0;
+ return true;
}
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -967,12 +939,3 @@
fdata->entry_to_rb[tnode->rcventry - base] = NULL;
clear_tid_node(fdata, tnode);
}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
-
- cacheless_tid_rb_remove(fdata, tnode);
-}
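Taken together, these hunks replace the driver-private mmu_rb callback trio (insert/remove/invalidate through a void *arg) with the core mmu_interval_notifier API: one notifier embedded per pinned range, a single ->invalidate() hook returning bool, and container_of() to recover the owning node. A minimal registration sketch under the same assumptions (struct and function names hypothetical; a fuller implementation would also call mmu_interval_set_seq(mni, cur_seq) under its lock):

	#include <linux/kernel.h>
	#include <linux/mmu_notifier.h>

	struct my_node {
		struct mmu_interval_notifier notifier;	/* embedded, one per range */
		bool freed;
	};

	static bool my_invalidate(struct mmu_interval_notifier *mni,
				  const struct mmu_notifier_range *range,
				  unsigned long cur_seq)
	{
		struct my_node *node = container_of(mni, struct my_node, notifier);

		/* mark stale; the owner unpins later, mirroring the
		 * "retain and notify PSM" policy of the removed comment */
		node->freed = true;
		return true;
	}

	static const struct mmu_interval_notifier_ops my_mn_ops = {
		.invalidate = my_invalidate,
	};

	static int my_register(struct my_node *node, unsigned long start,
			       unsigned long len)
	{
		int ret = mmu_interval_notifier_insert(&node->notifier,
						       current->mm, start, len,
						       &my_mn_ops);
		if (ret)
			return ret;
		mmu_interval_read_begin(&node->notifier);	/* see FIXME above */
		return 0;
	}

	/* teardown: mmu_interval_notifier_remove(&node->notifier);
	 * blocks until any running invalidate callback has finished */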
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 43b105d..d45c7b6 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,6 +1,7 @@
#ifndef _HFI1_USER_EXP_RCV_H
#define _HFI1_USER_EXP_RCV_H
/*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -65,14 +66,15 @@
};
struct tid_rb_node {
- struct mmu_rb_node mmu;
+ struct mmu_interval_notifier notifier;
+ struct hfi1_filedata *fdata;
unsigned long phys;
struct tid_group *grp;
u32 rcventry;
dma_addr_t dma_addr;
bool freed;
unsigned int npages;
- struct page *pages[0];
+ struct page *pages[];
};
static inline int num_user_pages(unsigned long addr,
@@ -94,4 +96,9 @@
int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo);
+static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node)
+{
+ return node->notifier.mm;
+}
+
#endif /* _HFI1_USER_EXP_RCV_H */
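The pages[0] to pages[] change in tid_rb_node is the kernel-wide move from one-element-array tricks to C99 flexible array members, which sizeof excludes and which fortified bounds checking understands. Allocations for such structs typically go through struct_size() from linux/overflow.h so the size computation saturates rather than overflows. A hedged sketch with a stand-in struct:

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct tid_like_node {
		unsigned int npages;
		struct page *pages[];	/* flexible array member, must be last */
	};

	static struct tid_like_node *node_alloc(unsigned int npages)
	{
		/* struct_size() == sizeof(*node) + npages * sizeof(node->pages[0]),
		 * saturating to SIZE_MAX on overflow so kzalloc fails cleanly */
		struct tid_like_node *node =
			kzalloc(struct_size(node, pages, npages), GFP_KERNEL);

		if (node)
			node->npages = npages;
		return node;
	}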
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 469acb9..3b50500 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -106,7 +106,7 @@
int ret;
unsigned int gup_flags = FOLL_LONGTERM | (writable ? FOLL_WRITE : 0);
- ret = get_user_pages_fast(vaddr, npages, gup_flags, pages);
+ ret = pin_user_pages_fast(vaddr, npages, gup_flags, pages);
if (ret < 0)
return ret;
@@ -118,7 +118,7 @@
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty)
{
- put_user_pages_dirty_lock(p, npages, dirty);
+ unpin_user_pages_dirty_lock(p, npages, dirty);
if (mm) { /* during close after signal, mm can be NULL */
atomic64_sub(npages, &mm->pinned_vm);
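This hunk is part of the tree-wide FOLL_PIN conversion: pages taken with pin_user_pages_fast() must be released with unpin_user_pages_dirty_lock(), never a plain put_page(), so the mm can tell DMA pins apart from ordinary page references. A minimal sketch of the paired calls:

	#include <linux/mm.h>

	static int pin_range(unsigned long vaddr, int npages, bool writable,
			     struct page **pages)
	{
		unsigned int gup_flags = FOLL_LONGTERM |
					 (writable ? FOLL_WRITE : 0);
		int pinned = pin_user_pages_fast(vaddr, npages, gup_flags, pages);

		if (pinned < 0)
			return pinned;
		if (pinned != npages) {
			/* partial pin: release what was taken, report failure */
			unpin_user_pages_dirty_lock(pages, pinned, false);
			return -EFAULT;
		}
		return 0;
	}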
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index a92346e..4a4956f 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,4 +1,5 @@
/*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -188,7 +189,6 @@
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
atomic_set(&pq->n_locked, 0);
- pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL, NULL);
@@ -230,7 +230,7 @@
cq->nentries = hfi1_sdma_comp_ring_size;
- ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
+ ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
&pq->handler);
if (ret) {
dd_dev_err(dd, "Failed to register with MMU %d", ret);
@@ -980,13 +980,13 @@
npages -= node->npages;
retry:
- if (!hfi1_can_pin_pages(pq->dd, pq->mm,
+ if (!hfi1_can_pin_pages(pq->dd, current->mm,
atomic_read(&pq->n_locked), npages)) {
cleared = sdma_cache_evict(pq, npages);
if (cleared >= npages)
goto retry;
}
- pinned = hfi1_acquire_user_pages(pq->mm,
+ pinned = hfi1_acquire_user_pages(current->mm,
((unsigned long)iovec->iov.iov_base +
(node->npages * PAGE_SIZE)), npages, 0,
pages + node->npages);
@@ -995,7 +995,7 @@
return pinned;
}
if (pinned != npages) {
- unpin_vector_pages(pq->mm, pages, node->npages, pinned);
+ unpin_vector_pages(current->mm, pages, node->npages, pinned);
return -EFAULT;
}
kfree(node->pages);
@@ -1008,7 +1008,8 @@
static void unpin_sdma_pages(struct sdma_mmu_node *node)
{
if (node->npages) {
- unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
+ unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
+ node->npages);
atomic_sub(node->npages, &node->pq->n_locked);
}
}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 9972e0e..1e8c02f 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,6 +1,7 @@
#ifndef _HFI1_USER_SDMA_H
#define _HFI1_USER_SDMA_H
/*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
@@ -133,7 +134,6 @@
unsigned long unpinned;
struct mmu_rb_handler *handler;
atomic_t n_locked;
- struct mm_struct *mm;
};
struct hfi1_user_sdma_comp_q {
@@ -250,4 +250,9 @@
struct iovec *iovec, unsigned long dim,
unsigned long *count);
+static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node)
+{
+ return node->rb.handler->mn.mm;
+}
+
#endif /* _HFI1_USER_SDMA_H */
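Dropping pq->mm follows the same discipline as the expected-receive path: submission runs in process context, where current->mm is authoritative, and deferred teardown recovers the mm through the notifier chain (mm_from_sdma_node() above), which the mmu_rb handler keeps alive for the lifetime of the registration. The underlying rule, reduced to a minimal sketch: a long-lived object may only hold an mm it has reference-counted, which the notifier registration provides for free here.

	#include <linux/sched/mm.h>

	struct deferred_work {
		struct mm_struct *mm;
	};

	static void deferred_take(struct deferred_work *w)
	{
		w->mm = current->mm;
		mmgrab(w->mm);	/* pins the mm_struct itself, not its pages */
	}

	static void deferred_release(struct deferred_work *w)
	{
		mmdrop(w->mm);	/* pairs with mmgrab() */
		w->mm = NULL;
	}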
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2f6323a..3591923 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -66,6 +66,7 @@
#include "vnic.h"
#include "fault.h"
#include "affinity.h"
+#include "ipoib.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -1342,7 +1343,7 @@
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
IB_DEVICE_MEM_MGT_EXTENSIONS |
- IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
+ IB_DEVICE_RDMA_NETDEV_OPA;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1360,7 +1361,6 @@
rdi->dparms.props.max_cq = hfi1_max_cqs;
rdi->dparms.props.max_ah = hfi1_max_ahs;
rdi->dparms.props.max_cqe = hfi1_max_cqes;
- rdi->dparms.props.max_map_per_fmr = 32767;
rdi->dparms.props.max_pd = hfi1_max_pds;
rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
rdi->dparms.props.max_qp_init_rd_atom = 255;
@@ -1424,7 +1424,7 @@
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
- props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
+ props->active_speed = opa_speed_to_ib(ppd->link_speed_active);
props->max_vl_num = ppd->vls_supported;
/* Once we are a "first class" citizen and have added the OPA MTUs to
@@ -1439,6 +1439,8 @@
4096 : hfi1_max_mtu), IB_MTU_4096);
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
+ props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu :
+ ib_mtu_enum_to_int(props->max_mtu);
return 0;
}
@@ -1793,6 +1795,7 @@
.modify_device = modify_device,
/* keep process mad in the driver */
.process_mad = hfi1_process_mad,
+ .rdma_netdev_get_params = hfi1_ipoib_rn_get_params,
};
/**
@@ -1863,9 +1866,8 @@
dd->verbs_dev.rdi.dparms.qpn_start = 0;
dd->verbs_dev.rdi.dparms.qpn_inc = 1;
dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
- dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
- dd->verbs_dev.rdi.dparms.qpn_res_end =
- dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+ dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE;
+ dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX;
dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index ae9582d..d36e3e1 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -107,9 +107,9 @@
HFI1_HAS_GRH = (1 << 0),
};
-#define LRH_16B_BYTES (FIELD_SIZEOF(struct hfi1_16b_header, lrh))
+#define LRH_16B_BYTES (sizeof_field(struct hfi1_16b_header, lrh))
#define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32))
-#define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh))
+#define LRH_9B_BYTES (sizeof_field(struct ib_header, lrh))
#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
/* 24Bits for qpn, upper 8Bits reserved */
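FIELD_SIZEOF() was retired in favor of sizeof_field() (linux/stddef.h); both expand to sizeof(((TYPE *)0)->MEMBER), so the hunk above is purely mechanical. A minimal usage sketch with a stand-in struct:

	#include <linux/stddef.h>
	#include <linux/types.h>

	struct demo_hdr {
		u8  lrh[8];
		u32 body[4];
	};

	/* sizeof_field(TYPE, MEMBER) == sizeof(((TYPE *)0)->MEMBER) */
	#define DEMO_LRH_BYTES	sizeof_field(struct demo_hdr, lrh)	/* 8 */
	#define DEMO_LRH_DWORDS	(DEMO_LRH_BYTES / sizeof(u32))		/* 2 */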
@@ -330,9 +330,8 @@
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
/*
* The PSN_MASK and PSN_SHIFT allow for
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index bfa6e08..d2d526c 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -91,7 +91,7 @@
tx->mr = NULL;
tx->sde = priv->s_sde;
tx->psc = priv->s_sendcontext;
- /* so that we can test if the sdma decriptors are there */
+ /* so that we can test if the sdma descriptors are there */
tx->txreq.num_desc = 0;
/* Set the header type */
tx->phdr.hdr.hdr_type = priv->hdr_type;
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
index 5ae7815..66150a1 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_VNIC_H
#define _HFI1_VNIC_H
/*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -69,6 +69,7 @@
#define HFI1_VNIC_SC_SHIFT 4
#define HFI1_VNIC_MAX_QUEUE 16
+#define HFI1_NUM_VNIC_CTXT 8
/**
* struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
@@ -104,7 +105,6 @@
struct hfi1_vnic_vport_info *vinfo;
struct net_device *netdev;
struct napi_struct napi;
- struct sk_buff_head skbq;
};
/**
@@ -146,7 +146,6 @@
/* vnic hfi1 internal functions */
void hfi1_vnic_setup(struct hfi1_devdata *dd);
-void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index b49e60e..a90824d 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2017 - 2018 Intel Corporation.
+ * Copyright(c) 2017 - 2020 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
#include <linux/if_vlan.h>
#include "vnic.h"
+#include "netdev.h"
#define HFI_TX_TIMEOUT_MS 1000
@@ -62,114 +63,6 @@
static DEFINE_SPINLOCK(vport_cntr_lock);
-static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
-{
- unsigned int rcvctrl_ops = 0;
- int ret;
-
- uctxt->do_interrupt = &handle_receive_interrupt;
-
- /* Now allocate the RcvHdr queue and eager buffers. */
- ret = hfi1_create_rcvhdrq(dd, uctxt);
- if (ret)
- goto done;
-
- ret = hfi1_setup_eagerbufs(uctxt);
- if (ret)
- goto done;
-
- if (uctxt->rcvhdrtail_kvaddr)
- clear_rcvhdrtail(uctxt);
-
- rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
- rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
-
- if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
- rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
- rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
- rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
- rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
-
- hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
-done:
- return ret;
-}
-
-static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
- struct hfi1_ctxtdata **vnic_ctxt)
-{
- struct hfi1_ctxtdata *uctxt;
- int ret;
-
- if (dd->flags & HFI1_FROZEN)
- return -EIO;
-
- ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
- if (ret < 0) {
- dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
- return -ENOMEM;
- }
-
- uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
- HFI1_CAP_KGET(NODROP_RHQ_FULL) |
- HFI1_CAP_KGET(NODROP_EGR_FULL) |
- HFI1_CAP_KGET(DMA_RTAIL);
- uctxt->seq_cnt = 1;
- uctxt->is_vnic = true;
-
- msix_request_rcd_irq(uctxt);
-
- hfi1_stats.sps_ctxts++;
- dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
- *vnic_ctxt = uctxt;
-
- return 0;
-}
-
-static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
- struct hfi1_ctxtdata *uctxt)
-{
- dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
- flush_wc();
-
- /*
- * Disable receive context and interrupt available, reset all
- * RcvCtxtCtrl bits to default values.
- */
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
- HFI1_RCVCTRL_TIDFLOW_DIS |
- HFI1_RCVCTRL_INTRAVAIL_DIS |
- HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
- HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
- HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
-
- /* msix_intr will always be > 0, only clean up if this is true */
- if (uctxt->msix_intr)
- msix_free_irq(dd, uctxt->msix_intr);
-
- uctxt->event_flags = 0;
-
- hfi1_clear_tids(uctxt);
- hfi1_clear_ctxt_pkey(dd, uctxt);
-
- hfi1_stats.sps_ctxts--;
-
- hfi1_free_ctxt(uctxt);
-}
-
-void hfi1_vnic_setup(struct hfi1_devdata *dd)
-{
- xa_init(&dd->vnic.vesws);
-}
-
-void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
-{
- WARN_ON(!xa_empty(&dd->vnic.vesws));
-}
-
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \
u64 *src64, *dst64; \
for (src64 = &qstats->x_grp.unicast, \
@@ -179,6 +72,9 @@
} \
} while (0)
+#define VNIC_MASK (0xFF)
+#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK))
+
/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
struct opa_vnic_stats *stats)
@@ -454,71 +350,25 @@
return rc;
}
-static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
+static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
+ int vesw_id)
{
- unsigned char *pad_info;
- struct sk_buff *skb;
+ int vnic_id = VNIC_ID(vesw_id);
- skb = skb_dequeue(&rxq->skbq);
- if (unlikely(!skb))
+ return hfi1_netdev_get_data(dd, vnic_id);
+}
+
+static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
+{
+ struct hfi1_vnic_vport_info *vinfo;
+ int next_id = VNIC_ID(0);
+
+ vinfo = hfi1_netdev_get_first_data(dd, &next_id);
+
+ if (next_id > VNIC_ID(VNIC_MASK))
return NULL;
- /* remove tail padding and icrc */
- pad_info = skb->data + skb->len - 1;
- skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
- ((*pad_info) & 0x7)));
-
- return skb;
-}
-
-/* hfi1_vnic_handle_rx - handle skb receive */
-static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
- int *work_done, int work_to_do)
-{
- struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
- struct sk_buff *skb;
- int rc;
-
- while (1) {
- if (*work_done >= work_to_do)
- break;
-
- skb = hfi1_vnic_get_skb(rxq);
- if (unlikely(!skb))
- break;
-
- rc = hfi1_vnic_decap_skb(rxq, skb);
- /* update rx counters */
- hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
- if (unlikely(rc)) {
- dev_kfree_skb_any(skb);
- continue;
- }
-
- skb_checksum_none_assert(skb);
- skb->protocol = eth_type_trans(skb, rxq->netdev);
-
- napi_gro_receive(&rxq->napi, skb);
- (*work_done)++;
- }
-}
-
-/* hfi1_vnic_napi - napi receive polling callback function */
-static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
-{
- struct hfi1_vnic_rx_queue *rxq = container_of(napi,
- struct hfi1_vnic_rx_queue, napi);
- struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
- int work_done = 0;
-
- v_dbg("napi %d budget %d\n", rxq->idx, budget);
- hfi1_vnic_handle_rx(rxq, &work_done, budget);
-
- v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
- if (work_done < budget)
- napi_complete(napi);
-
- return work_done;
+ return vinfo;
}
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
@@ -527,13 +377,14 @@
struct hfi1_vnic_vport_info *vinfo = NULL;
struct hfi1_vnic_rx_queue *rxq;
struct sk_buff *skb;
- int l4_type, vesw_id = -1;
+ int l4_type, vesw_id = -1, rc;
u8 q_idx;
+ unsigned char *pad_info;
l4_type = hfi1_16B_get_l4(packet->ebuf);
if (likely(l4_type == OPA_16B_L4_ETHR)) {
vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
- vinfo = xa_load(&dd->vnic.vesws, vesw_id);
+ vinfo = get_vnic_port(dd, vesw_id);
/*
* In case of invalid vesw id, count the error on
@@ -541,10 +392,8 @@
*/
if (unlikely(!vinfo)) {
struct hfi1_vnic_vport_info *vinfo_tmp;
- unsigned long index = 0;
- vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX,
- XA_PRESENT);
+ vinfo_tmp = get_first_vnic_port(dd);
if (vinfo_tmp) {
spin_lock(&vport_cntr_lock);
vinfo_tmp->stats[0].netstats.rx_nohandler++;
@@ -563,12 +412,6 @@
rxq = &vinfo->rxq[q_idx];
if (unlikely(!netif_oper_up(vinfo->netdev))) {
vinfo->stats[q_idx].rx_drop_state++;
- skb_queue_purge(&rxq->skbq);
- return;
- }
-
- if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
- vinfo->stats[q_idx].netstats.rx_fifo_errors++;
return;
}
@@ -580,62 +423,65 @@
memcpy(skb->data, packet->ebuf, packet->tlen);
skb_put(skb, packet->tlen);
- skb_queue_tail(&rxq->skbq, skb);
- if (napi_schedule_prep(&rxq->napi)) {
- v_dbg("napi %d scheduling\n", q_idx);
- __napi_schedule(&rxq->napi);
+ pad_info = skb->data + skb->len - 1;
+ skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+ ((*pad_info) & 0x7)));
+
+ rc = hfi1_vnic_decap_skb(rxq, skb);
+
+ /* update rx counters */
+ hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+ if (unlikely(rc)) {
+ dev_kfree_skb_any(skb);
+ return;
}
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
}
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
struct net_device *netdev = vinfo->netdev;
- int i, rc;
+ int rc;
/* ensure virtual eth switch id is valid */
if (!vinfo->vesw_id)
return -EINVAL;
- rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL);
+ rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
if (rc < 0)
return rc;
- for (i = 0; i < vinfo->num_rx_q; i++) {
- struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
-
- skb_queue_head_init(&rxq->skbq);
- napi_enable(&rxq->napi);
- }
+ rc = hfi1_netdev_rx_init(dd);
+ if (rc)
+ goto err_remove;
netif_carrier_on(netdev);
netif_tx_start_all_queues(netdev);
set_bit(HFI1_VNIC_UP, &vinfo->flags);
return 0;
+
+err_remove:
+ hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
+ return rc;
}
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
- u8 i;
clear_bit(HFI1_VNIC_UP, &vinfo->flags);
netif_carrier_off(vinfo->netdev);
netif_tx_disable(vinfo->netdev);
- xa_erase(&dd->vnic.vesws, vinfo->vesw_id);
+ hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
- /* ensure irqs see the change */
- msix_vnic_synchronize_irq(dd);
-
- /* remove unread skbs */
- for (i = 0; i < vinfo->num_rx_q; i++) {
- struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
-
- napi_disable(&rxq->napi);
- skb_queue_purge(&rxq->skbq);
- }
+ hfi1_netdev_rx_destroy(dd);
}
static int hfi1_netdev_open(struct net_device *netdev)
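With the per-rxq skb queue and private NAPI instance gone, vnic frames arrive through the shared netdev receive contexts, which already run in NAPI context; hfi1_vnic_bypass_rcv() can therefore trim the pad and ICRC, decap, and hand the skb to GRO synchronously. The per-packet tail of that path, condensed into a self-contained sketch (icrc_tail_len stands in for OPA_VNIC_ICRC_TAIL_LEN):

	#include <linux/etherdevice.h>
	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* runs in NAPI/softirq context */
	static void rx_deliver(struct napi_struct *napi, struct net_device *dev,
			       struct sk_buff *skb, unsigned int icrc_tail_len)
	{
		u8 pad = skb->data[skb->len - 1] & 0x7;	/* pad count in last byte */

		skb_trim(skb, skb->len - icrc_tail_len - pad);
		skb_checksum_none_assert(skb);	/* hw did not verify checksums */
		skb->protocol = eth_type_trans(skb, dev);
		napi_gro_receive(napi, skb);
	}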
@@ -660,70 +506,31 @@
return 0;
}
-static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
- struct hfi1_ctxtdata **vnic_ctxt)
-{
- int rc;
-
- rc = allocate_vnic_ctxt(dd, vnic_ctxt);
- if (rc) {
- dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
- return rc;
- }
-
- rc = setup_vnic_ctxt(dd, *vnic_ctxt);
- if (rc) {
- dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
- deallocate_vnic_ctxt(dd, *vnic_ctxt);
- *vnic_ctxt = NULL;
- }
-
- return rc;
-}
-
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
- int i, rc = 0;
+ int rc = 0;
mutex_lock(&hfi1_mutex);
- if (!dd->vnic.num_vports) {
+ if (!dd->vnic_num_vports) {
rc = hfi1_vnic_txreq_init(dd);
if (rc)
goto txreq_fail;
}
- for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
- rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
- if (rc)
- break;
- hfi1_rcd_get(dd->vnic.ctxt[i]);
- dd->vnic.ctxt[i]->vnic_q_idx = i;
- }
-
- if (i < vinfo->num_rx_q) {
- /*
- * If required amount of contexts is not
- * allocated successfully then remaining contexts
- * are released.
- */
- while (i-- > dd->vnic.num_ctxt) {
- deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
- hfi1_rcd_put(dd->vnic.ctxt[i]);
- dd->vnic.ctxt[i] = NULL;
- }
+ rc = hfi1_netdev_rx_init(dd);
+ if (rc) {
+ dd_dev_err(dd, "Unable to initialize netdev contexts\n");
goto alloc_fail;
}
- if (dd->vnic.num_ctxt != i) {
- dd->vnic.num_ctxt = i;
- hfi1_init_vnic_rsm(dd);
- }
+ hfi1_init_vnic_rsm(dd);
- dd->vnic.num_vports++;
+ dd->vnic_num_vports++;
hfi1_vnic_sdma_init(vinfo);
+
alloc_fail:
- if (!dd->vnic.num_vports)
+ if (!dd->vnic_num_vports)
hfi1_vnic_txreq_deinit(dd);
txreq_fail:
mutex_unlock(&hfi1_mutex);
@@ -733,20 +540,14 @@
static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
- int i;
mutex_lock(&hfi1_mutex);
- if (--dd->vnic.num_vports == 0) {
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
- hfi1_rcd_put(dd->vnic.ctxt[i]);
- dd->vnic.ctxt[i] = NULL;
- }
+ if (--dd->vnic_num_vports == 0) {
hfi1_deinit_vnic_rsm(dd);
- dd->vnic.num_ctxt = 0;
hfi1_vnic_txreq_deinit(dd);
}
mutex_unlock(&hfi1_mutex);
+ hfi1_netdev_rx_destroy(dd);
}
static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
@@ -804,7 +605,7 @@
struct rdma_netdev *rn;
int i, size, rc;
- if (!dd->num_vnic_contexts)
+ if (!dd->num_netdev_contexts)
return ERR_PTR(-ENOMEM);
if (!port_num || (port_num > dd->num_pports))
@@ -815,15 +616,16 @@
size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
- dd->num_sdma, dd->num_vnic_contexts);
+ chip_sdma_engines(dd),
+ dd->num_netdev_contexts);
if (!netdev)
return ERR_PTR(-ENOMEM);
rn = netdev_priv(netdev);
vinfo = opa_vnic_dev_priv(netdev);
vinfo->dd = dd;
- vinfo->num_tx_q = dd->num_sdma;
- vinfo->num_rx_q = dd->num_vnic_contexts;
+ vinfo->num_tx_q = chip_sdma_engines(dd);
+ vinfo->num_rx_q = dd->num_netdev_contexts;
vinfo->netdev = netdev;
rn->free_rdma_netdev = hfi1_vnic_free_rn;
rn->set_id = hfi1_vnic_set_vesw_id;
@@ -841,7 +643,6 @@
rxq->idx = i;
rxq->vinfo = vinfo;
rxq->netdev = netdev;
- netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
}
rc = hfi1_vnic_init(vinfo);