Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 758d273..26b792b 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -49,11 +49,12 @@
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/hrtimer.h>
#include <linux/bitmap.h>
+#include <linux/numa.h>
#include <rdma/rdma_vt.h>
#include "hfi.h"
@@ -72,7 +73,6 @@
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
-#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
/*
* min buffers we want to have per context, after driver
*/
@@ -83,6 +83,8 @@
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
+#define NUM_IB_PORTS 1
+
/*
* Number of user receive contexts we are configured to use (to allow for more
* pio buffers per ctxt, etc.) Zero means use one user context per CPU.
@@ -122,7 +124,7 @@
static inline u64 encode_rcv_header_entry_size(u16 size);
-static struct idr hfi1_unit_table;
+DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
static int hfi1_create_kctxt(struct hfi1_devdata *dd,
struct hfi1_pportdata *ppd)
@@ -213,12 +215,12 @@
struct hfi1_ctxtdata *rcd =
container_of(kref, struct hfi1_ctxtdata, kref);
- hfi1_free_ctxtdata(rcd->dd, rcd);
-
spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
rcd->dd->rcd[rcd->ctxt] = NULL;
spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
+ hfi1_free_ctxtdata(rcd->dd, rcd);
+
kfree(rcd);
}
@@ -241,10 +243,13 @@
* @rcd: pointer to an initialized rcd data structure
*
* Use this to get a reference after the init.
+ *
+ * Return : reflect kref_get_unless_zero(), which returns non-zero on
+ * increment, otherwise 0.
*/
-void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
+int hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
{
- kref_get(&rcd->kref);
+ return kref_get_unless_zero(&rcd->kref);
}
/**
@@ -324,7 +329,8 @@
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd[ctxt]) {
rcd = dd->rcd[ctxt];
- hfi1_rcd_get(rcd);
+ if (!hfi1_rcd_get(rcd))
+ rcd = NULL;
}
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
@@ -369,6 +375,9 @@
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
mutex_init(&rcd->exp_mutex);
+ spin_lock_init(&rcd->exp_lock);
+ INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
+ INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
@@ -460,7 +469,7 @@
if (rcd->egrbufs.size < hfi1_max_mtu) {
rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu);
hfi1_cdbg(PROC,
- "ctxt%u: eager bufs size too small. Adjusting to %zu\n",
+ "ctxt%u: eager bufs size too small. Adjusting to %u\n",
rcd->ctxt, rcd->egrbufs.size);
}
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
@@ -471,6 +480,9 @@
GFP_KERNEL, numa);
if (!rcd->opstats)
goto bail;
+
+ /* Initialize TID flow generations for the context */
+ hfi1_kern_init_ctxt_generations(rcd);
}
*context = rcd;
@@ -654,9 +666,8 @@
ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
if (loopback) {
- hfi1_early_err(&pdev->dev,
- "Faking data partition 0x8001 in idx %u\n",
- !default_pkey_idx);
+ dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
+ !default_pkey_idx);
ppd->pkeys[!default_pkey_idx] = 0x8001;
}
@@ -702,9 +713,7 @@
return;
bail:
-
- hfi1_early_err(&pdev->dev,
- "Congestion Control Agent disabled for port %d\n", port);
+ dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
}
/*
@@ -773,6 +782,8 @@
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_IS_KSET(TID_RDMA))
+ rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
hfi1_rcvctrl(dd, rcvmask, rcd);
sc_enable(rcd->sc);
hfi1_rcd_put(rcd);
@@ -794,7 +805,8 @@
ppd->hfi1_wq =
alloc_workqueue(
"hfi%d_%d",
- WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
+ WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
+ WQ_MEM_RECLAIM,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
dd->unit, pidx);
if (!ppd->hfi1_wq)
@@ -833,6 +845,23 @@
}
/**
+ * enable_general_intr() - Enable the IRQs that will be handled by the
+ * general interrupt handler.
+ * @dd: valid devdata
+ *
+ */
+static void enable_general_intr(struct hfi1_devdata *dd)
+{
+ set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
+ set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
+ set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
+ set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
+ set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
+ set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
+ set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
+}
+
+/**
* hfi1_init - do the actual initialization sequence on the chip
* @dd: the hfi1_ib device
* @reinit: re-initializing, so don't allocate new memory
@@ -883,10 +912,10 @@
goto done;
/* allocate dummy tail memory for all receive contexts */
- dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, sizeof(u64),
- &dd->rcvhdrtail_dummy_dma,
- GFP_KERNEL);
+ dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
+ sizeof(u64),
+ &dd->rcvhdrtail_dummy_dma,
+ GFP_KERNEL);
if (!dd->rcvhdrtail_dummy_kvaddr) {
dd_dev_err(dd, "cannot allocate dummy tail memory\n");
@@ -911,11 +940,14 @@
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
+ if (!lastfail)
+ lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
if (lastfail) {
dd_dev_err(dd,
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
ret = lastfail;
}
+ /* enable IRQ */
hfi1_rcd_put(rcd);
}
@@ -954,7 +986,8 @@
HFI1_STATUS_INITTED;
if (!ret) {
/* enable all interrupts from the chip */
- set_intr_state(dd, 1);
+ enable_general_intr(dd);
+ init_qsfp_int(dd);
/* chip is OK for user apps; mark it as initialized */
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -986,21 +1019,9 @@
return ret;
}
-static inline struct hfi1_devdata *__hfi1_lookup(int unit)
-{
- return idr_find(&hfi1_unit_table, unit);
-}
-
struct hfi1_devdata *hfi1_lookup(int unit)
{
- struct hfi1_devdata *dd;
- unsigned long flags;
-
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- dd = __hfi1_lookup(unit);
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-
- return dd;
+ return xa_load(&hfi1_dev_table, unit);
}
/*
@@ -1051,9 +1072,9 @@
}
dd->flags &= ~HFI1_INITTED;
- /* mask and clean up interrupts, but not errors */
- set_intr_state(dd, 0);
- hfi1_clean_up_interrupts(dd);
+ /* mask and clean up interrupts */
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+ msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -1168,7 +1189,7 @@
/*
* Release our hold on the shared asic data. If we are the last one,
* return the structure to be finalized outside the lock. Must be
- * holding hfi1_devs_lock.
+ * holding hfi1_dev_table lock.
*/
static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd)
{
@@ -1204,13 +1225,10 @@
struct hfi1_asic_data *ad;
unsigned long flags;
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- if (!list_empty(&dd->list)) {
- idr_remove(&hfi1_unit_table, dd->unit);
- list_del_init(&dd->list);
- }
+ xa_lock_irqsave(&hfi1_dev_table, flags);
+ __xa_erase(&hfi1_dev_table, dd->unit);
ad = release_asic_data(dd);
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ xa_unlock_irqrestore(&hfi1_dev_table, flags);
finalize_asic_data(dd, ad);
free_platform_config(dd);
@@ -1246,17 +1264,18 @@
kobject_put(&dd->kobj);
}
-/*
- * Allocate our primary per-unit data structure. Must be done via verbs
- * allocator, because the verbs cleanup process both does cleanup and
- * free of the data structure.
- * "extra" is for chip-specific data.
+/**
+ * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
+ * @pdev: Valid PCI device
+ * @extra: How many bytes to alloc past the default
*
- * Use the idr mechanism to get a unit number for this unit.
+ * Must be done via verbs allocator, because the verbs cleanup process
+ * both does cleanup and free of the data structure.
+ * "extra" is for chip-specific data.
*/
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
+static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
+ size_t extra)
{
- unsigned long flags;
struct hfi1_devdata *dd;
int ret, nports;
@@ -1271,24 +1290,13 @@
dd->pport = (struct hfi1_pportdata *)(dd + 1);
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
+ dd->node = NUMA_NO_NODE;
- INIT_LIST_HEAD(&dd->list);
- idr_preload(GFP_KERNEL);
- spin_lock_irqsave(&hfi1_devs_lock, flags);
-
- ret = idr_alloc(&hfi1_unit_table, dd, 0, 0, GFP_NOWAIT);
- if (ret >= 0) {
- dd->unit = ret;
- list_add(&dd->list, &hfi1_dev_list);
- }
- dd->node = -1;
-
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
- idr_preload_end();
-
+ ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
+ GFP_KERNEL);
if (ret < 0) {
- hfi1_early_err(&pdev->dev,
- "Could not allocate unit ID: error %d\n", -ret);
+ dev_err(&pdev->dev,
+ "Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
@@ -1309,6 +1317,7 @@
spin_lock_init(&dd->pio_map_lock);
mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
+ spin_lock_init(&dd->irq_src_lock);
dd->int_counter = alloc_percpu(u64);
if (!dd->int_counter) {
@@ -1474,16 +1483,17 @@
/* sanitize link CRC options */
link_crc_mask &= SUPPORTED_CRCS;
+ ret = opfn_init();
+ if (ret < 0) {
+ pr_err("Failed to allocate opfn_wq");
+ goto bail_dev;
+ }
+
/*
* These must be called before the driver is registered with
* the PCI subsystem.
*/
- idr_init(&hfi1_unit_table);
-
hfi1_dbg_init();
- ret = hfi1_wss_init();
- if (ret < 0)
- goto bail_wss;
ret = pci_register_driver(&hfi1_pci_driver);
if (ret < 0) {
pr_err("Unable to register driver: error %d\n", -ret);
@@ -1492,10 +1502,7 @@
goto bail; /* all OK */
bail_dev:
- hfi1_wss_exit();
-bail_wss:
hfi1_dbg_exit();
- idr_destroy(&hfi1_unit_table);
dev_cleanup();
bail:
return ret;
@@ -1509,11 +1516,11 @@
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
+ opfn_exit();
node_affinity_destroy_all();
- hfi1_wss_exit();
hfi1_dbg_exit();
- idr_destroy(&hfi1_unit_table);
+ WARN_ON(!xa_empty(&hfi1_dev_table));
dispose_firmware(); /* asymmetric with obtain_firmware() */
dev_cleanup();
}
@@ -1564,7 +1571,7 @@
struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
if (rcd) {
- hfi1_clear_tids(rcd);
+ hfi1_free_ctxt_rcv_groups(rcd);
hfi1_free_ctxt(rcd);
}
}
@@ -1604,23 +1611,23 @@
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
{
if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev, "Receive header queue count too small\n");
+ dd_dev_err(dd, "Receive header queue count too small\n");
return -EINVAL;
}
if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
+ dd_dev_err(dd,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
return -EINVAL;
}
if (thecnt % HDRQ_INCREMENT) {
- hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
return -EINVAL;
}
@@ -1639,22 +1646,29 @@
/* Validate dev ids */
if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
+ dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
ret = -ENODEV;
goto bail;
}
+ /* Allocate the dd so we can get to work */
+ dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+ sizeof(struct hfi1_pportdata));
+ if (IS_ERR(dd)) {
+ ret = PTR_ERR(dd);
+ goto bail;
+ }
+
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+ ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
/* use the encoding function as a sanitization check */
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
- hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
- hfi1_hdrq_entsize);
+ dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
+ hfi1_hdrq_entsize);
ret = -EINVAL;
goto bail;
}
@@ -1676,10 +1690,10 @@
clamp_val(eager_buffer_size,
MIN_EAGER_BUFFER * 8,
MAX_EAGER_BUFFER_TOTAL);
- hfi1_early_info(&pdev->dev, "Eager buffer size %u\n",
- eager_buffer_size);
+ dd_dev_info(dd, "Eager buffer size %u\n",
+ eager_buffer_size);
} else {
- hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n");
+ dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
ret = -EINVAL;
goto bail;
}
@@ -1687,7 +1701,7 @@
/* restrict value of hfi1_rcvarr_split */
hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
- ret = hfi1_pcie_init(pdev, ent);
+ ret = hfi1_pcie_init(dd);
if (ret)
goto bail;
@@ -1695,12 +1709,9 @@
* Do device-specific initialization, function table setup, dd
* allocation, etc.
*/
- dd = hfi1_init_dd(pdev, ent);
-
- if (IS_ERR(dd)) {
- ret = PTR_ERR(dd);
+ ret = hfi1_init_dd(dd);
+ if (ret)
goto clean_bail; /* error already printed */
- }
ret = create_workqueues(dd);
if (ret)
@@ -1731,7 +1742,7 @@
dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
if (initfail || ret) {
- hfi1_clean_up_interrupts(dd);
+ msix_clean_up_interrupts(dd);
stop_timers(dd);
flush_workqueue(ib_wq);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1842,9 +1853,9 @@
gfp_flags = GFP_KERNEL;
else
gfp_flags = GFP_USER;
- rcd->rcvhdrq = dma_zalloc_coherent(
- &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
- gfp_flags | __GFP_COMP);
+ rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
+ &rcd->rcvhdrq_dma,
+ gfp_flags | __GFP_COMP);
if (!rcd->rcvhdrq) {
dd_dev_err(dd,
@@ -1855,9 +1866,10 @@
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) {
- rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE,
- &rcd->rcvhdrqtailaddr_dma, gfp_flags);
+ rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
+ PAGE_SIZE,
+ &rcd->rcvhdrqtailaddr_dma,
+ gfp_flags);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
}
@@ -1953,10 +1965,10 @@
while (alloced_bytes < rcd->egrbufs.size &&
rcd->egrbufs.alloced < rcd->egrbufs.count) {
rcd->egrbufs.buffers[idx].addr =
- dma_zalloc_coherent(&dd->pcidev->dev,
- rcd->egrbufs.rcvtid_size,
- &rcd->egrbufs.buffers[idx].dma,
- gfp_flags);
+ dma_alloc_coherent(&dd->pcidev->dev,
+ rcd->egrbufs.rcvtid_size,
+ &rcd->egrbufs.buffers[idx].dma,
+ gfp_flags);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;
@@ -2027,7 +2039,7 @@
rcd->egrbufs.size = alloced_bytes;
hfi1_cdbg(PROC,
- "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n",
+ "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB\n",
rcd->ctxt, rcd->egrbufs.alloced,
rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024);