Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 902d12d..9b1fb84 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -67,8 +67,6 @@
#include "debugfs.h"
#include "fault.h"
-#define NUM_IB_PORTS 1
-
uint kdeth_qp;
module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
@@ -1074,6 +1072,8 @@
static void log_physical_state(struct hfi1_pportdata *ppd, u32 state);
static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
@@ -1100,9 +1100,9 @@
const char *desc;
};
-#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
-#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
-#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
+#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
+#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
+#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
/*
* Helpers for building HFI and DC error interrupt table entries. Different
@@ -4101,9 +4101,13 @@
def_access_ibp_counter(rdma_seq);
def_access_ibp_counter(unaligned);
def_access_ibp_counter(seq_naks);
+def_access_ibp_counter(rc_crwaits);
static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
[C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
+[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH),
+[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH),
+[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH),
[C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
CNTR_NORMAL),
[C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
@@ -4253,6 +4257,8 @@
access_sw_pio_drain),
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
access_sw_kmem_wait),
+[C_SW_TID_WAIT] = CNTR_ELEM("TidWait", 0, 0, CNTR_NORMAL,
+ hfi1_access_sw_tid_wait),
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
access_sw_send_schedule),
[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
@@ -5114,6 +5120,7 @@
[C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
[C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
[C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
+[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits),
[C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
access_sw_cpu_rc_acks),
[C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
@@ -5222,6 +5229,17 @@
return (chip_rev_minor & 0xF0) == 0x10;
}
+/* return true is kernel urg disabled for rcd */
+bool is_urg_masked(struct hfi1_ctxtdata *rcd)
+{
+ u64 mask;
+ u32 is = IS_RCVURGENT_START + rcd->ctxt;
+ u8 bit = is % 64;
+
+ mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64)));
+ return !(mask & BIT_ULL(bit));
+}
+
/*
* Append string s to buffer buf. Arguments curp and len are the current
* position and remaining length, respectively.
@@ -8181,7 +8199,7 @@
/**
* is_rcv_urgent_int() - User receive context urgent IRQ handler
* @dd: valid dd
- * @source: logical IRQ source (ofse from IS_RCVURGENT_START)
+ * @source: logical IRQ source (offset from IS_RCVURGENT_START)
*
* RX block receive urgent interrupt. Source is < 160.
*
@@ -8231,7 +8249,7 @@
is_sdma_eng_err_name, is_sdma_eng_err_int },
{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
is_sendctxt_err_name, is_sendctxt_err_int },
-{ IS_SDMA_START, IS_SDMA_END,
+{ IS_SDMA_START, IS_SDMA_IDLE_END,
is_sdma_eng_name, is_sdma_eng_int },
{ IS_VARIOUS_START, IS_VARIOUS_END,
is_various_name, is_various_int },
@@ -8257,7 +8275,7 @@
/* avoids a double compare by walking the table in-order */
for (entry = &is_table[0]; entry->is_name; entry++) {
- if (source < entry->end) {
+ if (source <= entry->end) {
trace_hfi1_interrupt(dd, entry, source);
entry->is_int(dd, source - entry->start);
return;
@@ -8276,7 +8294,7 @@
* context DATA IRQs are threaded and are not supported by this handler.
*
*/
-static irqreturn_t general_interrupt(int irq, void *data)
+irqreturn_t general_interrupt(int irq, void *data)
{
struct hfi1_devdata *dd = data;
u64 regs[CCE_NUM_INT_CSRS];
@@ -8309,7 +8327,7 @@
return handled;
}
-static irqreturn_t sdma_interrupt(int irq, void *data)
+irqreturn_t sdma_interrupt(int irq, void *data)
{
struct sdma_engine *sde = data;
struct hfi1_devdata *dd = sde->dd;
@@ -8352,7 +8370,6 @@
struct hfi1_devdata *dd = rcd->dd;
u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
- mmiowb(); /* make sure everything before is written */
write_csr(dd, addr, rcd->imask);
/* force the above write on the chip and get a value back */
(void)read_csr(dd, addr);
@@ -8401,7 +8418,7 @@
* invoked) is finished. The intent is to avoid extra interrupts while we
* are processing packets anyway.
*/
-static irqreturn_t receive_context_interrupt(int irq, void *data)
+irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
struct hfi1_devdata *dd = rcd->dd;
@@ -8441,7 +8458,7 @@
* Receive packet thread handler. This expects to be invoked with the
* receive interrupt still blocked.
*/
-static irqreturn_t receive_context_thread(int irq, void *data)
+irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
int present;
@@ -9651,30 +9668,10 @@
}
}
-static void init_qsfp_int(struct hfi1_devdata *dd)
+void init_qsfp_int(struct hfi1_devdata *dd)
{
struct hfi1_pportdata *ppd = dd->pport;
- u64 qsfp_mask, cce_int_mask;
- const int qsfp1_int_smask = QSFP1_INT % 64;
- const int qsfp2_int_smask = QSFP2_INT % 64;
-
- /*
- * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
- * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
- * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
- * the index of the appropriate CSR in the CCEIntMask CSR array
- */
- cce_int_mask = read_csr(dd, CCE_INT_MASK +
- (8 * (QSFP1_INT / 64)));
- if (dd->hfi1_id) {
- cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
- cce_int_mask);
- } else {
- cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
- cce_int_mask);
- }
+ u64 qsfp_mask;
qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
/* Clear current status to avoid spurious interrupts */
@@ -9691,6 +9688,12 @@
write_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
qsfp_mask);
+
+ /* Enable the appropriate QSFP IRQ source */
+ if (!dd->hfi1_id)
+ set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
+ else
+ set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
}
/*
@@ -9849,6 +9852,7 @@
/* disable the port */
clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
+ cancel_work_sync(&ppd->freeze_work);
}
static inline int init_cpu_counters(struct hfi1_devdata *dd)
@@ -10577,12 +10581,29 @@
}
}
-/*
- * Verify if BCT for data VLs is non-zero.
+/**
+ * data_vls_operational() - Verify if data VL BCT credits and MTU
+ * are both set.
+ * @ppd: pointer to hfi1_pportdata structure
+ *
+ * Return: true - Ok, false -otherwise.
*/
static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
{
- return !!ppd->actual_vls_operational;
+ int i;
+ u64 reg;
+
+ if (!ppd->actual_vls_operational)
+ return false;
+
+ for (i = 0; i < ppd->vls_supported; i++) {
+ reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
+ if ((reg && !ppd->dd->vld[i].mtu) ||
+ (!reg && ppd->dd->vld[i].mtu))
+ return false;
+ }
+
+ return true;
}
/*
@@ -10695,7 +10716,8 @@
if (!data_vls_operational(ppd)) {
dd_dev_err(dd,
- "%s: data VLs not operational\n", __func__);
+ "%s: Invalid data VL credits or mtu\n",
+ __func__);
ret = -EINVAL;
break;
}
@@ -10768,13 +10790,15 @@
break;
ppd->port_error_action = 0;
- ppd->host_link_state = HLS_DN_POLL;
if (quick_linkup) {
/* quick linkup does not go into polling */
ret = do_quick_linkup(dd);
} else {
ret1 = set_physical_link_state(dd, PLS_POLLING);
+ if (!ret1)
+ ret1 = wait_phys_link_out_of_offline(ppd,
+ 3000);
if (ret1 != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to transition to Polling link state, return 0x%x\n",
@@ -10782,6 +10806,14 @@
ret = -EINVAL;
}
}
+
+ /*
+ * Change the host link state after requesting DC8051 to
+ * change its physical state so that we can ignore any
+ * interrupt with stale LNI(XX) error, which will not be
+ * cleared until DC8051 transitions to Polling state.
+ */
+ ppd->host_link_state = HLS_DN_POLL;
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
/*
@@ -11776,12 +11808,10 @@
<< RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
}
- mmiowb();
reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
(((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
<< RCV_HDR_HEAD_HEAD_SHIFT);
write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
- mmiowb();
}
u32 hdrqempty(struct hfi1_ctxtdata *rcd)
@@ -11932,10 +11962,16 @@
rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
}
- if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
+ if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, true);
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
+ }
+ if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
+ }
if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
@@ -11965,6 +12001,13 @@
rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
+ if (op & HFI1_RCVCTRL_URGENT_ENB)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, true);
+ if (op & HFI1_RCVCTRL_URGENT_DIS)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, false);
+
hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
@@ -12913,6 +12956,39 @@
return read_state;
}
+/*
+ * wait_phys_link_out_of_offline - wait for any out of offline state
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for any out of offline physical link
+ * state change to occur.
+ * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if ((read_state & 0xF0) != PLS_OFFLINE)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n",
+ read_state, msecs);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ log_state_transition(ppd, read_state);
+ return read_state;
+}
+
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
@@ -12964,63 +13040,71 @@
return ret;
}
-/**
- * get_int_mask - get 64 bit int mask
- * @dd - the devdata
- * @i - the csr (relative to CCE_INT_MASK)
- *
- * Returns the mask with the urgent interrupt mask
- * bit clear for kernel receive contexts.
- */
-static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
-{
- u64 mask = U64_MAX; /* default to no change */
-
- if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
- int j = (i - (IS_RCVURGENT_START / 64)) * 64;
- int k = !j ? IS_RCVURGENT_START % 64 : 0;
-
- if (j)
- j -= IS_RCVURGENT_START % 64;
- /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
- for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
- /* convert to bit in mask and clear */
- mask &= ~BIT_ULL(k);
- }
- return mask;
-}
-
/* ========================================================================= */
-/*
- * Enable/disable chip from delivering interrupts.
+/**
+ * read_mod_write() - Calculate the IRQ register index and set/clear the bits
+ * @dd: valid devdata
+ * @src: IRQ source to determine register index from
+ * @bits: the bits to set or clear
+ * @set: true == set the bits, false == clear the bits
+ *
*/
-void set_intr_state(struct hfi1_devdata *dd, u32 enable)
+static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
+ bool set)
{
- int i;
+ u64 reg;
+ u16 idx = src / BITS_PER_REGISTER;
- /*
- * In HFI, the mask needs to be 1 to allow interrupts.
- */
- if (enable) {
- /* enable all interrupts but urgent on kernel contexts */
- for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
- u64 mask = get_int_mask(dd, i);
+ spin_lock(&dd->irq_src_lock);
+ reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
+ if (set)
+ reg |= bits;
+ else
+ reg &= ~bits;
+ write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
+ spin_unlock(&dd->irq_src_lock);
+}
- write_csr(dd, CCE_INT_MASK + (8 * i), mask);
+/**
+ * set_intr_bits() - Enable/disable a range (one or more) IRQ sources
+ * @dd: valid devdata
+ * @first: first IRQ source to set/clear
+ * @last: last IRQ source (inclusive) to set/clear
+ * @set: true == set the bits, false == clear the bits
+ *
+ * If first == last, set the exact source.
+ */
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
+{
+ u64 bits = 0;
+ u64 bit;
+ u16 src;
+
+ if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
+ return -EINVAL;
+
+ if (last < first)
+ return -ERANGE;
+
+ for (src = first; src <= last; src++) {
+ bit = src % BITS_PER_REGISTER;
+ /* wrapped to next register? */
+ if (!bit && bits) {
+ read_mod_write(dd, src - 1, bits, set);
+ bits = 0;
}
-
- init_qsfp_int(dd);
- } else {
- for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
+ bits |= BIT_ULL(bit);
}
+ read_mod_write(dd, last, bits, set);
+
+ return 0;
}
/*
* Clear all interrupt sources on the chip.
*/
-static void clear_all_interrupts(struct hfi1_devdata *dd)
+void clear_all_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -13044,38 +13128,11 @@
write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
}
-/**
- * hfi1_clean_up_interrupts() - Free all IRQ resources
- * @dd: valid device data data structure
- *
- * Free the MSIx and assoicated PCI resources, if they have been allocated.
- */
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
-{
- int i;
- struct hfi1_msix_entry *me = dd->msix_entries;
-
- /* remove irqs - must happen before disabling/turning off */
- for (i = 0; i < dd->num_msix_entries; i++, me++) {
- if (!me->arg) /* => no irq, no affinity */
- continue;
- hfi1_put_irq_affinity(dd, me);
- pci_free_irq(dd->pcidev, i, me->arg);
- }
-
- /* clean structures */
- kfree(dd->msix_entries);
- dd->msix_entries = NULL;
- dd->num_msix_entries = 0;
-
- pci_free_irq_vectors(dd->pcidev);
-}
-
/*
* Remap the interrupt source from the general handler to the given MSI-X
* interrupt.
*/
-static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
{
u64 reg;
int m, n;
@@ -13099,8 +13156,7 @@
write_csr(dd, CCE_INT_MAP + (8 * m), reg);
}
-static void remap_sdma_interrupts(struct hfi1_devdata *dd,
- int engine, int msix_intr)
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
{
/*
* SDMA engine interrupt sources grouped by type, rather than
@@ -13109,204 +13165,16 @@
* SDMAProgress
* SDMAIdle
*/
- remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
-}
-
-static int request_msix_irqs(struct hfi1_devdata *dd)
-{
- int first_general, last_general;
- int first_sdma, last_sdma;
- int first_rx, last_rx;
- int i, ret = 0;
-
- /* calculate the ranges we are going to use */
- first_general = 0;
- last_general = first_general + 1;
- first_sdma = last_general;
- last_sdma = first_sdma + dd->num_sdma;
- first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
-
- /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
- dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
-
- /*
- * Sanity check - the code expects all SDMA chip source
- * interrupts to be in the same CSR, starting at bit 0. Verify
- * that this is true by checking the bit location of the start.
- */
- BUILD_BUG_ON(IS_SDMA_START % 64);
-
- for (i = 0; i < dd->num_msix_entries; i++) {
- struct hfi1_msix_entry *me = &dd->msix_entries[i];
- const char *err_info;
- irq_handler_t handler;
- irq_handler_t thread = NULL;
- void *arg = NULL;
- int idx;
- struct hfi1_ctxtdata *rcd = NULL;
- struct sdma_engine *sde = NULL;
- char name[MAX_NAME_SIZE];
-
- /* obtain the arguments to pci_request_irq */
- if (first_general <= i && i < last_general) {
- idx = i - first_general;
- handler = general_interrupt;
- arg = dd;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d", dd->unit);
- err_info = "general";
- me->type = IRQ_GENERAL;
- } else if (first_sdma <= i && i < last_sdma) {
- idx = i - first_sdma;
- sde = &dd->per_sdma[idx];
- handler = sdma_interrupt;
- arg = sde;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d sdma%d", dd->unit, idx);
- err_info = "sdma";
- remap_sdma_interrupts(dd, idx, i);
- me->type = IRQ_SDMA;
- } else if (first_rx <= i && i < last_rx) {
- idx = i - first_rx;
- rcd = hfi1_rcd_get_by_index_safe(dd, idx);
- if (rcd) {
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d kctxt%d",
- dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
- rcd->msix_intr = i;
- hfi1_rcd_put(rcd);
- }
- } else {
- /* not in our expected range - complain, then
- * ignore it
- */
- dd_dev_err(dd,
- "Unexpected extra MSI-X interrupt %d\n", i);
- continue;
- }
- /* no argument, no interrupt */
- if (!arg)
- continue;
- /* make sure the name is terminated */
- name[sizeof(name) - 1] = 0;
- me->irq = pci_irq_vector(dd->pcidev, i);
- ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
- name);
- if (ret) {
- dd_dev_err(dd,
- "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
- err_info, me->irq, idx, ret);
- return ret;
- }
- /*
- * assign arg after pci_request_irq call, so it will be
- * cleaned up
- */
- me->arg = arg;
-
- ret = hfi1_get_irq_affinity(dd, me);
- if (ret)
- dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
- }
-
- return ret;
-}
-
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
-{
- int i;
-
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
- synchronize_irq(me->irq);
- }
-}
-
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
- struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
- if (!me->arg) /* => no irq, no affinity */
- return;
-
- hfi1_put_irq_affinity(dd, me);
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
-
- me->arg = NULL;
-}
-
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
- struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_msix_entry *me;
- int idx = rcd->ctxt;
- void *arg = rcd;
- int ret;
-
- rcd->msix_intr = dd->vnic.msix_idx++;
- me = &dd->msix_entries[rcd->msix_intr];
-
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- me->type = IRQ_RCVCTXT;
- me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
- remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
-
- ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
- receive_context_interrupt,
- receive_context_thread, arg,
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- if (ret) {
- dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
- me->irq, idx, ret);
- return;
- }
- /*
- * assign arg after pci_request_irq call, so it will be
- * cleaned up
- */
- me->arg = arg;
-
- ret = hfi1_get_irq_affinity(dd, me);
- if (ret) {
- dd_dev_err(dd,
- "unable to pin IRQ %d\n", ret);
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
- }
+ remap_intr(dd, IS_SDMA_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
}
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
*/
-static void reset_interrupts(struct hfi1_devdata *dd)
+void reset_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -13319,54 +13187,33 @@
write_csr(dd, CCE_INT_MAP + (8 * i), 0);
}
+/**
+ * set_up_interrupts() - Initialize the IRQ resources and state
+ * @dd: valid devdata
+ *
+ */
static int set_up_interrupts(struct hfi1_devdata *dd)
{
- u32 total;
- int ret, request;
-
- /*
- * Interrupt count:
- * 1 general, "slow path" interrupt (includes the SDMA engines
- * slow source, SDMACleanupDone)
- * N interrupts - one per used SDMA engine
- * M interrupt - one per kernel receive context
- * V interrupt - one for each VNIC context
- */
- total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
-
- /* ask for MSI-X interrupts */
- request = request_msix(dd, total);
- if (request < 0) {
- ret = request;
- goto fail;
- } else {
- dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
- GFP_KERNEL);
- if (!dd->msix_entries) {
- ret = -ENOMEM;
- goto fail;
- }
- /* using MSI-X */
- dd->num_msix_entries = total;
- dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
- }
+ int ret;
/* mask all interrupts */
- set_intr_state(dd, 0);
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+
/* clear all pending interrupts */
clear_all_interrupts(dd);
/* reset general handler mask, chip MSI-X mappings */
reset_interrupts(dd);
- ret = request_msix_irqs(dd);
+ /* ask for MSI-X interrupts */
+ ret = msix_initialize(dd);
if (ret)
- goto fail;
+ return ret;
- return 0;
+ ret = msix_request_irqs(dd);
+ if (ret)
+ msix_clean_up_interrupts(dd);
-fail:
- hfi1_clean_up_interrupts(dd);
return ret;
}
@@ -13388,7 +13235,7 @@
int total_contexts;
int ret;
unsigned ngroups;
- int qos_rmt_count;
+ int rmt_count;
int user_rmt_reduced;
u32 n_usr_ctxts;
u32 send_contexts = chip_send_contexts(dd);
@@ -13450,10 +13297,23 @@
n_usr_ctxts = rcv_contexts - total_contexts;
}
- /* each user context requires an entry in the RMT */
- qos_rmt_count = qos_rmt_entries(dd, NULL, NULL);
- if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
- user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count;
+ /*
+ * The RMT entries are currently allocated as shown below:
+ * 1. QOS (0 to 128 entries);
+ * 2. FECN (num_kernel_context - 1 + num_user_contexts +
+ * num_vnic_contexts);
+ * 3. VNIC (num_vnic_contexts).
+ * It should be noted that FECN oversubscribe num_vnic_contexts
+ * entries of RMT because both VNIC and PSM could allocate any receive
+ * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts,
+ * and PSM FECN must reserve an RMT entry for each possible PSM receive
+ * context.
+ */
+ rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
+ if (HFI1_CAP_IS_KSET(TID_RDMA))
+ rmt_count += num_kernel_contexts - 1;
+ if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
+ user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
dd_dev_err(dd,
"RMT size is reducing the number of user receive contexts from %u to %d\n",
n_usr_ctxts,
@@ -14174,6 +14034,19 @@
}
/**
+ * hfi1_get_qp_map
+ * @dd: device data
+ * @idx: index to read
+ */
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx)
+{
+ u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8);
+
+ reg >>= (idx % 8) * 8;
+ return reg;
+}
+
+/**
* init_qpmap_table
* @dd - device data
* @first_ctxt - first context
@@ -14434,35 +14307,43 @@
init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
}
-static void init_user_fecn_handling(struct hfi1_devdata *dd,
- struct rsm_map_table *rmt)
+static void init_fecn_handling(struct hfi1_devdata *dd,
+ struct rsm_map_table *rmt)
{
struct rsm_rule_data rrd;
u64 reg;
- int i, idx, regoff, regidx;
+ int i, idx, regoff, regidx, start;
u8 offset;
+ u32 total_cnt;
+
+ if (HFI1_CAP_IS_KSET(TID_RDMA))
+ /* Exclude context 0 */
+ start = 1;
+ else
+ start = dd->first_dyn_alloc_ctxt;
+
+ total_cnt = dd->num_rcv_contexts - start;
/* there needs to be enough room in the map table */
- if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) {
- dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
+ if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) {
+ dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n");
return;
}
/*
* RSM will extract the destination context as an index into the
* map table. The destination contexts are a sequential block
- * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
+ * in the range start...num_rcv_contexts-1 (inclusive).
* Map entries are accessed as offset + extracted value. Adjust
* the added offset so this sequence can be placed anywhere in
* the table - as long as the entries themselves do not wrap.
* There are only enough bits in offset for the table size, so
* start with that to allow for a "negative" offset.
*/
- offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
- (int)dd->first_dyn_alloc_ctxt);
+ offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start);
- for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
- i < dd->num_rcv_contexts; i++, idx++) {
+ for (i = start, idx = rmt->used; i < dd->num_rcv_contexts;
+ i++, idx++) {
/* replace with identity mapping */
regoff = (idx % 8) * 8;
regidx = idx / 8;
@@ -14497,7 +14378,7 @@
/* add rule 1 */
add_rsm_rule(dd, RSM_INS_FECN, &rrd);
- rmt->used += dd->num_user_contexts;
+ rmt->used += total_cnt;
}
/* Initialize RSM for VNIC */
@@ -14573,7 +14454,7 @@
clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
}
-static void init_rxe(struct hfi1_devdata *dd)
+static int init_rxe(struct hfi1_devdata *dd)
{
struct rsm_map_table *rmt;
u64 val;
@@ -14582,9 +14463,12 @@
write_csr(dd, RCV_ERR_MASK, ~0ull);
rmt = alloc_rsm_map_table(dd);
+ if (!rmt)
+ return -ENOMEM;
+
/* set up QOS, including the QPN map table */
init_qos(dd, rmt);
- init_user_fecn_handling(dd, rmt);
+ init_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
/* record number of used rsm map entries for vnic */
dd->vnic.rmt_start = rmt->used;
@@ -14608,6 +14492,7 @@
val |= ((4ull & RCV_BYPASS_HDR_SIZE_MASK) <<
RCV_BYPASS_HDR_SIZE_SHIFT);
write_csr(dd, RCV_BYPASS, val);
+ return 0;
}
static void init_other(struct hfi1_devdata *dd)
@@ -14810,8 +14695,8 @@
*/
static int init_asic_data(struct hfi1_devdata *dd)
{
- unsigned long flags;
- struct hfi1_devdata *tmp, *peer = NULL;
+ unsigned long index;
+ struct hfi1_devdata *peer;
struct hfi1_asic_data *asic_data;
int ret = 0;
@@ -14820,14 +14705,12 @@
if (!asic_data)
return -ENOMEM;
- spin_lock_irqsave(&hfi1_devs_lock, flags);
+ xa_lock_irq(&hfi1_dev_table);
/* Find our peer device */
- list_for_each_entry(tmp, &hfi1_dev_list, list) {
- if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
- dd->unit != tmp->unit) {
- peer = tmp;
+ xa_for_each(&hfi1_dev_table, index, peer) {
+ if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(peer)) &&
+ dd->unit != peer->unit)
break;
- }
}
if (peer) {
@@ -14839,7 +14722,7 @@
mutex_init(&dd->asic_data->asic_resource_mutex);
}
dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ xa_unlock_irq(&hfi1_dev_table);
/* first one through - set up i2c devices */
if (!peer)
@@ -14919,20 +14802,16 @@
}
/**
- * Allocate and initialize the device structure for the hfi.
+ * hfi1_init_dd() - Initialize most of the dd structure.
* @dev: the pci_dev for hfi1_ib device
* @ent: pci_device_id struct for this dev
*
- * Also allocates, initializes, and returns the devdata struct for this
- * device instance
- *
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+int hfi1_init_dd(struct hfi1_devdata *dd)
{
- struct hfi1_devdata *dd;
+ struct pci_dev *pdev = dd->pcidev;
struct hfi1_pportdata *ppd;
u64 reg;
int i, ret;
@@ -14943,13 +14822,8 @@
"Functional simulator"
};
struct pci_dev *parent = pdev->bus->self;
- u32 sdma_engines;
+ u32 sdma_engines = chip_sdma_engines(dd);
- dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
- sizeof(struct hfi1_pportdata));
- if (IS_ERR(dd))
- goto bail;
- sdma_engines = chip_sdma_engines(dd);
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++) {
int vl;
@@ -15128,6 +15002,12 @@
if (ret)
goto bail_cleanup;
+ /*
+ * This should probably occur in hfi1_pcie_init(), but historically
+ * occurs after the do_pcie_gen3_transition() code.
+ */
+ tune_pcie_caps(dd);
+
/* start setting dd values and adjusting CSRs */
init_early_variables(dd);
@@ -15150,7 +15030,10 @@
goto bail_cleanup;
/* set initial RXE CSRs */
- init_rxe(dd);
+ ret = init_rxe(dd);
+ if (ret)
+ goto bail_cleanup;
+
/* set initial TXE CSRs */
init_txe(dd);
/* set initial non-RXE, non-TXE CSRs */
@@ -15240,14 +15123,13 @@
free_cntrs(dd);
bail_clear_intr:
hfi1_comp_vectors_clean_up(dd);
- hfi1_clean_up_interrupts(dd);
+ msix_clean_up_interrupts(dd);
bail_cleanup:
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
- dd = ERR_PTR(ret);
bail:
- return dd;
+ return ret;
}
static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,