Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index 1f2759c..a297f13 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_RDMAVT
tristate "RDMA verbs transport library"
- depends on X86_64 && ARCH_DMA_ADDR_T_64BIT
+ depends on INFINIBAND_VIRT_DMA
+ depends on X86_64
depends on PCI
select DMA_VIRT_OPS
---help---
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index a85571a..bd729aa 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -248,8 +248,8 @@
*/
if (udata && udata->outlen >= sizeof(__u64)) {
cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc);
- if (!cq->ip) {
- err = -ENOMEM;
+ if (IS_ERR(cq->ip)) {
+ err = PTR_ERR(cq->ip);
goto bail_wc;
}
@@ -327,7 +327,7 @@
if (cq->ip)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
- vfree(cq->queue);
+ vfree(cq->kqueue);
}
/**
diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c
index 652f4a7..37853aa 100644
--- a/drivers/infiniband/sw/rdmavt/mmap.c
+++ b/drivers/infiniband/sw/rdmavt/mmap.c
@@ -154,7 +154,7 @@
* @udata: user data (must be valid!)
* @obj: opaque pointer to a cq, wq etc
*
- * Return: rvt_mmap struct on success
+ * Return: rvt_mmap struct on success, ERR_PTR on failure
*/
struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size,
struct ib_udata *udata, void *obj)
@@ -166,7 +166,7 @@
ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node);
if (!ip)
- return ip;
+ return ERR_PTR(-ENOMEM);
size = PAGE_ALIGN(size);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 0b0a241..d14ad52 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -61,6 +61,8 @@
#define RVT_RWQ_COUNT_THRESHOLD 16
static void rvt_rc_timeout(struct timer_list *t);
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type);
/*
* Convert the AETH RNR timeout code into the number of microseconds.
@@ -452,40 +454,41 @@
}
/**
- * free_all_qps - check for QPs still in use
+ * rvt_free_qp_cb - callback function to reset a qp
+ * @qp: the qp to reset
+ * @v: a 64-bit value
+ *
+ * This function resets the qp and removes it from the
+ * qp hash table.
+ */
+static void rvt_free_qp_cb(struct rvt_qp *qp, u64 v)
+{
+ unsigned int *qp_inuse = (unsigned int *)v;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ /* Reset the qp and remove it from the qp hash list */
+ rvt_reset_qp(rdi, qp, qp->ibqp.qp_type);
+
+ /* Increment the qp_inuse count */
+ (*qp_inuse)++;
+}
+
+/**
+ * rvt_free_all_qps - check for QPs still in use
* @rdi: rvt device info structure
*
* There should not be any QPs still in use.
* Free memory for table.
+ * Return the number of QPs still in use.
*/
static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
{
- unsigned long flags;
- struct rvt_qp *qp;
- unsigned n, qp_inuse = 0;
- spinlock_t *ql; /* work around too long line below */
-
- if (rdi->driver_f.free_all_qps)
- qp_inuse = rdi->driver_f.free_all_qps(rdi);
+ unsigned int qp_inuse = 0;
qp_inuse += rvt_mcast_tree_empty(rdi);
- if (!rdi->qp_dev)
- return qp_inuse;
+ rvt_qp_iter(rdi, (u64)&qp_inuse, rvt_free_qp_cb);
- ql = &rdi->qp_dev->qpt_lock;
- spin_lock_irqsave(ql, flags);
- for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
- qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
- lockdep_is_held(ql));
- RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
-
- for (; qp; qp = rcu_dereference_protected(qp->next,
- lockdep_is_held(ql)))
- qp_inuse++;
- }
- spin_unlock_irqrestore(ql, flags);
- synchronize_rcu();
return qp_inuse;
}
@@ -895,21 +898,19 @@
qp->s_tail_ack_queue = 0;
qp->s_acked_ack_queue = 0;
qp->s_num_rd_atomic = 0;
- if (qp->r_rq.kwq)
- qp->r_rq.kwq->count = qp->r_rq.size;
qp->r_sge.num_sge = 0;
atomic_set(&qp->s_reserved_used, 0);
}
/**
- * rvt_reset_qp - initialize the QP state to the reset state
+ * _rvt_reset_qp - initialize the QP state to the reset state
* @qp: the QP to reset
* @type: the QP type
*
* r_lock, s_hlock, and s_lock are required to be held by the caller
*/
-static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- enum ib_qp_type type)
+static void _rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
__must_hold(&qp->s_lock)
__must_hold(&qp->s_hlock)
__must_hold(&qp->r_lock)
@@ -955,6 +956,27 @@
lockdep_assert_held(&qp->s_lock);
}
+/**
+ * rvt_reset_qp - initialize the QP state to the reset state
+ * @rdi: the device info
+ * @qp: the QP to reset
+ * @type: the QP type
+ *
+ * This is the wrapper function to acquire the r_lock, s_hlock, and s_lock
+ * before calling _rvt_reset_qp().
+ */
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
+{
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+ _rvt_reset_qp(rdi, qp, type);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+}
+
/** rvt_free_qpn - Free a qpn from the bit map
* @qpt: QP table
* @qpn: queue pair number to free
@@ -1172,7 +1194,7 @@
err = alloc_ud_wq_attr(qp, rdi->dparms.node);
if (err) {
ret = (ERR_PTR(err));
- goto bail_driver_priv;
+ goto bail_rq_rvt;
}
err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
@@ -1220,8 +1242,8 @@
qp->ip = rvt_create_mmap_info(rdi, s, udata,
qp->r_rq.wq);
- if (!qp->ip) {
- ret = ERR_PTR(-ENOMEM);
+ if (IS_ERR(qp->ip)) {
+ ret = ERR_CAST(qp->ip);
goto bail_qpn;
}
@@ -1276,9 +1298,11 @@
rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
bail_rq_wq:
- rvt_free_rq(&qp->r_rq);
free_ud_wq_attr(qp);
+bail_rq_rvt:
+ rvt_free_rq(&qp->r_rq);
+
bail_driver_priv:
rdi->driver_f.qp_priv_free(rdi, qp);
@@ -1546,7 +1570,7 @@
switch (new_state) {
case IB_QPS_RESET:
if (qp->state != IB_QPS_RESET)
- rvt_reset_qp(rdi, qp, ibqp->qp_type);
+ _rvt_reset_qp(rdi, qp, ibqp->qp_type);
break;
case IB_QPS_RTR:
@@ -1695,13 +1719,7 @@
struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_hlock);
- spin_lock(&qp->s_lock);
rvt_reset_qp(rdi, qp, ibqp->qp_type);
- spin_unlock(&qp->s_lock);
- spin_unlock(&qp->s_hlock);
- spin_unlock_irq(&qp->r_lock);
wait_event(qp->wait, !atomic_read(&qp->refcount));
/* qpn is now available for use again */
@@ -2333,31 +2351,6 @@
}
/**
- * get_count - count numbers of request work queue entries
- * in circular buffer
- * @rq: data structure for request queue entry
- * @tail: tail indices of the circular buffer
- * @head: head indices of the circular buffer
- *
- * Return - total number of entries in the circular buffer
- */
-static u32 get_count(struct rvt_rq *rq, u32 tail, u32 head)
-{
- u32 count;
-
- count = head;
-
- if (count >= rq->size)
- count = 0;
- if (count < tail)
- count += rq->size - tail;
- else
- count -= tail;
-
- return count;
-}
-
-/**
* get_rvt_head - get head indices of the circular buffer
* @rq: data structure for request queue entry
* @ip: the QP
@@ -2431,7 +2424,7 @@
if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) {
head = get_rvt_head(rq, ip);
- kwq->count = get_count(rq, tail, head);
+ kwq->count = rvt_get_rq_count(rq, head, tail);
}
if (unlikely(kwq->count == 0)) {
ret = 0;
@@ -2466,7 +2459,9 @@
* the number of remaining WQEs.
*/
if (kwq->count < srq->limit) {
- kwq->count = get_count(rq, tail, get_rvt_head(rq, ip));
+ kwq->count =
+ rvt_get_rq_count(rq,
+ get_rvt_head(rq, ip), tail);
if (kwq->count < srq->limit) {
struct ib_event ev;
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
index 890d7b7..2741518 100644
--- a/drivers/infiniband/sw/rdmavt/rc.c
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -127,9 +127,7 @@
* not atomic, which is OK, since the fuzziness is
* resolved as further ACKs go out.
*/
- credits = head - tail;
- if ((int)credits < 0)
- credits += qp->r_rq.size;
+ credits = rvt_get_rq_count(&qp->r_rq, head, tail);
}
/*
* Binary search the credit table to find the code to
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 24fef02..f547c11 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -111,8 +111,8 @@
u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz;
srq->ip = rvt_create_mmap_info(dev, s, udata, srq->rq.wq);
- if (!srq->ip) {
- ret = -ENOMEM;
+ if (IS_ERR(srq->ip)) {
+ ret = PTR_ERR(srq->ip);
goto bail_wq;
}
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 18da1e1..833f3f1 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -95,9 +95,7 @@
if (!rdi)
return rdi;
- rdi->ports = kcalloc(nports,
- sizeof(struct rvt_ibport **),
- GFP_KERNEL);
+ rdi->ports = kcalloc(nports, sizeof(*rdi->ports), GFP_KERNEL);
if (!rdi->ports)
ib_dealloc_device(&rdi->ibdev);
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index d9bcfe7..0e8f1d0 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -2,8 +2,9 @@
config RDMA_RXE
tristate "Software RDMA over Ethernet (RoCE) driver"
depends on INET && PCI && INFINIBAND
- depends on !64BIT || ARCH_DMA_ADDR_T_64BIT
+ depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
+ select CRYPTO
select CRYPTO_CRC32
select DMA_VIRT_OPS
---help---
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index a8c11b5..de5f3ef 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -48,6 +48,8 @@
}
+bool rxe_initialized;
+
/* free resources for a rxe device all objects created for this device must
* have been destroyed
*/
@@ -116,6 +118,8 @@
rxe->attr.max_fast_reg_page_list_len = RXE_MAX_FMR_PAGE_LIST_LEN;
rxe->attr.max_pkeys = RXE_MAX_PKEYS;
rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;
+ addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid,
+ rxe->ndev->dev_addr);
rxe->max_ucontext = RXE_MAX_UCONTEXT;
}
@@ -157,9 +161,6 @@
rxe_init_port_param(port);
- if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len)
- return -EINVAL;
-
port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len,
sizeof(*port->pkey_tbl), GFP_KERNEL);
@@ -358,6 +359,7 @@
return err;
rdma_link_register(&rxe_link_ops);
+ rxe_initialized = true;
pr_info("loaded\n");
return 0;
}
@@ -369,6 +371,7 @@
rxe_net_exit();
rxe_cache_exit();
+ rxe_initialized = false;
pr_info("unloaded\n");
}
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index fb07eed..cae1b0a 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -67,6 +67,8 @@
#define RXE_ROCE_V2_SPORT (0xc000)
+extern bool rxe_initialized;
+
static inline u32 rxe_crc32(struct rxe_dev *rxe,
u32 crc, void *next, size_t len)
{
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 116cafc..4bc8870 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -329,7 +329,7 @@
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
}
return COMPST_ERROR_RETRY;
@@ -463,7 +463,7 @@
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
}
@@ -479,7 +479,7 @@
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
}
@@ -725,7 +725,7 @@
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
if (pkt) {
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index 48f4812..6a413d7 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -151,7 +151,7 @@
ip = kmalloc(sizeof(*ip), GFP_KERNEL);
if (!ip)
- return NULL;
+ return ERR_PTR(-ENOMEM);
size = PAGE_ALIGN(size);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index ea6a819..f885e24 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -173,7 +173,7 @@
if (IS_ERR(umem)) {
pr_warn("err %d from rxe_umem_get\n",
(int)PTR_ERR(umem));
- err = -EINVAL;
+ err = PTR_ERR(umem);
goto err1;
}
@@ -207,6 +207,7 @@
vaddr = page_address(sg_page_iter_page(&sg_iter));
if (!vaddr) {
pr_warn("null vaddr\n");
+ ib_umem_release(umem);
err = -ENOMEM;
goto err1;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 5a3474f..7d1df73 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -117,10 +117,12 @@
memcpy(&fl6.daddr, daddr, sizeof(*daddr));
fl6.flowi6_proto = IPPROTO_UDP;
- if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk),
- recv_sockets.sk6->sk, &ndst, &fl6))) {
+ ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
+ recv_sockets.sk6->sk, &fl6,
+ NULL);
+ if (unlikely(IS_ERR(ndst))) {
pr_err_ratelimited("no route to %pI6\n", daddr);
- goto put;
+ return NULL;
}
if (unlikely(ndst->error)) {
@@ -249,10 +251,8 @@
/* Create UDP socket */
err = udp_sock_create(net, &udp_cfg, &sock);
- if (err < 0) {
- pr_err("failed to create udp socket. err = %d\n", err);
+ if (err < 0)
return ERR_PTR(err);
- }
tnl_cfg.encap_type = 1;
tnl_cfg.encap_rcv = rxe_udp_encap_recv;
@@ -451,6 +451,11 @@
void rxe_loopback(struct sk_buff *skb)
{
+ if (skb->protocol == htons(ETH_P_IP))
+ skb_pull(skb, sizeof(struct iphdr));
+ else
+ skb_pull(skb, sizeof(struct ipv6hdr));
+
rxe_rcv(skb);
}
@@ -653,6 +658,12 @@
recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
htons(ROCE_V2_UDP_DPORT), true);
+ if (PTR_ERR(recv_sockets.sk6) == -EAFNOSUPPORT) {
+ recv_sockets.sk6 = NULL;
+ pr_warn("IPv6 is not supported, can not create a UDPv6 socket\n");
+ return 0;
+ }
+
if (IS_ERR(recv_sockets.sk6)) {
recv_sockets.sk6 = NULL;
pr_err("Failed to create IPv6 UDP tunnel\n");
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index e2c6d1c..53166b9 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -152,7 +152,6 @@
void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
{
if (res->type == RXE_ATOMIC_MASK) {
- rxe_drop_ref(qp);
kfree_skb(res->atomic.skb);
} else if (res->type == RXE_READ_MASK) {
if (res->read.mr)
@@ -260,6 +259,7 @@
if (err) {
vfree(qp->sq.queue->buf);
kfree(qp->sq.queue);
+ qp->sq.queue = NULL;
return err;
}
@@ -313,6 +313,7 @@
if (err) {
vfree(qp->rq.queue->buf);
kfree(qp->rq.queue);
+ qp->rq.queue = NULL;
return err;
}
}
@@ -373,6 +374,11 @@
err2:
rxe_queue_cleanup(qp->sq.queue);
err1:
+ qp->pd = NULL;
+ qp->rcq = NULL;
+ qp->scq = NULL;
+ qp->srq = NULL;
+
if (srq)
rxe_drop_ref(srq);
rxe_drop_ref(scq);
@@ -592,15 +598,16 @@
int err;
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
- int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
+ int max_rd_atomic = attr->max_rd_atomic ?
+ roundup_pow_of_two(attr->max_rd_atomic) : 0;
qp->attr.max_rd_atomic = max_rd_atomic;
atomic_set(&qp->req.rd_atomic, max_rd_atomic);
}
if (mask & IB_QP_MAX_DEST_RD_ATOMIC) {
- int max_dest_rd_atomic =
- __roundup_pow_of_two(attr->max_dest_rd_atomic);
+ int max_dest_rd_atomic = attr->max_dest_rd_atomic ?
+ roundup_pow_of_two(attr->max_dest_rd_atomic) : 0;
qp->attr.max_dest_rd_atomic = max_dest_rd_atomic;
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index ff92704..245040c 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -45,12 +45,15 @@
if (outbuf) {
ip = rxe_create_mmap_info(rxe, buf_size, udata, buf);
- if (!ip)
+ if (IS_ERR(ip)) {
+ err = PTR_ERR(ip);
goto err1;
+ }
- err = copy_to_user(outbuf, &ip->info, sizeof(ip->info));
- if (err)
+ if (copy_to_user(outbuf, &ip->info, sizeof(ip->info))) {
+ err = -EFAULT;
goto err2;
+ }
spin_lock_bh(&rxe->pending_lock);
list_add(&ip->pending_mmaps, &rxe->pending_mmaps);
@@ -64,7 +67,7 @@
err2:
kfree(ip);
err1:
- return -EINVAL;
+ return err;
}
inline void rxe_queue_reset(struct rxe_queue *q)
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index f9a492e..369ba76 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -36,21 +36,26 @@
#include "rxe.h"
#include "rxe_loc.h"
+/* check that QP matches packet opcode type and is in a valid state */
static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct rxe_qp *qp)
{
+ unsigned int pkt_type;
+
if (unlikely(!qp->valid))
goto err1;
+ pkt_type = pkt->opcode & 0xe0;
+
switch (qp_type(qp)) {
case IB_QPT_RC:
- if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) {
+ if (unlikely(pkt_type != IB_OPCODE_RC)) {
pr_warn_ratelimited("bad qp type\n");
goto err1;
}
break;
case IB_QPT_UC:
- if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) {
+ if (unlikely(pkt_type != IB_OPCODE_UC)) {
pr_warn_ratelimited("bad qp type\n");
goto err1;
}
@@ -58,7 +63,7 @@
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
- if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) {
+ if (unlikely(pkt_type != IB_OPCODE_UD)) {
pr_warn_ratelimited("bad qp type\n");
goto err1;
}
@@ -281,6 +286,8 @@
struct rxe_mc_elem *mce;
struct rxe_qp *qp;
union ib_gid dgid;
+ struct sk_buff *per_qp_skb;
+ struct rxe_pkt_info *per_qp_pkt;
int err;
if (skb->protocol == htons(ETH_P_IP))
@@ -298,7 +305,6 @@
list_for_each_entry(mce, &mcg->qp_list, qp_list) {
qp = mce->qp;
- pkt = SKB_TO_PKT(skb);
/* validate qp for incoming packet */
err = check_type_state(rxe, pkt, qp);
@@ -309,15 +315,27 @@
if (err)
continue;
- /* if *not* the last qp in the list
- * increase the users of the skb then post to the next qp
+ /* for all but the last qp create a new clone of the
+ * skb and pass to the qp. If an error occurs in the
+ * checks for the last qp in the list we need to
+ * free the skb since it hasn't been passed on to
+ * rxe_rcv_pkt() which would free it later.
*/
- if (mce->qp_list.next != &mcg->qp_list)
- skb_get(skb);
+ if (mce->qp_list.next != &mcg->qp_list) {
+ per_qp_skb = skb_clone(skb, GFP_ATOMIC);
+ } else {
+ per_qp_skb = skb;
+ /* show we have consumed the skb */
+ skb = NULL;
+ }
- pkt->qp = qp;
+ if (unlikely(!per_qp_skb))
+ continue;
+
+ per_qp_pkt = SKB_TO_PKT(per_qp_skb);
+ per_qp_pkt->qp = qp;
rxe_add_ref(qp);
- rxe_rcv_pkt(pkt, skb);
+ rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
}
spin_unlock_bh(&mcg->mcg_lock);
@@ -325,15 +343,20 @@
rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
err1:
+ /* free skb if not consumed */
kfree_skb(skb);
}
static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
{
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
const struct ib_gid_attr *gid_attr;
union ib_gid dgid;
union ib_gid *pdgid;
+ if (pkt->mask & RXE_LOOPBACK_MASK)
+ return 0;
+
if (skb->protocol == htons(ETH_P_IP)) {
ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
(struct in6_addr *)&dgid);
@@ -366,7 +389,7 @@
if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
goto drop;
- if (unlikely(rxe_match_dgid(rxe, skb) < 0)) {
+ if (rxe_match_dgid(rxe, skb) < 0) {
pr_warn_ratelimited("failed matching dgid\n");
goto drop;
}
@@ -389,7 +412,7 @@
calc_icrc = rxe_icrc_hdr(pkt, skb);
calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt),
- payload_size(pkt));
+ payload_size(pkt) + bth_pad(pkt));
calc_icrc = (__force u32)cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
if (skb->protocol == htons(ETH_P_IPV6))
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index c5d9b55..a4d6e0b 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -500,6 +500,12 @@
if (err)
return err;
}
+ if (bth_pad(pkt)) {
+ u8 *pad = payload_addr(pkt) + paylen;
+
+ memset(pad, 0, bth_pad(pkt));
+ crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt));
+ }
}
p = payload_addr(pkt) + paylen + bth_pad(pkt);
@@ -658,7 +664,8 @@
}
if (unlikely(qp_type(qp) == IB_QPT_RC &&
- qp->req.psn > (qp->comp.psn + RXE_MAX_UNACKED_PSNS))) {
+ psn_compare(qp->req.psn, (qp->comp.psn +
+ RXE_MAX_UNACKED_PSNS)) > 0)) {
qp->req.wait_psn = 1;
goto exit;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 1cbfbd9..186152b 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -732,6 +732,13 @@
if (err)
pr_err("Failed copying memory\n");
+ if (bth_pad(&ack_pkt)) {
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ u8 *pad = payload_addr(&ack_pkt) + payload;
+
+ memset(pad, 0, bth_pad(&ack_pkt));
+ icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt));
+ }
p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
*p = ~icrc;
@@ -986,8 +993,6 @@
goto out;
}
- rxe_add_ref(qp);
-
res = &qp->resp.resources[qp->resp.res_head];
free_rd_atomic_resource(qp, res);
rxe_advance_resp_resource(qp);
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index ccda5f5..2af31d4 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -61,6 +61,11 @@
struct net_device *ndev;
struct rxe_dev *exists;
+ if (!rxe_initialized) {
+ pr_err("Module parameters are not supported, use rdma link add or rxe_cfg\n");
+ return -EAGAIN;
+ }
+
len = sanitize_arg(val, intf, sizeof(intf));
if (!len) {
pr_err("add: invalid interface name\n");
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 623129f..d1fe57a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -679,6 +679,7 @@
unsigned int mask;
unsigned int length = 0;
int i;
+ struct ib_send_wr *next;
while (wr) {
mask = wr_opcode_mask(wr->opcode, qp);
@@ -695,6 +696,8 @@
break;
}
+ next = wr->next;
+
length = 0;
for (i = 0; i < wr->num_sge; i++)
length += wr->sg_list[i].length;
@@ -705,7 +708,7 @@
*bad_wr = wr;
break;
}
- wr = wr->next;
+ wr = next;
}
rxe_run_task(&qp->req.task, 1);
@@ -1075,7 +1078,7 @@
struct rxe_dev *rxe =
rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);
- return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
+ return scnprintf(buf, PAGE_SIZE, "%s\n", rxe_parent_name(rxe, 1));
}
static DEVICE_ATTR_RO(parent);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 5c4b223..b0a02d4 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -407,7 +407,7 @@
struct list_head pending_mmaps;
spinlock_t mmap_offset_lock; /* guard mmap_offset */
- int mmap_offset;
+ u64 mmap_offset;
atomic64_t stats_counters[RXE_NUM_OF_COUNTERS];
diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
index b622fc6..3450ba5 100644
--- a/drivers/infiniband/sw/siw/Kconfig
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -1,6 +1,7 @@
config RDMA_SIW
tristate "Software RDMA over TCP/IP (iWARP) driver"
depends on INET && INFINIBAND && LIBCRC32C
+ depends on INFINIBAND_VIRT_DMA
select DMA_VIRT_OPS
help
This driver implements the iWARP RDMA transport over
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index dba4535..4d8bc99 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -667,7 +667,7 @@
{
struct siw_sqe *orq_e = orq_get_tail(qp);
- if (orq_e && READ_ONCE(orq_e->flags) == 0)
+ if (READ_ONCE(orq_e->flags) == 0)
return orq_e;
return NULL;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 8c1931a..e3bac1a 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1225,10 +1225,9 @@
read_lock(&sk->sk_callback_lock);
cep = sk_to_cep(sk);
- if (!cep) {
- WARN_ON(1);
+ if (!cep)
goto out;
- }
+
siw_dbg_cep(cep, "state: %d\n", cep->state);
switch (cep->state) {
@@ -1784,14 +1783,23 @@
return 0;
}
-static int siw_listen_address(struct iw_cm_id *id, int backlog,
- struct sockaddr *laddr, int addr_family)
+/*
+ * siw_create_listen - Create resources for a listener's IWCM ID @id
+ *
+ * Starts listen on the socket address id->local_addr.
+ *
+ */
+int siw_create_listen(struct iw_cm_id *id, int backlog)
{
struct socket *s;
struct siw_cep *cep = NULL;
struct siw_device *sdev = to_siw_dev(id->device);
+ int addr_family = id->local_addr.ss_family;
int rv = 0, s_val;
+ if (addr_family != AF_INET && addr_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
if (rv < 0)
return rv;
@@ -1806,9 +1814,25 @@
siw_dbg(id->device, "setsockopt error: %d\n", rv);
goto error;
}
- rv = s->ops->bind(s, laddr, addr_family == AF_INET ?
- sizeof(struct sockaddr_in) :
- sizeof(struct sockaddr_in6));
+ if (addr_family == AF_INET) {
+ struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
+
+ /* For wildcard addr, limit binding to current device only */
+ if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
+ s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
+
+ rv = s->ops->bind(s, (struct sockaddr *)laddr,
+ sizeof(struct sockaddr_in));
+ } else {
+ struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr);
+
+ /* For wildcard addr, limit binding to current device only */
+ if (ipv6_addr_any(&laddr->sin6_addr))
+ s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
+
+ rv = s->ops->bind(s, (struct sockaddr *)laddr,
+ sizeof(struct sockaddr_in6));
+ }
if (rv) {
siw_dbg(id->device, "socket bind error: %d\n", rv);
goto error;
@@ -1867,14 +1891,7 @@
list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
cep->state = SIW_EPSTATE_LISTENING;
- if (addr_family == AF_INET)
- siw_dbg(id->device, "Listen at laddr %pI4 %u\n",
- &(((struct sockaddr_in *)laddr)->sin_addr),
- ((struct sockaddr_in *)laddr)->sin_port);
- else
- siw_dbg(id->device, "Listen at laddr %pI6 %u\n",
- &(((struct sockaddr_in6 *)laddr)->sin6_addr),
- ((struct sockaddr_in6 *)laddr)->sin6_port);
+ siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr);
return 0;
@@ -1932,114 +1949,6 @@
}
}
-/*
- * siw_create_listen - Create resources for a listener's IWCM ID @id
- *
- * Listens on the socket addresses id->local_addr and id->remote_addr.
- *
- * If the listener's @id provides a specific local IP address, at most one
- * listening socket is created and associated with @id.
- *
- * If the listener's @id provides the wildcard (zero) local IP address,
- * a separate listen is performed for each local IP address of the device
- * by creating a listening socket and binding to that local IP address.
- *
- */
-int siw_create_listen(struct iw_cm_id *id, int backlog)
-{
- struct net_device *dev = to_siw_dev(id->device)->netdev;
- int rv = 0, listeners = 0;
-
- siw_dbg(id->device, "backlog %d\n", backlog);
-
- /*
- * For each attached address of the interface, create a
- * listening socket, if id->local_addr is the wildcard
- * IP address or matches the IP address.
- */
- if (id->local_addr.ss_family == AF_INET) {
- struct in_device *in_dev = in_dev_get(dev);
- struct sockaddr_in s_laddr, *s_raddr;
- const struct in_ifaddr *ifa;
-
- if (!in_dev) {
- rv = -ENODEV;
- goto out;
- }
- memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr));
- s_raddr = (struct sockaddr_in *)&id->remote_addr;
-
- siw_dbg(id->device,
- "laddr %pI4:%d, raddr %pI4:%d\n",
- &s_laddr.sin_addr, ntohs(s_laddr.sin_port),
- &s_raddr->sin_addr, ntohs(s_raddr->sin_port));
-
- rtnl_lock();
- in_dev_for_each_ifa_rtnl(ifa, in_dev) {
- if (ipv4_is_zeronet(s_laddr.sin_addr.s_addr) ||
- s_laddr.sin_addr.s_addr == ifa->ifa_address) {
- s_laddr.sin_addr.s_addr = ifa->ifa_address;
-
- rv = siw_listen_address(id, backlog,
- (struct sockaddr *)&s_laddr,
- AF_INET);
- if (!rv)
- listeners++;
- }
- }
- rtnl_unlock();
- in_dev_put(in_dev);
- } else if (id->local_addr.ss_family == AF_INET6) {
- struct inet6_dev *in6_dev = in6_dev_get(dev);
- struct inet6_ifaddr *ifp;
- struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr),
- *s_raddr = &to_sockaddr_in6(id->remote_addr);
-
- if (!in6_dev) {
- rv = -ENODEV;
- goto out;
- }
- siw_dbg(id->device,
- "laddr %pI6:%d, raddr %pI6:%d\n",
- &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port),
- &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port));
-
- rtnl_lock();
- list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
- if (ifp->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
- continue;
- if (ipv6_addr_any(&s_laddr->sin6_addr) ||
- ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) {
- struct sockaddr_in6 bind_addr = {
- .sin6_family = AF_INET6,
- .sin6_port = s_laddr->sin6_port,
- .sin6_flowinfo = 0,
- .sin6_addr = ifp->addr,
- .sin6_scope_id = dev->ifindex };
-
- rv = siw_listen_address(id, backlog,
- (struct sockaddr *)&bind_addr,
- AF_INET6);
- if (!rv)
- listeners++;
- }
- }
- rtnl_unlock();
- in6_dev_put(in6_dev);
- } else {
- rv = -EAFNOSUPPORT;
- }
-out:
- if (listeners)
- rv = 0;
- else if (!rv)
- rv = -EINVAL;
-
- siw_dbg(id->device, "%s\n", rv ? "FAIL" : "OK");
-
- return rv;
-}
-
int siw_destroy_listen(struct iw_cm_id *id)
{
if (!id->provider_data) {
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 05a92f9..dbbf8c6 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -66,12 +66,13 @@
static int dev_id = 1;
int rv;
+ sdev->vendor_part_id = dev_id++;
+
rv = ib_register_device(base_dev, name);
if (rv) {
pr_warn("siw: device registration error %d\n", rv);
return rv;
}
- sdev->vendor_part_id = dev_id++;
siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr);
@@ -133,7 +134,7 @@
static int siw_init_cpulist(void)
{
- int i, num_nodes = num_possible_nodes();
+ int i, num_nodes = nr_node_ids;
memset(siw_tx_thread, 0, sizeof(siw_tx_thread));
@@ -248,24 +249,6 @@
return NULL;
}
-static void siw_verbs_sq_flush(struct ib_qp *base_qp)
-{
- struct siw_qp *qp = to_siw_qp(base_qp);
-
- down_write(&qp->state_lock);
- siw_sq_flush(qp);
- up_write(&qp->state_lock);
-}
-
-static void siw_verbs_rq_flush(struct ib_qp *base_qp)
-{
- struct siw_qp *qp = to_siw_qp(base_qp);
-
- down_write(&qp->state_lock);
- siw_rq_flush(qp);
- up_write(&qp->state_lock);
-}
-
static const struct ib_device_ops siw_device_ops = {
.owner = THIS_MODULE,
.uverbs_abi_ver = SIW_ABI_VERSION,
@@ -284,8 +267,6 @@
.destroy_cq = siw_destroy_cq,
.destroy_qp = siw_destroy_qp,
.destroy_srq = siw_destroy_srq,
- .drain_rq = siw_verbs_rq_flush,
- .drain_sq = siw_verbs_sq_flush,
.get_dma_mr = siw_get_dma_mr,
.get_port_immutable = siw_get_port_immutable,
.iw_accept = siw_accept,
@@ -399,6 +380,9 @@
base_dev->dev.dma_ops = &dma_virt_ops;
base_dev->num_comp_vectors = num_possible_cpus();
+ xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1);
+
ib_set_device_ops(base_dev, &siw_device_ops);
rv = ib_device_set_netdev(base_dev, netdev, 1);
if (rv)
@@ -426,9 +410,6 @@
sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR;
sdev->attrs.max_srq_sge = SIW_MAX_SGE;
- xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
- xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1);
-
INIT_LIST_HEAD(&sdev->cep_list);
INIT_LIST_HEAD(&sdev->qp_list);
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index e99983f..8bffa6e 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -106,8 +106,6 @@
mem->perms = rights & IWARP_ACCESS_MASK;
kref_init(&mem->ref);
- mr->mem = mem;
-
get_random_bytes(&next, 4);
next &= 0x00ffffff;
@@ -116,6 +114,8 @@
kfree(mem);
return -ENOMEM;
}
+
+ mr->mem = mem;
/* Set the STag index part */
mem->stag = id << 8;
mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index b431748..5927ac5 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -199,26 +199,26 @@
static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
{
- irq_size = roundup_pow_of_two(irq_size);
- orq_size = roundup_pow_of_two(orq_size);
-
+ if (irq_size) {
+ irq_size = roundup_pow_of_two(irq_size);
+ qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
+ if (!qp->irq) {
+ qp->attrs.irq_size = 0;
+ return -ENOMEM;
+ }
+ }
+ if (orq_size) {
+ orq_size = roundup_pow_of_two(orq_size);
+ qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
+ if (!qp->orq) {
+ qp->attrs.orq_size = 0;
+ qp->attrs.irq_size = 0;
+ vfree(qp->irq);
+ return -ENOMEM;
+ }
+ }
qp->attrs.irq_size = irq_size;
qp->attrs.orq_size = orq_size;
-
- qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
- if (!qp->irq) {
- siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size);
- qp->attrs.irq_size = 0;
- return -ENOMEM;
- }
- qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
- if (!qp->orq) {
- siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size);
- qp->attrs.orq_size = 0;
- qp->attrs.irq_size = 0;
- vfree(qp->irq);
- return -ENOMEM;
- }
siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
return 0;
}
@@ -288,13 +288,14 @@
if (ctrl & MPA_V2_RDMA_WRITE_RTR)
wqe->sqe.opcode = SIW_OP_WRITE;
else if (ctrl & MPA_V2_RDMA_READ_RTR) {
- struct siw_sqe *rreq;
+ struct siw_sqe *rreq = NULL;
wqe->sqe.opcode = SIW_OP_READ;
spin_lock(&qp->orq_lock);
- rreq = orq_get_free(qp);
+ if (qp->attrs.orq_size)
+ rreq = orq_get_free(qp);
if (rreq) {
siw_read_to_orq(rreq, &wqe->sqe);
qp->orq_put++;
@@ -877,6 +878,96 @@
rreq->num_sge = 1;
}
+static int siw_activate_tx_from_sq(struct siw_qp *qp)
+{
+ struct siw_sqe *sqe;
+ struct siw_wqe *wqe = tx_wqe(qp);
+ int rv = 1;
+
+ sqe = sq_get_next(qp);
+ if (!sqe)
+ return 0;
+
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
+
+ /* First copy SQE to kernel private memory */
+ memcpy(&wqe->sqe, sqe, sizeof(*sqe));
+
+ if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
+ rv = -EINVAL;
+ goto out;
+ }
+ if (wqe->sqe.flags & SIW_WQE_INLINE) {
+ if (wqe->sqe.opcode != SIW_OP_SEND &&
+ wqe->sqe.opcode != SIW_OP_WRITE) {
+ rv = -EINVAL;
+ goto out;
+ }
+ if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
+ rv = -EINVAL;
+ goto out;
+ }
+ wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
+ wqe->sqe.sge[0].lkey = 0;
+ wqe->sqe.num_sge = 1;
+ }
+ if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
+ /* A READ cannot be fenced */
+ if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode ==
+ SIW_OP_READ_LOCAL_INV)) {
+ siw_dbg_qp(qp, "cannot fence read\n");
+ rv = -EINVAL;
+ goto out;
+ }
+ spin_lock(&qp->orq_lock);
+
+ if (qp->attrs.orq_size && !siw_orq_empty(qp)) {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
+ }
+ spin_unlock(&qp->orq_lock);
+
+ } else if (wqe->sqe.opcode == SIW_OP_READ ||
+ wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
+ struct siw_sqe *rreq;
+
+ if (unlikely(!qp->attrs.orq_size)) {
+ /* We negotiated not to send READ req's */
+ rv = -EINVAL;
+ goto out;
+ }
+ wqe->sqe.num_sge = 1;
+
+ spin_lock(&qp->orq_lock);
+
+ rreq = orq_get_free(qp);
+ if (rreq) {
+ /*
+ * Make an immediate copy in ORQ to be ready
+ * to process loopback READ reply
+ */
+ siw_read_to_orq(rreq, &wqe->sqe);
+ qp->orq_put++;
+ } else {
+ qp->tx_ctx.orq_fence = 1;
+ rv = 0;
+ }
+ spin_unlock(&qp->orq_lock);
+ }
+
+ /* Clear SQE, can be re-used by application */
+ smp_store_mb(sqe->flags, 0);
+ qp->sq_get++;
+out:
+ if (unlikely(rv < 0)) {
+ siw_dbg_qp(qp, "error %d\n", rv);
+ wqe->wr_status = SIW_WR_IDLE;
+ }
+ return rv;
+}
+
/*
* Must be called with SQ locked.
* To avoid complete SQ starvation by constant inbound READ requests,
@@ -885,133 +976,55 @@
*/
int siw_activate_tx(struct siw_qp *qp)
{
- struct siw_sqe *irqe, *sqe;
+ struct siw_sqe *irqe;
struct siw_wqe *wqe = tx_wqe(qp);
- int rv = 1;
+
+ if (!qp->attrs.irq_size)
+ return siw_activate_tx_from_sq(qp);
irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
- if (irqe->flags & SIW_WQE_VALID) {
- sqe = sq_get_next(qp);
+ if (!(irqe->flags & SIW_WQE_VALID))
+ return siw_activate_tx_from_sq(qp);
- /*
- * Avoid local WQE processing starvation in case
- * of constant inbound READ request stream
- */
- if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
- qp->irq_burst = 0;
- goto skip_irq;
- }
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
-
- /* start READ RESPONSE */
- wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
- wqe->sqe.flags = 0;
- if (irqe->num_sge) {
- wqe->sqe.num_sge = 1;
- wqe->sqe.sge[0].length = irqe->sge[0].length;
- wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
- wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
- } else {
- wqe->sqe.num_sge = 0;
- }
-
- /* Retain original RREQ's message sequence number for
- * potential error reporting cases.
- */
- wqe->sqe.sge[1].length = irqe->sge[1].length;
-
- wqe->sqe.rkey = irqe->rkey;
- wqe->sqe.raddr = irqe->raddr;
-
- wqe->processed = 0;
- qp->irq_get++;
-
- /* mark current IRQ entry free */
- smp_store_mb(irqe->flags, 0);
-
- goto out;
+ /*
+ * Avoid local WQE processing starvation in case
+ * of constant inbound READ request stream
+ */
+ if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
+ qp->irq_burst = 0;
+ return siw_activate_tx_from_sq(qp);
}
- sqe = sq_get_next(qp);
- if (sqe) {
-skip_irq:
- memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
- wqe->wr_status = SIW_WR_QUEUED;
+ memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
+ wqe->wr_status = SIW_WR_QUEUED;
- /* First copy SQE to kernel private memory */
- memcpy(&wqe->sqe, sqe, sizeof(*sqe));
-
- if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
- rv = -EINVAL;
- goto out;
- }
- if (wqe->sqe.flags & SIW_WQE_INLINE) {
- if (wqe->sqe.opcode != SIW_OP_SEND &&
- wqe->sqe.opcode != SIW_OP_WRITE) {
- rv = -EINVAL;
- goto out;
- }
- if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
- rv = -EINVAL;
- goto out;
- }
- wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
- wqe->sqe.sge[0].lkey = 0;
- wqe->sqe.num_sge = 1;
- }
- if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
- /* A READ cannot be fenced */
- if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode ==
- SIW_OP_READ_LOCAL_INV)) {
- siw_dbg_qp(qp, "cannot fence read\n");
- rv = -EINVAL;
- goto out;
- }
- spin_lock(&qp->orq_lock);
-
- if (!siw_orq_empty(qp)) {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
-
- } else if (wqe->sqe.opcode == SIW_OP_READ ||
- wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
- struct siw_sqe *rreq;
-
- wqe->sqe.num_sge = 1;
-
- spin_lock(&qp->orq_lock);
-
- rreq = orq_get_free(qp);
- if (rreq) {
- /*
- * Make an immediate copy in ORQ to be ready
- * to process loopback READ reply
- */
- siw_read_to_orq(rreq, &wqe->sqe);
- qp->orq_put++;
- } else {
- qp->tx_ctx.orq_fence = 1;
- rv = 0;
- }
- spin_unlock(&qp->orq_lock);
- }
-
- /* Clear SQE, can be re-used by application */
- smp_store_mb(sqe->flags, 0);
- qp->sq_get++;
+ /* start READ RESPONSE */
+ wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
+ wqe->sqe.flags = 0;
+ if (irqe->num_sge) {
+ wqe->sqe.num_sge = 1;
+ wqe->sqe.sge[0].length = irqe->sge[0].length;
+ wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
+ wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
} else {
- rv = 0;
+ wqe->sqe.num_sge = 0;
}
-out:
- if (unlikely(rv < 0)) {
- siw_dbg_qp(qp, "error %d\n", rv);
- wqe->wr_status = SIW_WR_IDLE;
- }
- return rv;
+
+ /* Retain original RREQ's message sequence number for
+ * potential error reporting cases.
+ */
+ wqe->sqe.sge[1].length = irqe->sge[1].length;
+
+ wqe->sqe.rkey = irqe->rkey;
+ wqe->sqe.raddr = irqe->raddr;
+
+ wqe->processed = 0;
+ qp->irq_get++;
+
+ /* mark current IRQ entry free */
+ smp_store_mb(irqe->flags, 0);
+
+ return 1;
}
/*
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index c0a8872..c7c38f7 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -139,7 +139,8 @@
break;
bytes = min(bytes, len);
- if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) {
+ if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) ==
+ bytes) {
copied += bytes;
offset += bytes;
len -= bytes;
@@ -679,6 +680,10 @@
}
spin_lock_irqsave(&qp->sq_lock, flags);
+ if (unlikely(!qp->attrs.irq_size)) {
+ run_sq = 0;
+ goto error_irq;
+ }
if (tx_work->wr_status == SIW_WR_IDLE) {
/*
* immediately schedule READ response w/o
@@ -711,8 +716,9 @@
/* RRESP now valid as current TX wqe or placed into IRQ */
smp_store_mb(resp->flags, SIW_WQE_VALID);
} else {
- pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
- qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);
+error_irq:
+ pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n",
+ qp_id(qp), qp->attrs.irq_size);
siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
RDMAP_ETYPE_REMOTE_OPERATION,
@@ -739,6 +745,9 @@
struct siw_sqe *orqe;
struct siw_wqe *wqe = NULL;
+ if (unlikely(!qp->attrs.orq_size))
+ return -EPROTO;
+
/* make sure ORQ indices are current */
smp_mb();
@@ -795,8 +804,8 @@
*/
rv = siw_orqe_start_rx(qp);
if (rv) {
- pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
- qp_id(qp), qp->orq_get % qp->attrs.orq_size);
+ pr_warn("siw: [QP %u]: ORQ empty, size %d\n",
+ qp_id(qp), qp->attrs.orq_size);
goto error_term;
}
rv = siw_rresp_check_ntoh(srx, frx);
@@ -1289,11 +1298,13 @@
wc_status);
siw_wqe_put_mem(wqe, SIW_OP_READ);
- if (!error)
+ if (!error) {
rv = siw_check_tx_fence(qp);
- else
- /* Disable current ORQ eleement */
- WRITE_ONCE(orq_get_current(qp)->flags, 0);
+ } else {
+ /* Disable current ORQ element */
+ if (qp->attrs.orq_size)
+ WRITE_ONCE(orq_get_current(qp)->flags, 0);
+ }
break;
case RDMAP_RDMA_READ_REQ:
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 5d97bba..424918e 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -920,20 +920,27 @@
{
struct ib_mr *base_mr = (struct ib_mr *)(uintptr_t)sqe->base_mr;
struct siw_device *sdev = to_siw_dev(pd->device);
- struct siw_mem *mem = siw_mem_id2obj(sdev, sqe->rkey >> 8);
+ struct siw_mem *mem;
int rv = 0;
siw_dbg_pd(pd, "STag 0x%08x\n", sqe->rkey);
- if (unlikely(!mem || !base_mr)) {
+ if (unlikely(!base_mr)) {
pr_warn("siw: fastreg: STag 0x%08x unknown\n", sqe->rkey);
return -EINVAL;
}
+
if (unlikely(base_mr->rkey >> 8 != sqe->rkey >> 8)) {
pr_warn("siw: fastreg: STag 0x%08x: bad MR\n", sqe->rkey);
- rv = -EINVAL;
- goto out;
+ return -EINVAL;
}
+
+ mem = siw_mem_id2obj(sdev, sqe->rkey >> 8);
+ if (unlikely(!mem)) {
+ pr_warn("siw: fastreg: STag 0x%08x unknown\n", sqe->rkey);
+ return -EINVAL;
+ }
+
if (unlikely(mem->pd != pd)) {
pr_warn("siw: fastreg: PD mismatch\n");
rv = -EINVAL;
@@ -1100,8 +1107,8 @@
/*
* RREQ may have already been completed by inbound RRESP!
*/
- if (tx_type == SIW_OP_READ ||
- tx_type == SIW_OP_READ_LOCAL_INV) {
+ if ((tx_type == SIW_OP_READ ||
+ tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) {
/* Cleanup pending entry in ORQ */
qp->orq_put--;
qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index b18a677..b9ca54e 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -314,7 +314,6 @@
struct siw_ucontext *uctx =
rdma_udata_to_drv_context(udata, struct siw_ucontext,
base_ucontext);
- struct siw_cq *scq = NULL, *rcq = NULL;
unsigned long flags;
int num_sqe, num_rqe, rv = 0;
@@ -353,10 +352,8 @@
rv = -EINVAL;
goto err_out;
}
- scq = to_siw_cq(attrs->send_cq);
- rcq = to_siw_cq(attrs->recv_cq);
- if (!scq || (!rcq && !attrs->srq)) {
+ if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) {
siw_dbg(base_dev, "send CQ or receive CQ invalid\n");
rv = -EINVAL;
goto err_out;
@@ -387,13 +384,23 @@
if (rv)
goto err_out;
+ num_sqe = attrs->cap.max_send_wr;
+ num_rqe = attrs->cap.max_recv_wr;
+
/* All queue indices are derived from modulo operations
* on a free running 'get' (consumer) and 'put' (producer)
* unsigned counter. Having queue sizes at power of two
* avoids handling counter wrap around.
*/
- num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr);
- num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr);
+ if (num_sqe)
+ num_sqe = roundup_pow_of_two(num_sqe);
+ else {
+ /* Zero sized SQ is not supported */
+ rv = -EINVAL;
+ goto err_out_xa;
+ }
+ if (num_rqe)
+ num_rqe = roundup_pow_of_two(num_rqe);
if (qp->kernel_verbs)
qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe));
@@ -401,7 +408,6 @@
qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe));
if (qp->sendq == NULL) {
- siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe);
rv = -ENOMEM;
goto err_out_xa;
}
@@ -414,8 +420,8 @@
}
}
qp->pd = pd;
- qp->scq = scq;
- qp->rcq = rcq;
+ qp->scq = to_siw_cq(attrs->send_cq);
+ qp->rcq = to_siw_cq(attrs->recv_cq);
if (attrs->srq) {
/*
@@ -434,7 +440,6 @@
vmalloc_user(num_rqe * sizeof(struct siw_rqe));
if (qp->recvq == NULL) {
- siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe);
rv = -ENOMEM;
goto err_out_xa;
}
@@ -685,6 +690,47 @@
return bytes;
}
+/* Complete SQ WR's without processing */
+static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct siw_sqe sqe = {};
+ int rv = 0;
+
+ while (wr) {
+ sqe.id = wr->wr_id;
+ sqe.opcode = wr->opcode;
+ rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
+ if (rv) {
+ if (bad_wr)
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ return rv;
+}
+
+/* Complete RQ WR's without processing */
+static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct siw_rqe rqe = {};
+ int rv = 0;
+
+ while (wr) {
+ rqe.id = wr->wr_id;
+ rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR);
+ if (rv) {
+ if (bad_wr)
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ return rv;
+}
+
/*
* siw_post_send()
*
@@ -703,26 +749,54 @@
unsigned long flags;
int rv = 0;
+ if (wr && !qp->kernel_verbs) {
+ siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
/*
* Try to acquire QP state lock. Must be non-blocking
* to accommodate kernel clients needs.
*/
if (!down_read_trylock(&qp->state_lock)) {
- *bad_wr = wr;
- siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state);
- return -ENOTCONN;
+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+ /*
+ * ERROR state is final, so we can be sure
+ * this state will not change as long as the QP
+ * exists.
+ *
+ * This handles an ib_drain_sq() call with
+ * a concurrent request to set the QP state
+ * to ERROR.
+ */
+ rv = siw_sq_flush_wr(qp, wr, bad_wr);
+ } else {
+ siw_dbg_qp(qp, "QP locked, state %d\n",
+ qp->attrs.state);
+ *bad_wr = wr;
+ rv = -ENOTCONN;
+ }
+ return rv;
}
if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) {
+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+ /*
+ * Immediately flush this WR to CQ, if QP
+ * is in ERROR state. SQ is guaranteed to
+ * be empty, so WR complets in-order.
+ *
+ * Typically triggered by ib_drain_sq().
+ */
+ rv = siw_sq_flush_wr(qp, wr, bad_wr);
+ } else {
+ siw_dbg_qp(qp, "QP out of state %d\n",
+ qp->attrs.state);
+ *bad_wr = wr;
+ rv = -ENOTCONN;
+ }
up_read(&qp->state_lock);
- *bad_wr = wr;
- siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state);
- return -ENOTCONN;
- }
- if (wr && !qp->kernel_verbs) {
- siw_dbg_qp(qp, "wr must be empty for user mapped sq\n");
- up_read(&qp->state_lock);
- *bad_wr = wr;
- return -EINVAL;
+ return rv;
}
spin_lock_irqsave(&qp->sq_lock, flags);
@@ -913,28 +987,58 @@
unsigned long flags;
int rv = 0;
- if (qp->srq) {
+ if (qp->srq || qp->attrs.rq_size == 0) {
*bad_wr = wr;
- return -EOPNOTSUPP; /* what else from errno.h? */
+ return -EINVAL;
}
+ if (!qp->kernel_verbs) {
+ siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n");
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
/*
* Try to acquire QP state lock. Must be non-blocking
* to accommodate kernel clients needs.
*/
if (!down_read_trylock(&qp->state_lock)) {
- *bad_wr = wr;
- return -ENOTCONN;
- }
- if (!qp->kernel_verbs) {
- siw_dbg_qp(qp, "no kernel post_recv for user mapped sq\n");
- up_read(&qp->state_lock);
- *bad_wr = wr;
- return -EINVAL;
+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+ /*
+ * ERROR state is final, so we can be sure
+ * this state will not change as long as the QP
+ * exists.
+ *
+ * This handles an ib_drain_rq() call with
+ * a concurrent request to set the QP state
+ * to ERROR.
+ */
+ rv = siw_rq_flush_wr(qp, wr, bad_wr);
+ } else {
+ siw_dbg_qp(qp, "QP locked, state %d\n",
+ qp->attrs.state);
+ *bad_wr = wr;
+ rv = -ENOTCONN;
+ }
+ return rv;
}
if (qp->attrs.state > SIW_QP_STATE_RTS) {
+ if (qp->attrs.state == SIW_QP_STATE_ERROR) {
+ /*
+ * Immediately flush this WR to CQ, if QP
+ * is in ERROR state. RQ is guaranteed to
+ * be empty, so WR complets in-order.
+ *
+ * Typically triggered by ib_drain_rq().
+ */
+ rv = siw_rq_flush_wr(qp, wr, bad_wr);
+ } else {
+ siw_dbg_qp(qp, "QP out of state %d\n",
+ qp->attrs.state);
+ *bad_wr = wr;
+ rv = -ENOTCONN;
+ }
up_read(&qp->state_lock);
- *bad_wr = wr;
- return -EINVAL;
+ return rv;
}
/*
* Serialize potentially multiple producers.