Update Linux to v5.4.2
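
The drivers/infiniband/hw/cxgb4 changes brought in by this update
include:

 - Add SPDX license identifiers to Kconfig and Makefile and switch the
   Makefile include paths to $(srctree).
 - Convert the per-device IDR tables (cqidr, qpidr, mmidr, hwtid_idr,
   atid_idr, stid_idr) to XArrays and drop the device-wide spinlock.
 - On abort of a connection using an SRQ, read the TCB through
   CPL_GET_TCB/CPL_GET_TCB_RPL to recover the rq_start index before
   completing cached SRQ buffers (read_tcb(), read_tcb_rpl(),
   finish_peer_abort()).
 - Use the cm_id ToS for route lookups and passive connection setup,
   and take the SMT index from port_info->smt_idx.
 - Follow rdma core API changes: udata-based ucontext/CQ/MR/SRQ create
   and destroy paths, rdma_udata_to_drv_context(), ib_alloc_device(),
   ib_umem_get(udata, ...) and for_each_sg_dma_page().
 - Replace the removed for_primary_ifa() walker with
   in_dev_for_each_ifa_rcu().
 - Scale qpmask/cqmask by PAGE_SIZE / sge_host_page_size and reject
   adapters whose SGE host page size exceeds PAGE_SIZE.
 - Assorted cleanups: sizeof(*ptr) style, rely on kfree_skb() and
   ib_umem_release() handling NULL, and make debugfs setup return void.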

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index e0522a5..b49e8d4 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config INFINIBAND_CXGB4
 	tristate "Chelsio T4/T5 RDMA Driver"
 	depends on CHELSIO_T4 && INET
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index 9edd920..291d259 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -1,5 +1,6 @@
-ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
-ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
+# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4
+ccflags-y += -I $(srctree)/drivers/net/ethernet/chelsio/libcxgb
 
 obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
 
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0f83cbe..347dc24 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -331,20 +331,23 @@
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ep->com.dev->lock, flags);
-	_remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
-	if (idr_is_empty(&ep->com.dev->hwtid_idr))
+	xa_lock_irqsave(&ep->com.dev->hwtids, flags);
+	__xa_erase(&ep->com.dev->hwtids, ep->hwtid);
+	if (xa_empty(&ep->com.dev->hwtids))
 		wake_up(&ep->com.dev->wait);
-	spin_unlock_irqrestore(&ep->com.dev->lock, flags);
+	xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
 }
 
-static void insert_ep_tid(struct c4iw_ep *ep)
+static int insert_ep_tid(struct c4iw_ep *ep)
 {
 	unsigned long flags;
+	int err;
 
-	spin_lock_irqsave(&ep->com.dev->lock, flags);
-	_insert_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep, ep->hwtid, 0);
-	spin_unlock_irqrestore(&ep->com.dev->lock, flags);
+	xa_lock_irqsave(&ep->com.dev->hwtids, flags);
+	err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL);
+	xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
+
+	return err;
 }
 
 /*
@@ -355,11 +358,11 @@
 	struct c4iw_ep *ep;
 	unsigned long flags;
 
-	spin_lock_irqsave(&dev->lock, flags);
-	ep = idr_find(&dev->hwtid_idr, tid);
+	xa_lock_irqsave(&dev->hwtids, flags);
+	ep = xa_load(&dev->hwtids, tid);
 	if (ep)
 		c4iw_get_ep(&ep->com);
-	spin_unlock_irqrestore(&dev->lock, flags);
+	xa_unlock_irqrestore(&dev->hwtids, flags);
 	return ep;
 }
 
@@ -372,11 +375,11 @@
 	struct c4iw_listen_ep *ep;
 	unsigned long flags;
 
-	spin_lock_irqsave(&dev->lock, flags);
-	ep = idr_find(&dev->stid_idr, stid);
+	xa_lock_irqsave(&dev->stids, flags);
+	ep = xa_load(&dev->stids, stid);
 	if (ep)
 		c4iw_get_ep(&ep->com);
-	spin_unlock_irqrestore(&dev->lock, flags);
+	xa_unlock_irqrestore(&dev->stids, flags);
 	return ep;
 }
 
@@ -403,8 +406,7 @@
 				 ep->com.local_addr.ss_family);
 		dst_release(ep->dst);
 		cxgb4_l2t_release(ep->l2t);
-		if (ep->mpa_skb)
-			kfree_skb(ep->mpa_skb);
+		kfree_skb(ep->mpa_skb);
 	}
 	if (!skb_queue_empty(&ep->com.ep_skb_list))
 		skb_queue_purge(&ep->com.ep_skb_list);
@@ -458,6 +460,8 @@
 		skb_reset_transport_header(skb);
 	} else {
 		skb = alloc_skb(len, gfp);
+		if (!skb)
+			return NULL;
 	}
 	t4_set_arp_err_handler(skb, NULL, NULL);
 	return skb;
@@ -491,7 +495,6 @@
 
 	ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 	release_ep_resources(ep);
-	kfree_skb(skb);
 	return 0;
 }
 
@@ -502,7 +505,6 @@
 	ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 	c4iw_put_ep(&ep->parent_ep->com);
 	release_ep_resources(ep);
-	kfree_skb(skb);
 	return 0;
 }
 
@@ -556,7 +558,7 @@
 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 	}
-	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+	xa_erase_irq(&ep->com.dev->atids, ep->atid);
 	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
 	queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
 }
@@ -656,7 +658,33 @@
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
-static int send_abort(struct c4iw_ep *ep)
+static void read_tcb(struct c4iw_ep *ep)
+{
+	struct sk_buff *skb;
+	struct cpl_get_tcb *req;
+	int wrlen = roundup(sizeof(*req), 16);
+
+	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+	if (WARN_ON(!skb))
+		return;
+
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
+	req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
+	memset(req, 0, wrlen);
+	INIT_TP_WR(req, ep->hwtid);
+	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
+	req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
+
+	/*
+	 * keep a ref on the ep so the tcb is not unlocked before this
+	 * cpl completes. The ref is released in read_tcb_rpl().
+	 */
+	c4iw_get_ep(&ep->com);
+	if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
+		c4iw_put_ep(&ep->com);
+}
+
+static int send_abort_req(struct c4iw_ep *ep)
 {
 	u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
 	struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
@@ -671,6 +699,17 @@
 	return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 }
 
+static int send_abort(struct c4iw_ep *ep)
+{
+	if (!ep->com.qp || !ep->com.qp->srq) {
+		send_abort_req(ep);
+		return 0;
+	}
+	set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
+	read_tcb(ep);
+	return 0;
+}
+
 static int send_connect(struct c4iw_ep *ep)
 {
 	struct cpl_act_open_req *req = NULL;
@@ -912,7 +951,7 @@
 	mpalen = sizeof(*mpa) + ep->plen;
 	if (mpa_rev_to_use == 2)
 		mpalen += sizeof(struct mpa_v2_conn_params);
-	wrlen = roundup(mpalen + sizeof *req, 16);
+	wrlen = roundup(mpalen + sizeof(*req), 16);
 	skb = get_skb(skb, wrlen, GFP_KERNEL);
 	if (!skb) {
 		connect_reply_upcall(ep, -ENOMEM);
@@ -956,8 +995,9 @@
 	}
 
 	if (mpa_rev_to_use == 2) {
-		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
-					       sizeof (struct mpa_v2_conn_params));
+		mpa->private_data_size =
+			htons(ntohs(mpa->private_data_size) +
+			      sizeof(struct mpa_v2_conn_params));
 		pr_debug("initiator ird %u ord %u\n", ep->ird,
 			 ep->ord);
 		mpa_v2_params.ird = htons((u16)ep->ird);
@@ -1016,7 +1056,7 @@
 	mpalen = sizeof(*mpa) + plen;
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
 		mpalen += sizeof(struct mpa_v2_conn_params);
-	wrlen = roundup(mpalen + sizeof *req, 16);
+	wrlen = roundup(mpalen + sizeof(*req), 16);
 
 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
 	if (!skb) {
@@ -1047,8 +1087,9 @@
 
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
-		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
-					       sizeof (struct mpa_v2_conn_params));
+		mpa->private_data_size =
+			htons(ntohs(mpa->private_data_size) +
+			      sizeof(struct mpa_v2_conn_params));
 		mpa_v2_params.ird = htons(((u16)ep->ird) |
 					  (peer2peer ? MPA_V2_PEER2PEER_MODEL :
 					   0));
@@ -1095,7 +1136,7 @@
 	mpalen = sizeof(*mpa) + plen;
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
 		mpalen += sizeof(struct mpa_v2_conn_params);
-	wrlen = roundup(mpalen + sizeof *req, 16);
+	wrlen = roundup(mpalen + sizeof(*req), 16);
 
 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
 	if (!skb) {
@@ -1130,8 +1171,9 @@
 
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
-		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
-					       sizeof (struct mpa_v2_conn_params));
+		mpa->private_data_size =
+			htons(ntohs(mpa->private_data_size) +
+			      sizeof(struct mpa_v2_conn_params));
 		mpa_v2_params.ird = htons((u16)ep->ird);
 		mpa_v2_params.ord = htons((u16)ep->ord);
 		if (peer2peer && (ep->mpa_attr.p2p_type !=
@@ -1199,7 +1241,7 @@
 	set_emss(ep, tcp_opt);
 
 	/* dealloc the atid */
-	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
+	xa_erase_irq(&ep->com.dev->atids, atid);
 	cxgb4_free_atid(t, atid);
 	set_bit(ACT_ESTAB, &ep->com.history);
 
@@ -1852,14 +1894,11 @@
 	return 0;
 }
 
-static void complete_cached_srq_buffers(struct c4iw_ep *ep,
-					__be32 srqidx_status)
+static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
 {
 	enum chip_type adapter_type;
-	u32 srqidx;
 
 	adapter_type = ep->com.dev->rdev.lldi.adapter_type;
-	srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
 
 	/*
 	 * If this TCB had a srq buffer cached, then we must complete
@@ -1877,6 +1916,7 @@
 
 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 {
+	u32 srqidx;
 	struct c4iw_ep *ep;
 	struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
 	int release = 0;
@@ -1888,7 +1928,10 @@
 		return 0;
 	}
 
-	complete_cached_srq_buffers(ep, rpl->srqidx_status);
+	if (ep->com.qp && ep->com.qp->srq) {
+		srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
+		complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
+	}
 
 	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 	mutex_lock(&ep->com.mutex);
@@ -1904,8 +1947,10 @@
 	}
 	mutex_unlock(&ep->com.mutex);
 
-	if (release)
+	if (release) {
+		close_complete_upcall(ep, -ECONNRESET);
 		release_ep_resources(ep);
+	}
 	c4iw_put_ep(&ep->com);
 	return 0;
 }
@@ -2059,8 +2104,7 @@
 		}
 		ep->mtu = pdev->mtu;
 		ep->tx_chan = cxgb4_port_chan(pdev);
-		ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
-						cxgb4_port_viid(pdev));
+		ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
 		step = cdev->rdev.lldi.ntxq /
 			cdev->rdev.lldi.nchan;
 		ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2074,13 +2118,12 @@
 	} else {
 		pdev = get_real_dev(n->dev);
 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
-					n, pdev, 0);
+					n, pdev, rt_tos2priority(tos));
 		if (!ep->l2t)
 			goto out;
 		ep->mtu = dst_mtu(dst);
 		ep->tx_chan = cxgb4_port_chan(pdev);
-		ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
-						cxgb4_port_viid(pdev));
+		ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
 		step = cdev->rdev.lldi.ntxq /
 			cdev->rdev.lldi.nchan;
 		ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2147,7 +2190,9 @@
 		err = -ENOMEM;
 		goto fail2;
 	}
-	insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
+	err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL);
+	if (err)
+		goto fail2a;
 
 	/* find a route */
 	if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
@@ -2164,7 +2209,8 @@
 					   laddr6->sin6_addr.s6_addr,
 					   raddr6->sin6_addr.s6_addr,
 					   laddr6->sin6_port,
-					   raddr6->sin6_port, 0,
+					   raddr6->sin6_port,
+					   ep->com.cm_id->tos,
 					   raddr6->sin6_scope_id);
 		iptype = 6;
 		ra = (__u8 *)&raddr6->sin6_addr;
@@ -2198,7 +2244,8 @@
 fail4:
 	dst_release(ep->dst);
 fail3:
-	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+	xa_erase_irq(&ep->com.dev->atids, ep->atid);
+fail2a:
 	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
 fail2:
 	/*
@@ -2281,8 +2328,7 @@
 						(const u32 *)
 						&sin6->sin6_addr.s6_addr, 1);
 			}
-			remove_handle(ep->com.dev, &ep->com.dev->atid_idr,
-					atid);
+			xa_erase_irq(&ep->com.dev->atids, atid);
 			cxgb4_free_atid(t, atid);
 			dst_release(ep->dst);
 			cxgb4_l2t_release(ep->l2t);
@@ -2319,7 +2365,7 @@
 		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl),
 				 ep->com.local_addr.ss_family);
 
-	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
+	xa_erase_irq(&ep->com.dev->atids, atid);
 	cxgb4_free_atid(t, atid);
 	dst_release(ep->dst);
 	cxgb4_l2t_release(ep->l2t);
@@ -2376,20 +2422,6 @@
 	enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
 
 	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
-
-	skb_get(skb);
-	rpl = cplhdr(skb);
-	if (!is_t4(adapter_type)) {
-		skb_trim(skb, roundup(sizeof(*rpl5), 16));
-		rpl5 = (void *)rpl;
-		INIT_TP_WR(rpl5, ep->hwtid);
-	} else {
-		skb_trim(skb, sizeof(*rpl));
-		INIT_TP_WR(rpl, ep->hwtid);
-	}
-	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
-						    ep->hwtid));
-
 	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 		      enable_tcp_timestamps && req->tcpopt.tstamp,
 		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
@@ -2435,6 +2467,20 @@
 		if (tcph->ece && tcph->cwr)
 			opt2 |= CCTRL_ECN_V(1);
 	}
+
+	skb_get(skb);
+	rpl = cplhdr(skb);
+	if (!is_t4(adapter_type)) {
+		skb_trim(skb, roundup(sizeof(*rpl5), 16));
+		rpl5 = (void *)rpl;
+		INIT_TP_WR(rpl5, ep->hwtid);
+	} else {
+		skb_trim(skb, sizeof(*rpl));
+		INIT_TP_WR(rpl, ep->hwtid);
+	}
+	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+						    ep->hwtid));
+
 	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
 		u32 isn = (prandom_u32() & ~7UL) - 1;
 		opt2 |= T5_OPT_2_VALID_F;
@@ -2479,7 +2525,7 @@
 	u16 peer_mss = ntohs(req->tcpopt.mss);
 	int iptype;
 	unsigned short hdrs;
-	u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+	u8 tos;
 
 	parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
 	if (!parent_ep) {
@@ -2493,6 +2539,11 @@
 		goto reject;
 	}
 
+	if (parent_ep->com.cm_id->tos_set)
+		tos = parent_ep->com.cm_id->tos;
+	else
+		tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+
 	cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
 			&iptype, local_ip, peer_ip, &local_port, &peer_port);
 
@@ -2512,7 +2563,7 @@
 			 ntohs(peer_port), peer_mss);
 		dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
 				local_ip, peer_ip, local_port, peer_port,
-				PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+				tos,
 				((struct sockaddr_in6 *)
 				 &parent_ep->com.local_addr)->sin6_scope_id);
 	}
@@ -2743,6 +2794,21 @@
 	return 0;
 }
 
+static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
+{
+	complete_cached_srq_buffers(ep, ep->srqe_idx);
+	if (ep->com.cm_id && ep->com.qp) {
+		struct c4iw_qp_attributes attrs;
+
+		attrs.next_state = C4IW_QP_STATE_ERROR;
+		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+			       C4IW_QP_ATTR_NEXT_STATE,	&attrs, 1);
+	}
+	peer_abort_upcall(ep);
+	release_ep_resources(ep);
+	c4iw_put_ep(&ep->com);
+}
+
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct cpl_abort_req_rss6 *req = cplhdr(skb);
@@ -2753,6 +2819,7 @@
 	int release = 0;
 	unsigned int tid = GET_TID(req);
 	u8 status;
+	u32 srqidx;
 
 	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
@@ -2772,8 +2839,6 @@
 		goto deref_ep;
 	}
 
-	complete_cached_srq_buffers(ep, req->srqidx_status);
-
 	pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
 		 ep->com.state);
 	set_bit(PEER_ABORT, &ep->com.history);
@@ -2796,7 +2861,8 @@
 		break;
 	case MPA_REQ_SENT:
 		(void)stop_ep_timer(ep);
-		if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
+		if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
+		    (mpa_rev == 2 && ep->tried_with_mpa_v1))
 			connect_reply_upcall(ep, -ECONNRESET);
 		else {
 			/*
@@ -2821,6 +2887,23 @@
 		stop_ep_timer(ep);
 		/*FALLTHROUGH*/
 	case FPDU_MODE:
+		if (ep->com.qp && ep->com.qp->srq) {
+			srqidx = ABORT_RSS_SRQIDX_G(
+					be32_to_cpu(req->srqidx_status));
+			if (srqidx) {
+				complete_cached_srq_buffers(ep,
+							    req->srqidx_status);
+			} else {
+				/* Hold ep ref until finish_peer_abort() */
+				c4iw_get_ep(&ep->com);
+				__state_set(&ep->com, ABORTING);
+				set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
+				read_tcb(ep);
+				break;
+
+			}
+		}
+
 		if (ep->com.cm_id && ep->com.qp) {
 			attrs.next_state = C4IW_QP_STATE_ERROR;
 			ret = c4iw_modify_qp(ep->com.qp->rhp,
@@ -2872,7 +2955,7 @@
 					(const u32 *)&sin6->sin6_addr.s6_addr,
 					1);
 		}
-		remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
+		xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid);
 		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
 				 ep->com.local_addr.ss_family);
 		dst_release(ep->dst);
@@ -2944,15 +3027,18 @@
 
 	ep = get_ep_from_tid(dev, tid);
 
-	if (ep && ep->com.qp) {
-		pr_warn("TERM received tid %u qpid %u\n",
-			tid, ep->com.qp->wq.sq.qid);
-		attrs.next_state = C4IW_QP_STATE_TERMINATE;
-		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
-			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+	if (ep) {
+		if (ep->com.qp) {
+			pr_warn("TERM received tid %u qpid %u\n", tid,
+				ep->com.qp->wq.sq.qid);
+			attrs.next_state = C4IW_QP_STATE_TERMINATE;
+			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+		}
+
+		c4iw_put_ep(&ep->com);
 	} else
 		pr_warn("TERM received tid %u no ep/qp\n", tid);
-	c4iw_put_ep(&ep->com);
 
 	return 0;
 }
@@ -3145,17 +3231,22 @@
 	int found = 0;
 	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
 	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+	const struct in_ifaddr *ifa;
 
 	ind = in_dev_get(dev->rdev.lldi.ports[0]);
 	if (!ind)
 		return -EADDRNOTAVAIL;
-	for_primary_ifa(ind) {
+	rcu_read_lock();
+	in_dev_for_each_ifa_rcu(ifa, ind) {
+		if (ifa->ifa_flags & IFA_F_SECONDARY)
+			continue;
 		laddr->sin_addr.s_addr = ifa->ifa_address;
 		raddr->sin_addr.s_addr = ifa->ifa_address;
 		found = 1;
 		break;
 	}
-	endfor_ifa(ind);
+	rcu_read_unlock();
+
 	in_dev_put(ind);
 	return found ? 0 : -EADDRNOTAVAIL;
 }
@@ -3264,7 +3355,9 @@
 		err = -ENOMEM;
 		goto fail2;
 	}
-	insert_handle(dev, &dev->atid_idr, ep, ep->atid);
+	err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL);
+	if (err)
+		goto fail5;
 
 	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
 	       sizeof(ep->com.local_addr));
@@ -3320,7 +3413,7 @@
 					   laddr6->sin6_addr.s6_addr,
 					   raddr6->sin6_addr.s6_addr,
 					   laddr6->sin6_port,
-					   raddr6->sin6_port, 0,
+					   raddr6->sin6_port, cm_id->tos,
 					   raddr6->sin6_scope_id);
 	}
 	if (!ep->dst) {
@@ -3352,7 +3445,8 @@
 fail4:
 	dst_release(ep->dst);
 fail3:
-	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+	xa_erase_irq(&ep->com.dev->atids, ep->atid);
+fail5:
 	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
 fail2:
 	skb_queue_purge(&ep->com.ep_skb_list);
@@ -3475,7 +3569,9 @@
 		err = -ENOMEM;
 		goto fail2;
 	}
-	insert_handle(dev, &dev->stid_idr, ep, ep->stid);
+	err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL);
+	if (err)
+		goto fail3;
 
 	state_set(&ep->com, LISTEN);
 	if (ep->com.local_addr.ss_family == AF_INET)
@@ -3486,7 +3582,8 @@
 		cm_id->provider_data = ep;
 		goto out;
 	}
-	remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
+	xa_erase_irq(&ep->com.dev->stids, ep->stid);
+fail3:
 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
 			ep->com.local_addr.ss_family);
 fail2:
@@ -3525,7 +3622,7 @@
 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 	}
-	remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
+	xa_erase_irq(&ep->com.dev->stids, ep->stid);
 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
 			ep->com.local_addr.ss_family);
 done:
@@ -3608,7 +3705,6 @@
 	if (close) {
 		if (abrupt) {
 			set_bit(EP_DISC_ABORT, &ep->com.history);
-			close_complete_upcall(ep, -ECONNRESET);
 			ret = send_abort(ep);
 		} else {
 			set_bit(EP_DISC_CLOSE, &ep->com.history);
@@ -3686,7 +3782,7 @@
 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 	}
-	remove_handle(dev, &dev->atid_idr, atid);
+	xa_erase_irq(&dev->atids, atid);
 	cxgb4_free_atid(dev->rdev.lldi.tids, atid);
 	dst_release(ep->dst);
 	cxgb4_l2t_release(ep->l2t);
@@ -3719,6 +3815,80 @@
 	return;
 }
 
+static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
+{
+	u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
+	u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
+	u64 t;
+	u32 shift = 32;
+
+	t = (thi << shift) | (tlo >> shift);
+
+	return t;
+}
+
+static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
+{
+	u32 v;
+	u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
+
+	if (word & 0x1)
+		shift += 32;
+	v = (t >> shift) & mask;
+	return v;
+}
+
+static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+	struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
+	__be64 *tcb = (__be64 *)(rpl + 1);
+	unsigned int tid = GET_TID(rpl);
+	struct c4iw_ep *ep;
+	u64 t_flags_64;
+	u32 rx_pdu_out;
+
+	ep = get_ep_from_tid(dev, tid);
+	if (!ep)
+		return 0;
+	/* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
+	 * determine if there's a rx PDU feedback event pending.
+	 *
+	 * If that bit is set, it means we'll need to re-read the TCB's
+	 * rq_start value. The final value is the one present in a TCB
+	 * with the TF_RX_PDU_OUT bit cleared.
+	 */
+
+	t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
+	rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
+
+	c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
+	c4iw_put_ep(&ep->com); /* from read_tcb() */
+
+	/* If TF_RX_PDU_OUT bit is set, re-read the TCB */
+	if (rx_pdu_out) {
+		if (++ep->rx_pdu_out_cnt >= 2) {
+			WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
+			goto cleanup;
+		}
+		read_tcb(ep);
+		return 0;
+	}
+
+	ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W,
+			TCB_RQ_START_S);
+cleanup:
+	pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
+
+	if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
+		finish_peer_abort(dev, ep);
+	else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
+		send_abort_req(ep);
+	else
+		WARN_ONCE(1, "unexpected state!");
+
+	return 0;
+}
+
 static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct cpl_fw6_msg *rpl = cplhdr(skb);
@@ -3945,7 +4115,7 @@
 	} else {
 		vlan_eh = (struct vlan_ethhdr *)(req + 1);
 		iph = (struct iphdr *)(vlan_eh + 1);
-		skb->vlan_tci = ntohs(cpl->vlan);
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
 	}
 
 	if (iph->version != 0x4)
@@ -4039,6 +4209,7 @@
 	[CPL_CLOSE_CON_RPL] = close_con_rpl,
 	[CPL_RDMA_TERMINATE] = terminate,
 	[CPL_FW4_ACK] = fw4_ack,
+	[CPL_GET_TCB_RPL] = read_tcb_rpl,
 	[CPL_FW6_MSG] = deferred_fw6_msg,
 	[CPL_RX_PKT] = rx_pkt,
 	[FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
@@ -4270,6 +4441,7 @@
 	[CPL_RDMA_TERMINATE] = sched,
 	[CPL_FW4_ACK] = sched,
 	[CPL_SET_TCB_RPL] = set_tcb_rpl,
+	[CPL_GET_TCB_RPL] = sched,
 	[CPL_FW6_MSG] = fw6_msg,
 	[CPL_RX_PKT] = sched
 };
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 6d30427..b1bb61c 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -30,18 +30,19 @@
  * SOFTWARE.
  */
 
+#include <rdma/uverbs_ioctl.h>
+
 #include "iw_cxgb4.h"
 
-static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
-		      struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
-		      struct c4iw_wr_wait *wr_waitp)
+static void destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
+		       struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
+		       struct c4iw_wr_wait *wr_waitp)
 {
 	struct fw_ri_res_wr *res_wr;
 	struct fw_ri_res *res;
 	int wr_len;
-	int ret;
 
-	wr_len = sizeof *res_wr + sizeof *res;
+	wr_len = sizeof(*res_wr) + sizeof(*res);
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
 	res_wr = __skb_put_zero(skb, wr_len);
@@ -57,14 +58,13 @@
 	res->u.cq.iqid = cpu_to_be32(cq->cqid);
 
 	c4iw_init_wr_wait(wr_waitp);
-	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
+	c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
 
 	kfree(cq->sw_queue);
 	dma_free_coherent(&(rdev->lldi.pdev->dev),
 			  cq->memsize, cq->queue,
 			  dma_unmap_addr(cq, mapping));
 	c4iw_put_cqid(rdev, cq->cqid, uctx);
-	return ret;
 }
 
 static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
@@ -102,7 +102,6 @@
 		goto err3;
 	}
 	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
-	memset(cq->queue, 0, cq->memsize);
 
 	if (user && ucontext->is_32b_cqe) {
 		cq->qp_errp = &((struct t4_status_page *)
@@ -115,7 +114,7 @@
 	}
 
 	/* build fw_ri_res_wr */
-	wr_len = sizeof *res_wr + sizeof *res;
+	wr_len = sizeof(*res_wr) + sizeof(*res);
 
 	skb = alloc_skb(wr_len, GFP_KERNEL);
 	if (!skb) {
@@ -161,7 +160,7 @@
 	cq->gts = rdev->lldi.gts_reg;
 	cq->rdev = rdev;
 
-	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
+	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
 				      &cq->bar2_qid,
 				      user ? &cq->bar2_pa : NULL);
 	if (user && !cq->bar2_pa) {
@@ -968,7 +967,7 @@
 	return !err || err == -ENODATA ? npolled : err;
 }
 
-int c4iw_destroy_cq(struct ib_cq *ib_cq)
+void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 {
 	struct c4iw_cq *chp;
 	struct c4iw_ucontext *ucontext;
@@ -976,55 +975,46 @@
 	pr_debug("ib_cq %p\n", ib_cq);
 	chp = to_c4iw_cq(ib_cq);
 
-	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
+	xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid);
 	atomic_dec(&chp->refcnt);
 	wait_event(chp->wait, !atomic_read(&chp->refcnt));
 
-	ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
-				  : NULL;
+	ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
+					     ibucontext);
 	destroy_cq(&chp->rhp->rdev, &chp->cq,
 		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
 		   chp->destroy_skb, chp->wr_waitp);
 	c4iw_put_wr_wait(chp->wr_waitp);
-	kfree(chp);
-	return 0;
 }
 
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
-			     const struct ib_cq_init_attr *attr,
-			     struct ib_ucontext *ib_context,
-			     struct ib_udata *udata)
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		   struct ib_udata *udata)
 {
+	struct ib_device *ibdev = ibcq->device;
 	int entries = attr->cqe;
 	int vector = attr->comp_vector;
-	struct c4iw_dev *rhp;
-	struct c4iw_cq *chp;
+	struct c4iw_dev *rhp = to_c4iw_dev(ibcq->device);
+	struct c4iw_cq *chp = to_c4iw_cq(ibcq);
 	struct c4iw_create_cq ucmd;
 	struct c4iw_create_cq_resp uresp;
-	struct c4iw_ucontext *ucontext = NULL;
 	int ret, wr_len;
 	size_t memsize, hwentries;
 	struct c4iw_mm_entry *mm, *mm2;
+	struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct c4iw_ucontext, ibucontext);
 
 	pr_debug("ib_dev %p entries %d\n", ibdev, entries);
 	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	rhp = to_c4iw_dev(ibdev);
+		return -EINVAL;
 
 	if (vector >= rhp->rdev.lldi.nciq)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
-	if (ib_context) {
-		ucontext = to_c4iw_ucontext(ib_context);
+	if (udata) {
 		if (udata->inlen < sizeof(ucmd))
 			ucontext->is_32b_cqe = 1;
 	}
 
-	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
-	if (!chp)
-		return ERR_PTR(-ENOMEM);
-
 	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
 	if (!chp->wr_waitp) {
 		ret = -ENOMEM;
@@ -1068,7 +1058,7 @@
 	/*
 	 * memsize must be a multiple of the page size if its a user cq.
 	 */
-	if (ucontext)
+	if (udata)
 		memsize = roundup(memsize, PAGE_SIZE);
 
 	chp->cq.size = hwentries;
@@ -1088,16 +1078,16 @@
 	spin_lock_init(&chp->comp_handler_lock);
 	atomic_set(&chp->refcnt, 1);
 	init_waitqueue_head(&chp->wait);
-	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+	ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL);
 	if (ret)
 		goto err_destroy_cq;
 
 	if (ucontext) {
 		ret = -ENOMEM;
-		mm = kmalloc(sizeof *mm, GFP_KERNEL);
+		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
 		if (!mm)
 			goto err_remove_handle;
-		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+		mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
 		if (!mm2)
 			goto err_free_mm;
 
@@ -1134,16 +1124,17 @@
 		mm2->len = PAGE_SIZE;
 		insert_mmap(ucontext, mm2);
 	}
-	pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
-		 chp->cq.cqid, chp, chp->cq.size,
-		 chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
-	return &chp->ibcq;
+
+	pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr %pad\n",
+		 chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
+		 &chp->cq.dma_addr);
+	return 0;
 err_free_mm2:
 	kfree(mm2);
 err_free_mm:
 	kfree(mm);
 err_remove_handle:
-	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
+	xa_erase_irq(&rhp->cqs, chp->cq.cqid);
 err_destroy_cq:
 	destroy_cq(&chp->rhp->rdev, &chp->cq,
 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
@@ -1153,8 +1144,7 @@
 err_free_wr_wait:
 	c4iw_put_wr_wait(chp->wr_waitp);
 err_free_chp:
-	kfree(chp);
-	return ERR_PTR(ret);
+	return ret;
 }
 
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index c13c0ba..599340c 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -81,14 +81,6 @@
 	int pos;
 };
 
-static int count_idrs(int id, void *p, void *data)
-{
-	int *countp = data;
-
-	*countp = *countp + 1;
-	return 0;
-}
-
 static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
 			    loff_t *ppos)
 {
@@ -250,13 +242,11 @@
 	}
 }
 
-static int dump_qp(int id, void *p, void *data)
+static int dump_qp(unsigned long id, struct c4iw_qp *qp,
+		   struct c4iw_debugfs_data *qpd)
 {
-	struct c4iw_qp *qp = p;
-	struct c4iw_debugfs_data *qpd = data;
 	int space;
 	int cc;
-
 	if (id != qp->wq.sq.qid)
 		return 0;
 
@@ -335,19 +325,24 @@
 
 static int qp_open(struct inode *inode, struct file *file)
 {
+	struct c4iw_qp *qp;
 	struct c4iw_debugfs_data *qpd;
+	unsigned long index;
 	int count = 1;
 
-	qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
+	qpd = kmalloc(sizeof(*qpd), GFP_KERNEL);
 	if (!qpd)
 		return -ENOMEM;
 
 	qpd->devp = inode->i_private;
 	qpd->pos = 0;
 
-	spin_lock_irq(&qpd->devp->lock);
-	idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
-	spin_unlock_irq(&qpd->devp->lock);
+	/*
+	 * No need to lock; we drop the lock to call vmalloc so it's racy
+	 * anyway.  Someone who cares should switch this over to seq_file
+	 */
+	xa_for_each(&qpd->devp->qps, index, qp)
+		count++;
 
 	qpd->bufsize = count * 180;
 	qpd->buf = vmalloc(qpd->bufsize);
@@ -356,9 +351,10 @@
 		return -ENOMEM;
 	}
 
-	spin_lock_irq(&qpd->devp->lock);
-	idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
-	spin_unlock_irq(&qpd->devp->lock);
+	xa_lock_irq(&qpd->devp->qps);
+	xa_for_each(&qpd->devp->qps, index, qp)
+		dump_qp(index, qp, qpd);
+	xa_unlock_irq(&qpd->devp->qps);
 
 	qpd->buf[qpd->pos++] = 0;
 	file->private_data = qpd;
@@ -373,9 +369,8 @@
 	.llseek  = default_llseek,
 };
 
-static int dump_stag(int id, void *p, void *data)
+static int dump_stag(unsigned long id, struct c4iw_debugfs_data *stagd)
 {
-	struct c4iw_debugfs_data *stagd = data;
 	int space;
 	int cc;
 	struct fw_ri_tpte tpte;
@@ -424,10 +419,12 @@
 static int stag_open(struct inode *inode, struct file *file)
 {
 	struct c4iw_debugfs_data *stagd;
+	void *p;
+	unsigned long index;
 	int ret = 0;
 	int count = 1;
 
-	stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
+	stagd = kmalloc(sizeof(*stagd), GFP_KERNEL);
 	if (!stagd) {
 		ret = -ENOMEM;
 		goto out;
@@ -435,9 +432,8 @@
 	stagd->devp = inode->i_private;
 	stagd->pos = 0;
 
-	spin_lock_irq(&stagd->devp->lock);
-	idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
-	spin_unlock_irq(&stagd->devp->lock);
+	xa_for_each(&stagd->devp->mrs, index, p)
+		count++;
 
 	stagd->bufsize = count * 256;
 	stagd->buf = vmalloc(stagd->bufsize);
@@ -446,9 +442,10 @@
 		goto err1;
 	}
 
-	spin_lock_irq(&stagd->devp->lock);
-	idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
-	spin_unlock_irq(&stagd->devp->lock);
+	xa_lock_irq(&stagd->devp->mrs);
+	xa_for_each(&stagd->devp->mrs, index, p)
+		dump_stag(index, stagd);
+	xa_unlock_irq(&stagd->devp->mrs);
 
 	stagd->buf[stagd->pos++] = 0;
 	file->private_data = stagd;
@@ -558,10 +555,8 @@
 	.write   = stats_clear,
 };
 
-static int dump_ep(int id, void *p, void *data)
+static int dump_ep(struct c4iw_ep *ep, struct c4iw_debugfs_data *epd)
 {
-	struct c4iw_ep *ep = p;
-	struct c4iw_debugfs_data *epd = data;
 	int space;
 	int cc;
 
@@ -617,10 +612,9 @@
 	return 0;
 }
 
-static int dump_listen_ep(int id, void *p, void *data)
+static
+int dump_listen_ep(struct c4iw_listen_ep *ep, struct c4iw_debugfs_data *epd)
 {
-	struct c4iw_listen_ep *ep = p;
-	struct c4iw_debugfs_data *epd = data;
 	int space;
 	int cc;
 
@@ -674,6 +668,9 @@
 
 static int ep_open(struct inode *inode, struct file *file)
 {
+	struct c4iw_ep *ep;
+	struct c4iw_listen_ep *lep;
+	unsigned long index;
 	struct c4iw_debugfs_data *epd;
 	int ret = 0;
 	int count = 1;
@@ -686,11 +683,12 @@
 	epd->devp = inode->i_private;
 	epd->pos = 0;
 
-	spin_lock_irq(&epd->devp->lock);
-	idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
-	idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
-	idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
-	spin_unlock_irq(&epd->devp->lock);
+	xa_for_each(&epd->devp->hwtids, index, ep)
+		count++;
+	xa_for_each(&epd->devp->atids, index, ep)
+		count++;
+	xa_for_each(&epd->devp->stids, index, lep)
+		count++;
 
 	epd->bufsize = count * 240;
 	epd->buf = vmalloc(epd->bufsize);
@@ -699,11 +697,18 @@
 		goto err1;
 	}
 
-	spin_lock_irq(&epd->devp->lock);
-	idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
-	idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
-	idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
-	spin_unlock_irq(&epd->devp->lock);
+	xa_lock_irq(&epd->devp->hwtids);
+	xa_for_each(&epd->devp->hwtids, index, ep)
+		dump_ep(ep, epd);
+	xa_unlock_irq(&epd->devp->hwtids);
+	xa_lock_irq(&epd->devp->atids);
+	xa_for_each(&epd->devp->atids, index, ep)
+		dump_ep(ep, epd);
+	xa_unlock_irq(&epd->devp->atids);
+	xa_lock_irq(&epd->devp->stids);
+	xa_for_each(&epd->devp->stids, index, lep)
+		dump_listen_ep(lep, epd);
+	xa_unlock_irq(&epd->devp->stids);
 
 	file->private_data = epd;
 	goto out;
@@ -720,11 +725,8 @@
 	.read    = debugfs_read,
 };
 
-static int setup_debugfs(struct c4iw_dev *devp)
+static void setup_debugfs(struct c4iw_dev *devp)
 {
-	if (!devp->debugfs_root)
-		return -1;
-
 	debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
 				 (void *)devp, &qp_debugfs_fops, 4096);
 
@@ -740,7 +742,6 @@
 	if (c4iw_wr_log)
 		debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
 					 (void *)devp, &wr_log_debugfs_fops, 4096);
-	return 0;
 }
 
 void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
@@ -783,6 +784,7 @@
 static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 {
 	int err;
+	unsigned int factor;
 
 	c4iw_init_dev_ucontext(rdev, &rdev->uctx);
 
@@ -806,8 +808,18 @@
 		return -EINVAL;
 	}
 
-	rdev->qpmask = rdev->lldi.udb_density - 1;
-	rdev->cqmask = rdev->lldi.ucq_density - 1;
+	/* This implementation requires a sge_host_page_size <= PAGE_SIZE. */
+	if (rdev->lldi.sge_host_page_size > PAGE_SIZE) {
+		pr_err("%s: unsupported sge host page size %u\n",
+		       pci_name(rdev->lldi.pdev),
+		       rdev->lldi.sge_host_page_size);
+		return -EINVAL;
+	}
+
+	factor = PAGE_SIZE / rdev->lldi.sge_host_page_size;
+	rdev->qpmask = (rdev->lldi.udb_density * factor) - 1;
+	rdev->cqmask = (rdev->lldi.ucq_density * factor) - 1;
+
 	pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
 		 pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
 		 rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
@@ -924,16 +936,12 @@
 void c4iw_dealloc(struct uld_ctx *ctx)
 {
 	c4iw_rdev_close(&ctx->dev->rdev);
-	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
-	idr_destroy(&ctx->dev->cqidr);
-	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
-	idr_destroy(&ctx->dev->qpidr);
-	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
-	idr_destroy(&ctx->dev->mmidr);
-	wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
-	idr_destroy(&ctx->dev->hwtid_idr);
-	idr_destroy(&ctx->dev->stid_idr);
-	idr_destroy(&ctx->dev->atid_idr);
+	WARN_ON(!xa_empty(&ctx->dev->cqs));
+	WARN_ON(!xa_empty(&ctx->dev->qps));
+	WARN_ON(!xa_empty(&ctx->dev->mrs));
+	wait_event(ctx->dev->wait, xa_empty(&ctx->dev->hwtids));
+	WARN_ON(!xa_empty(&ctx->dev->stids));
+	WARN_ON(!xa_empty(&ctx->dev->atids));
 	if (ctx->dev->rdev.bar2_kva)
 		iounmap(ctx->dev->rdev.bar2_kva);
 	if (ctx->dev->rdev.oc_mw_kva)
@@ -970,7 +978,7 @@
 		pr_info("%s: On-Chip Queues not supported on this device\n",
 			pci_name(infop->pdev));
 
-	devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
+	devp = ib_alloc_device(c4iw_dev, ibdev);
 	if (!devp) {
 		pr_err("Cannot allocate ib device\n");
 		return ERR_PTR(-ENOMEM);
@@ -1037,13 +1045,12 @@
 		return ERR_PTR(ret);
 	}
 
-	idr_init(&devp->cqidr);
-	idr_init(&devp->qpidr);
-	idr_init(&devp->mmidr);
-	idr_init(&devp->hwtid_idr);
-	idr_init(&devp->stid_idr);
-	idr_init(&devp->atid_idr);
-	spin_lock_init(&devp->lock);
+	xa_init_flags(&devp->cqs, XA_FLAGS_LOCK_IRQ);
+	xa_init_flags(&devp->qps, XA_FLAGS_LOCK_IRQ);
+	xa_init_flags(&devp->mrs, XA_FLAGS_LOCK_IRQ);
+	xa_init_flags(&devp->hwtids, XA_FLAGS_LOCK_IRQ);
+	xa_init_flags(&devp->atids, XA_FLAGS_LOCK_IRQ);
+	xa_init_flags(&devp->stids, XA_FLAGS_LOCK_IRQ);
 	mutex_init(&devp->rdev.stats.lock);
 	mutex_init(&devp->db_mutex);
 	INIT_LIST_HEAD(&devp->db_fc_list);
@@ -1071,7 +1078,7 @@
 		pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
 			DRV_VERSION);
 
-	ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx) {
 		ctx = ERR_PTR(-ENOMEM);
 		goto out;
@@ -1239,10 +1246,9 @@
 	case CXGB4_STATE_START_RECOVERY:
 		pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
 		if (ctx->dev) {
-			struct ib_event event;
+			struct ib_event event = {};
 
 			ctx->dev->rdev.flags |= T4_FATAL_ERROR;
-			memset(&event, 0, sizeof event);
 			event.event  = IB_EVENT_DEVICE_FATAL;
 			event.device = &ctx->dev->ibdev;
 			ib_dispatch_event(&event);
@@ -1258,34 +1264,21 @@
 	return 0;
 }
 
-static int disable_qp_db(int id, void *p, void *data)
-{
-	struct c4iw_qp *qp = p;
-
-	t4_disable_wq_db(&qp->wq);
-	return 0;
-}
-
 static void stop_queues(struct uld_ctx *ctx)
 {
-	unsigned long flags;
+	struct c4iw_qp *qp;
+	unsigned long index, flags;
 
-	spin_lock_irqsave(&ctx->dev->lock, flags);
+	xa_lock_irqsave(&ctx->dev->qps, flags);
 	ctx->dev->rdev.stats.db_state_transitions++;
 	ctx->dev->db_state = STOPPED;
-	if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
-		idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
-	else
+	if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
+		xa_for_each(&ctx->dev->qps, index, qp)
+			t4_disable_wq_db(&qp->wq);
+	} else {
 		ctx->dev->rdev.status_page->db_off = 1;
-	spin_unlock_irqrestore(&ctx->dev->lock, flags);
-}
-
-static int enable_qp_db(int id, void *p, void *data)
-{
-	struct c4iw_qp *qp = p;
-
-	t4_enable_wq_db(&qp->wq);
-	return 0;
+	}
+	xa_unlock_irqrestore(&ctx->dev->qps, flags);
 }
 
 static void resume_rc_qp(struct c4iw_qp *qp)
@@ -1315,18 +1308,21 @@
 
 static void resume_queues(struct uld_ctx *ctx)
 {
-	spin_lock_irq(&ctx->dev->lock);
+	xa_lock_irq(&ctx->dev->qps);
 	if (ctx->dev->db_state != STOPPED)
 		goto out;
 	ctx->dev->db_state = FLOW_CONTROL;
 	while (1) {
 		if (list_empty(&ctx->dev->db_fc_list)) {
+			struct c4iw_qp *qp;
+			unsigned long index;
+
 			WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
 			ctx->dev->db_state = NORMAL;
 			ctx->dev->rdev.stats.db_state_transitions++;
 			if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
-				idr_for_each(&ctx->dev->qpidr, enable_qp_db,
-					     NULL);
+				xa_for_each(&ctx->dev->qps, index, qp)
+					t4_enable_wq_db(&qp->wq);
 			} else {
 				ctx->dev->rdev.status_page->db_off = 0;
 			}
@@ -1338,12 +1334,12 @@
 				resume_a_chunk(ctx);
 			}
 			if (!list_empty(&ctx->dev->db_fc_list)) {
-				spin_unlock_irq(&ctx->dev->lock);
+				xa_unlock_irq(&ctx->dev->qps);
 				if (DB_FC_RESUME_DELAY) {
 					set_current_state(TASK_UNINTERRUPTIBLE);
 					schedule_timeout(DB_FC_RESUME_DELAY);
 				}
-				spin_lock_irq(&ctx->dev->lock);
+				xa_lock_irq(&ctx->dev->qps);
 				if (ctx->dev->db_state != FLOW_CONTROL)
 					break;
 			}
@@ -1352,7 +1348,7 @@
 out:
 	if (ctx->dev->db_state != NORMAL)
 		ctx->dev->rdev.stats.db_fc_interruptions++;
-	spin_unlock_irq(&ctx->dev->lock);
+	xa_unlock_irq(&ctx->dev->qps);
 }
 
 struct qp_list {
@@ -1360,23 +1356,6 @@
 	struct c4iw_qp **qps;
 };
 
-static int add_and_ref_qp(int id, void *p, void *data)
-{
-	struct qp_list *qp_listp = data;
-	struct c4iw_qp *qp = p;
-
-	c4iw_qp_add_ref(&qp->ibqp);
-	qp_listp->qps[qp_listp->idx++] = qp;
-	return 0;
-}
-
-static int count_qps(int id, void *p, void *data)
-{
-	unsigned *countp = data;
-	(*countp)++;
-	return 0;
-}
-
 static void deref_qps(struct qp_list *qp_list)
 {
 	int idx;
@@ -1393,7 +1372,7 @@
 	for (idx = 0; idx < qp_list->idx; idx++) {
 		struct c4iw_qp *qp = qp_list->qps[idx];
 
-		spin_lock_irq(&qp->rhp->lock);
+		xa_lock_irq(&qp->rhp->qps);
 		spin_lock(&qp->lock);
 		ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
 					  qp->wq.sq.qid,
@@ -1403,7 +1382,7 @@
 			pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n",
 			       pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
 			spin_unlock(&qp->lock);
-			spin_unlock_irq(&qp->rhp->lock);
+			xa_unlock_irq(&qp->rhp->qps);
 			return;
 		}
 		qp->wq.sq.wq_pidx_inc = 0;
@@ -1417,12 +1396,12 @@
 			pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n",
 			       pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
 			spin_unlock(&qp->lock);
-			spin_unlock_irq(&qp->rhp->lock);
+			xa_unlock_irq(&qp->rhp->qps);
 			return;
 		}
 		qp->wq.rq.wq_pidx_inc = 0;
 		spin_unlock(&qp->lock);
-		spin_unlock_irq(&qp->rhp->lock);
+		xa_unlock_irq(&qp->rhp->qps);
 
 		/* Wait for the dbfifo to drain */
 		while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
@@ -1434,6 +1413,8 @@
 
 static void recover_queues(struct uld_ctx *ctx)
 {
+	struct c4iw_qp *qp;
+	unsigned long index;
 	int count = 0;
 	struct qp_list qp_list;
 	int ret;
@@ -1451,22 +1432,26 @@
 	}
 
 	/* Count active queues so we can build a list of queues to recover */
-	spin_lock_irq(&ctx->dev->lock);
+	xa_lock_irq(&ctx->dev->qps);
 	WARN_ON(ctx->dev->db_state != STOPPED);
 	ctx->dev->db_state = RECOVERY;
-	idr_for_each(&ctx->dev->qpidr, count_qps, &count);
+	xa_for_each(&ctx->dev->qps, index, qp)
+		count++;
 
 	qp_list.qps = kcalloc(count, sizeof(*qp_list.qps), GFP_ATOMIC);
 	if (!qp_list.qps) {
-		spin_unlock_irq(&ctx->dev->lock);
+		xa_unlock_irq(&ctx->dev->qps);
 		return;
 	}
 	qp_list.idx = 0;
 
 	/* add and ref each qp so it doesn't get freed */
-	idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
+	xa_for_each(&ctx->dev->qps, index, qp) {
+		c4iw_qp_add_ref(&qp->ibqp);
+		qp_list.qps[qp_list.idx++] = qp;
+	}
 
-	spin_unlock_irq(&ctx->dev->lock);
+	xa_unlock_irq(&ctx->dev->qps);
 
 	/* now traverse the list in a safe context to recover the db state*/
 	recover_lost_dbs(ctx, &qp_list);
@@ -1475,10 +1460,10 @@
 	deref_qps(&qp_list);
 	kfree(qp_list.qps);
 
-	spin_lock_irq(&ctx->dev->lock);
+	xa_lock_irq(&ctx->dev->qps);
 	WARN_ON(ctx->dev->db_state != RECOVERY);
 	ctx->dev->db_state = STOPPED;
-	spin_unlock_irq(&ctx->dev->lock);
+	xa_unlock_irq(&ctx->dev->qps);
 }
 
 static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
@@ -1553,8 +1538,6 @@
 		return err;
 
 	c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
-	if (!c4iw_debugfs_root)
-		pr_warn("could not create debugfs entry, continuing\n");
 
 	reg_workq = create_singlethread_workqueue("Register_iWARP_device");
 	if (!reg_workq) {
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index 8741d23..4cd877b 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -123,15 +123,15 @@
 	struct c4iw_qp *qhp;
 	u32 cqid;
 
-	spin_lock_irq(&dev->lock);
-	qhp = get_qhp(dev, CQE_QPID(err_cqe));
+	xa_lock_irq(&dev->qps);
+	qhp = xa_load(&dev->qps, CQE_QPID(err_cqe));
 	if (!qhp) {
 		pr_err("BAD AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
 		       CQE_QPID(err_cqe),
 		       CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
 		       CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
 		       CQE_WRID_LOW(err_cqe));
-		spin_unlock_irq(&dev->lock);
+		xa_unlock_irq(&dev->qps);
 		goto out;
 	}
 
@@ -146,13 +146,13 @@
 		       CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
 		       CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
 		       CQE_WRID_LOW(err_cqe));
-		spin_unlock_irq(&dev->lock);
+		xa_unlock_irq(&dev->qps);
 		goto out;
 	}
 
 	c4iw_qp_add_ref(&qhp->ibqp);
 	atomic_inc(&chp->refcnt);
-	spin_unlock_irq(&dev->lock);
+	xa_unlock_irq(&dev->qps);
 
 	/* Bad incoming write */
 	if (RQ_TYPE(err_cqe) &&
@@ -225,11 +225,11 @@
 	struct c4iw_cq *chp;
 	unsigned long flag;
 
-	spin_lock_irqsave(&dev->lock, flag);
-	chp = get_chp(dev, qid);
+	xa_lock_irqsave(&dev->cqs, flag);
+	chp = xa_load(&dev->cqs, qid);
 	if (chp) {
 		atomic_inc(&chp->refcnt);
-		spin_unlock_irqrestore(&dev->lock, flag);
+		xa_unlock_irqrestore(&dev->cqs, flag);
 		t4_clear_cq_armed(&chp->cq);
 		spin_lock_irqsave(&chp->comp_handler_lock, flag);
 		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
@@ -238,7 +238,7 @@
 			wake_up(&chp->wait);
 	} else {
 		pr_debug("unknown cqid 0x%x\n", qid);
-		spin_unlock_irqrestore(&dev->lock, flag);
+		xa_unlock_irqrestore(&dev->cqs, flag);
 	}
 	return 0;
 }
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f0fcead..7d06b0f 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -34,7 +34,7 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
 #include <linux/completion.h>
 #include <linux/netdevice.h>
 #include <linux/sched/mm.h>
@@ -315,16 +315,15 @@
 	struct ib_device ibdev;
 	struct c4iw_rdev rdev;
 	u32 device_cap_flags;
-	struct idr cqidr;
-	struct idr qpidr;
-	struct idr mmidr;
-	spinlock_t lock;
+	struct xarray cqs;
+	struct xarray qps;
+	struct xarray mrs;
 	struct mutex db_mutex;
 	struct dentry *debugfs_root;
 	enum db_state db_state;
-	struct idr hwtid_idr;
-	struct idr atid_idr;
-	struct idr stid_idr;
+	struct xarray hwtids;
+	struct xarray atids;
+	struct xarray stids;
 	struct list_head db_fc_list;
 	u32 avail_ird;
 	wait_queue_head_t wait;
@@ -349,70 +348,12 @@
 
 static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid)
 {
-	return idr_find(&rhp->cqidr, cqid);
+	return xa_load(&rhp->cqs, cqid);
 }
 
 static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid)
 {
-	return idr_find(&rhp->qpidr, qpid);
-}
-
-static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid)
-{
-	return idr_find(&rhp->mmidr, mmid);
-}
-
-static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
-				 void *handle, u32 id, int lock)
-{
-	int ret;
-
-	if (lock) {
-		idr_preload(GFP_KERNEL);
-		spin_lock_irq(&rhp->lock);
-	}
-
-	ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC);
-
-	if (lock) {
-		spin_unlock_irq(&rhp->lock);
-		idr_preload_end();
-	}
-
-	return ret < 0 ? ret : 0;
-}
-
-static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
-				void *handle, u32 id)
-{
-	return _insert_handle(rhp, idr, handle, id, 1);
-}
-
-static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
-				       void *handle, u32 id)
-{
-	return _insert_handle(rhp, idr, handle, id, 0);
-}
-
-static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr,
-				   u32 id, int lock)
-{
-	if (lock)
-		spin_lock_irq(&rhp->lock);
-	idr_remove(idr, id);
-	if (lock)
-		spin_unlock_irq(&rhp->lock);
-}
-
-static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
-{
-	_remove_handle(rhp, idr, id, 1);
-}
-
-static inline void remove_handle_nolock(struct c4iw_dev *rhp,
-					 struct idr *idr, u32 id)
-{
-	_remove_handle(rhp, idr, id, 0);
+	return xa_load(&rhp->qps, qpid);
 }
 
 extern uint c4iw_max_read_depth;
@@ -549,13 +490,13 @@
 	struct t4_wq wq;
 	spinlock_t lock;
 	struct mutex mutex;
-	struct kref kref;
 	wait_queue_head_t wait;
 	int sq_sig_all;
 	struct c4iw_srq *srq;
-	struct work_struct free_work;
 	struct c4iw_ucontext *ucontext;
 	struct c4iw_wr_wait *wr_waitp;
+	struct completion qp_rel_comp;
+	refcount_t qp_refcnt;
 };
 
 static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -589,7 +530,6 @@
 	u32 key;
 	spinlock_t mmap_lock;
 	struct list_head mmaps;
-	struct kref kref;
 	bool is_32b_cqe;
 };
 
@@ -598,18 +538,6 @@
 	return container_of(c, struct c4iw_ucontext, ibucontext);
 }
 
-void _c4iw_free_ucontext(struct kref *kref);
-
-static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext)
-{
-	kref_put(&ucontext->kref, _c4iw_free_ucontext);
-}
-
-static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext)
-{
-	kref_get(&ucontext->kref);
-}
-
 struct c4iw_mm_entry {
 	struct list_head entry;
 	u64 addr;
@@ -982,6 +910,9 @@
 	int rcv_win;
 	u32 snd_wscale;
 	struct c4iw_ep_stats stats;
+	u32 srqe_idx;
+	u32 rx_pdu_out_cnt;
+	struct sk_buff *peer_abort_skb;
 };
 
 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
@@ -1048,9 +979,8 @@
 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
 void c4iw_qp_add_ref(struct ib_qp *qp);
 void c4iw_qp_rem_ref(struct ib_qp *qp);
-struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
-			    enum ib_mr_type mr_type,
-			    u32 max_num_sg);
+struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+			    u32 max_num_sg, struct ib_udata *udata);
 int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
 		   unsigned int *sg_offset);
 int c4iw_dealloc_mw(struct ib_mw *mw);
@@ -1061,21 +991,18 @@
 					   u64 length, u64 virt, int acc,
 					   struct ib_udata *udata);
 struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-int c4iw_dereg_mr(struct ib_mr *ib_mr);
-int c4iw_destroy_cq(struct ib_cq *ib_cq);
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
-			     const struct ib_cq_init_attr *attr,
-			     struct ib_ucontext *ib_context,
-			     struct ib_udata *udata);
+int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
+void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		   struct ib_udata *udata);
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
 		    enum ib_srq_attr_mask srq_attr_mask,
 		    struct ib_udata *udata);
-int c4iw_destroy_srq(struct ib_srq *ib_srq);
-struct ib_srq *c4iw_create_srq(struct ib_pd *pd,
-			       struct ib_srq_init_attr *attrs,
-			       struct ib_udata *udata);
-int c4iw_destroy_qp(struct ib_qp *ib_qp);
+void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
+int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs,
+		    struct ib_udata *udata);
+int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata);
 struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
 			     struct ib_qp_init_attr *attrs,
 			     struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 7b76e6f..35c284a 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -130,8 +130,9 @@
 
 		copy_len = len > C4IW_MAX_INLINE_SIZE ? C4IW_MAX_INLINE_SIZE :
 			   len;
-		wr_len = roundup(sizeof *req + sizeof *sc +
-				 roundup(copy_len, T4_ULPTX_MIN_IO), 16);
+		wr_len = roundup(sizeof(*req) + sizeof(*sc) +
+					 roundup(copy_len, T4_ULPTX_MIN_IO),
+				 16);
 
 		if (!skb) {
 			skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
@@ -274,13 +275,17 @@
 			   struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp)
 {
 	int err;
-	struct fw_ri_tpte tpt;
+	struct fw_ri_tpte *tpt;
 	u32 stag_idx;
 	static atomic_t key;
 
 	if (c4iw_fatal_error(rdev))
 		return -EIO;
 
+	tpt = kmalloc(sizeof(*tpt), GFP_KERNEL);
+	if (!tpt)
+		return -ENOMEM;
+
 	stag_state = stag_state > 0;
 	stag_idx = (*stag) >> 8;
 
@@ -290,6 +295,7 @@
 			mutex_lock(&rdev->stats.lock);
 			rdev->stats.stag.fail++;
 			mutex_unlock(&rdev->stats.lock);
+			kfree(tpt);
 			return -ENOMEM;
 		}
 		mutex_lock(&rdev->stats.lock);
@@ -304,28 +310,28 @@
 
 	/* write TPT entry */
 	if (reset_tpt_entry)
-		memset(&tpt, 0, sizeof(tpt));
+		memset(tpt, 0, sizeof(*tpt));
 	else {
-		tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
+		tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
 			FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) |
 			FW_RI_TPTE_STAGSTATE_V(stag_state) |
 			FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid));
-		tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
+		tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
 			(bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) |
 			FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO :
 						      FW_RI_VA_BASED_TO))|
 			FW_RI_TPTE_PS_V(page_size));
-		tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
+		tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
 			FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3));
-		tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
-		tpt.va_hi = cpu_to_be32((u32)(to >> 32));
-		tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
-		tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
-		tpt.len_hi = cpu_to_be32((u32)(len >> 32));
+		tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
+		tpt->va_hi = cpu_to_be32((u32)(to >> 32));
+		tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
+		tpt->dca_mwbcnt_pstag = cpu_to_be32(0);
+		tpt->len_hi = cpu_to_be32((u32)(len >> 32));
 	}
 	err = write_adapter_mem(rdev, stag_idx +
 				(rdev->lldi.vr->stag.start >> 5),
-				sizeof(tpt), &tpt, skb, wr_waitp);
+				sizeof(*tpt), tpt, skb, wr_waitp);
 
 	if (reset_tpt_entry) {
 		c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@@ -333,6 +339,7 @@
 		rdev->stats.stag.cur -= 32;
 		mutex_unlock(&rdev->stats.lock);
 	}
+	kfree(tpt);
 	return err;
 }
 
@@ -395,7 +402,7 @@
 	mhp->ibmr.iova = mhp->attr.va_fbo;
 	mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12);
 	pr_debug("mmid 0x%x mhp %p\n", mmid, mhp);
-	return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+	return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL);
 }
 
 static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
@@ -502,10 +509,9 @@
 			       u64 virt, int acc, struct ib_udata *udata)
 {
 	__be64 *pages;
-	int shift, n, len;
-	int i, k, entry;
+	int shift, n, i;
 	int err = -ENOMEM;
-	struct scatterlist *sg;
+	struct sg_dma_page_iter sg_iter;
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;
 	struct c4iw_mr *mhp;
@@ -537,13 +543,13 @@
 
 	mhp->rhp = rhp;
 
-	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+	mhp->umem = ib_umem_get(udata, start, length, acc, 0);
 	if (IS_ERR(mhp->umem))
 		goto err_free_skb;
 
-	shift = mhp->umem->page_shift;
+	shift = PAGE_SHIFT;
 
-	n = mhp->umem->nmap;
+	n = ib_umem_num_pages(mhp->umem);
 	err = alloc_pbl(mhp, n);
 	if (err)
 		goto err_umem_release;
@@ -556,21 +562,16 @@
 
 	i = n = 0;
 
-	for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
-		len = sg_dma_len(sg) >> shift;
-		for (k = 0; k < len; ++k) {
-			pages[i++] = cpu_to_be64(sg_dma_address(sg) +
-						 (k << shift));
-			if (i == PAGE_SIZE / sizeof *pages) {
-				err = write_pbl(&mhp->rhp->rdev,
-				      pages,
-				      mhp->attr.pbl_addr + (n << 3), i,
-				      mhp->wr_waitp);
-				if (err)
-					goto pbl_done;
-				n += i;
-				i = 0;
-			}
+	for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
+		pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
+		if (i == PAGE_SIZE / sizeof(*pages)) {
+			err = write_pbl(&mhp->rhp->rdev, pages,
+					mhp->attr.pbl_addr + (n << 3), i,
+					mhp->wr_waitp);
+			if (err)
+				goto pbl_done;
+			n += i;
+			i = 0;
 		}
 	}
 
@@ -651,7 +652,7 @@
 	mhp->attr.stag = stag;
 	mmid = (stag) >> 8;
 	mhp->ibmw.rkey = stag;
-	if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+	if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
 		ret = -ENOMEM;
 		goto dealloc_win;
 	}
@@ -679,19 +680,18 @@
 	mhp = to_c4iw_mw(mw);
 	rhp = mhp->rhp;
 	mmid = (mw->rkey) >> 8;
-	remove_handle(rhp, &rhp->mmidr, mmid);
+	xa_erase_irq(&rhp->mrs, mmid);
 	deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb,
 			  mhp->wr_waitp);
 	kfree_skb(mhp->dereg_skb);
 	c4iw_put_wr_wait(mhp->wr_waitp);
-	kfree(mhp);
 	pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp);
+	kfree(mhp);
 	return 0;
 }
 
-struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
-			    enum ib_mr_type mr_type,
-			    u32 max_num_sg)
+struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+			    u32 max_num_sg, struct ib_udata *udata)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;
@@ -746,7 +746,7 @@
 	mhp->attr.state = 0;
 	mmid = (stag) >> 8;
 	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+	if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
 		ret = -ENOMEM;
 		goto err_dereg;
 	}
@@ -792,7 +792,7 @@
 	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page);
 }
 
-int c4iw_dereg_mr(struct ib_mr *ib_mr)
+int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_mr *mhp;
@@ -803,7 +803,7 @@
 	mhp = to_c4iw_mr(ib_mr);
 	rhp = mhp->rhp;
 	mmid = mhp->attr.stag >> 8;
-	remove_handle(rhp, &rhp->mmidr, mmid);
+	xa_erase_irq(&rhp->mrs, mmid);
 	if (mhp->mpl)
 		dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
 				  mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
@@ -814,8 +814,7 @@
 				  mhp->attr.pbl_size << 3);
 	if (mhp->kva)
 		kfree((void *) (unsigned long) mhp->kva);
-	if (mhp->umem)
-		ib_umem_release(mhp->umem);
+	ib_umem_release(mhp->umem);
 	pr_debug("mmid 0x%x ptr %p\n", mmid, mhp);
 	c4iw_put_wr_wait(mhp->wr_waitp);
 	kfree(mhp);
@@ -827,9 +826,9 @@
 	struct c4iw_mr *mhp;
 	unsigned long flags;
 
-	spin_lock_irqsave(&rhp->lock, flags);
-	mhp = get_mhp(rhp, rkey >> 8);
+	xa_lock_irqsave(&rhp->mrs, flags);
+	mhp = xa_load(&rhp->mrs, rkey >> 8);
 	if (mhp)
 		mhp->attr.state = 0;
-	spin_unlock_irqrestore(&rhp->lock, flags);
+	xa_unlock_irqrestore(&rhp->mrs, flags);
 }
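
Earlier in this file, c4iw_reg_user_mr() also stops relying on the umem page_shift/nmap bookkeeping: the page list is now built from ib_umem_num_pages() and the DMA page iterator instead of an open-coded scatterlist walk. A rough, self-contained sketch of the iterator pattern (the helper name and the bounds check are illustrative, not part of the driver):

#include <linux/scatterlist.h>
#include <linux/types.h>

static int build_page_list(struct scatterlist *sgl, unsigned int nents,
			   __be64 *pages, unsigned int max_pages)
{
	struct sg_dma_page_iter sg_iter;
	unsigned int i = 0;

	/* Visit the DMA-mapped region one PAGE_SIZE chunk at a time. */
	for_each_sg_dma_page(sgl, &sg_iter, nents, 0) {
		if (i >= max_pages)
			return -ENOMEM;
		pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
	}
	return i;
}

The iterator hides the per-entry length arithmetic, which is why the nested k loop in the old code disappears.
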
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 4eda687..d373ac0 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -58,51 +58,34 @@
 module_param(fastreg_support, int, 0644);
 MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
 
-void _c4iw_free_ucontext(struct kref *kref)
+static void c4iw_dealloc_ucontext(struct ib_ucontext *context)
 {
-	struct c4iw_ucontext *ucontext;
+	struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
 	struct c4iw_dev *rhp;
 	struct c4iw_mm_entry *mm, *tmp;
 
-	ucontext = container_of(kref, struct c4iw_ucontext, kref);
+	pr_debug("context %p\n", context);
 	rhp = to_c4iw_dev(ucontext->ibucontext.device);
 
-	pr_debug("ucontext %p\n", ucontext);
 	list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
 		kfree(mm);
 	c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
-	kfree(ucontext);
 }
 
-static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
+static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
+			       struct ib_udata *udata)
 {
-	struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
-
-	pr_debug("context %p\n", context);
-	c4iw_put_ucontext(ucontext);
-	return 0;
-}
-
-static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
-					       struct ib_udata *udata)
-{
-	struct c4iw_ucontext *context;
+	struct ib_device *ibdev = ucontext->device;
+	struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext);
 	struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
 	struct c4iw_alloc_ucontext_resp uresp;
 	int ret = 0;
 	struct c4iw_mm_entry *mm = NULL;
 
 	pr_debug("ibdev %p\n", ibdev);
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
 	c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
 	INIT_LIST_HEAD(&context->mmaps);
 	spin_lock_init(&context->mmap_lock);
-	kref_init(&context->kref);
 
 	if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
 		pr_err_once("Warning - downlevel libcxgb4 (non-fatal), device status page disabled\n");
@@ -111,7 +94,7 @@
 		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
 		if (!mm) {
 			ret = -ENOMEM;
-			goto err_free;
+			goto err;
 		}
 
 		uresp.status_page_size = PAGE_SIZE;
@@ -131,13 +114,11 @@
 		mm->len = PAGE_SIZE;
 		insert_mmap(context, mm);
 	}
-	return &context->ibucontext;
+	return 0;
 err_mm:
 	kfree(mm);
-err_free:
-	kfree(context);
 err:
-	return ERR_PTR(ret);
+	return ret;
 }
 
 static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -209,7 +190,7 @@
 	return ret;
 }
 
-static int c4iw_deallocate_pd(struct ib_pd *pd)
+static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;
@@ -221,15 +202,12 @@
 	mutex_lock(&rhp->rdev.stats.lock);
 	rhp->rdev.stats.pd.cur--;
 	mutex_unlock(&rhp->rdev.stats.lock);
-	kfree(php);
-	return 0;
 }
 
-static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
-				      struct ib_ucontext *context,
-				      struct ib_udata *udata)
+static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
 {
-	struct c4iw_pd *php;
+	struct c4iw_pd *php = to_c4iw_pd(pd);
+	struct ib_device *ibdev = pd->device;
 	u32 pdid;
 	struct c4iw_dev *rhp;
 
@@ -237,20 +215,16 @@
 	rhp = (struct c4iw_dev *) ibdev;
 	pdid =  c4iw_get_resource(&rhp->rdev.resource.pdid_table);
 	if (!pdid)
-		return ERR_PTR(-EINVAL);
-	php = kzalloc(sizeof(*php), GFP_KERNEL);
-	if (!php) {
-		c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
-		return ERR_PTR(-ENOMEM);
-	}
+		return -EINVAL;
+
 	php->pdid = pdid;
 	php->rhp = rhp;
-	if (context) {
+	if (udata) {
 		struct c4iw_alloc_pd_resp uresp = {.pdid = php->pdid};
 
 		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
-			c4iw_deallocate_pd(&php->ibpd);
-			return ERR_PTR(-EFAULT);
+			c4iw_deallocate_pd(&php->ibpd, udata);
+			return -EFAULT;
 		}
 	}
 	mutex_lock(&rhp->rdev.stats.lock);
@@ -259,7 +233,7 @@
 		rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
 	mutex_unlock(&rhp->rdev.stats.lock);
 	pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php);
-	return &php->ibpd;
+	return 0;
 }
 
 static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
@@ -297,7 +271,6 @@
 		return -EINVAL;
 
 	dev = to_c4iw_dev(ibdev);
-	memset(props, 0, sizeof *props);
 	memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
 	props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type);
 	props->fw_ver = dev->rdev.lldi.fw_vers;
@@ -332,32 +305,8 @@
 static int c4iw_query_port(struct ib_device *ibdev, u8 port,
 			   struct ib_port_attr *props)
 {
-	struct c4iw_dev *dev;
-	struct net_device *netdev;
-	struct in_device *inetdev;
-
 	pr_debug("ibdev %p\n", ibdev);
 
-	dev = to_c4iw_dev(ibdev);
-	netdev = dev->rdev.lldi.ports[port-1];
-	/* props being zeroed by the caller, avoid zeroing it here */
-	props->max_mtu = IB_MTU_4096;
-	props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
-	if (!netif_carrier_ok(netdev))
-		props->state = IB_PORT_DOWN;
-	else {
-		inetdev = in_dev_get(netdev);
-		if (inetdev) {
-			if (inetdev->ifa_list)
-				props->state = IB_PORT_ACTIVE;
-			else
-				props->state = IB_PORT_INIT;
-			in_dev_put(inetdev);
-		} else
-			props->state = IB_PORT_INIT;
-	}
-
 	props->port_cap_flags =
 	    IB_PORT_CM_SUP |
 	    IB_PORT_SNMP_TUNNEL_SUP |
@@ -373,21 +322,23 @@
 	return 0;
 }
 
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
-						 ibdev.dev);
+	struct c4iw_dev *c4iw_dev =
+			rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
+
 	pr_debug("dev 0x%p\n", dev);
 	return sprintf(buf, "%d\n",
 		       CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
 }
+static DEVICE_ATTR_RO(hw_rev);
 
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
-			char *buf)
+static ssize_t hca_type_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
-						 ibdev.dev);
+	struct c4iw_dev *c4iw_dev =
+			rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
 	struct ethtool_drvinfo info;
 	struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
 
@@ -395,16 +346,19 @@
 	lldev->ethtool_ops->get_drvinfo(lldev, &info);
 	return sprintf(buf, "%s\n", info.driver);
 }
+static DEVICE_ATTR_RO(hca_type);
 
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
-			  char *buf)
+static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
 {
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
-						 ibdev.dev);
+	struct c4iw_dev *c4iw_dev =
+			rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
+
 	pr_debug("dev 0x%p\n", dev);
 	return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
 		       c4iw_dev->rdev.lldi.pdev->device);
 }
+static DEVICE_ATTR_RO(board_id);
 
 enum counters {
 	IP4INSEGS,
@@ -461,14 +415,15 @@
 	return stats->num_counters;
 }
 
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *c4iw_class_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	NULL
+};
 
-static struct device_attribute *c4iw_class_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id,
+static const struct attribute_group c4iw_attr_group = {
+	.attrs = c4iw_class_attributes,
 };
 
 static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -502,24 +457,6 @@
 		 FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
 }
 
-static struct net_device *get_netdev(struct ib_device *dev, u8 port)
-{
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev);
-	struct c4iw_rdev *rdev = &c4iw_dev->rdev;
-	struct net_device *ndev;
-
-	if (!port || port > rdev->lldi.nports)
-		return NULL;
-
-	rcu_read_lock();
-	ndev = rdev->lldi.ports[port - 1];
-	if (ndev)
-		dev_hold(ndev);
-	rcu_read_unlock();
-
-	return ndev;
-}
-
 static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
 {
 	return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) &&
@@ -527,18 +464,83 @@
 		c4iw_restrack_funcs[res->type](msg, res) : 0;
 }
 
-void c4iw_register_device(struct work_struct *work)
+static const struct ib_device_ops c4iw_dev_ops = {
+	.owner = THIS_MODULE,
+	.driver_id = RDMA_DRIVER_CXGB4,
+	.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION,
+
+	.alloc_hw_stats = c4iw_alloc_stats,
+	.alloc_mr = c4iw_alloc_mr,
+	.alloc_mw = c4iw_alloc_mw,
+	.alloc_pd = c4iw_allocate_pd,
+	.alloc_ucontext = c4iw_alloc_ucontext,
+	.create_cq = c4iw_create_cq,
+	.create_qp = c4iw_create_qp,
+	.create_srq = c4iw_create_srq,
+	.dealloc_mw = c4iw_dealloc_mw,
+	.dealloc_pd = c4iw_deallocate_pd,
+	.dealloc_ucontext = c4iw_dealloc_ucontext,
+	.dereg_mr = c4iw_dereg_mr,
+	.destroy_cq = c4iw_destroy_cq,
+	.destroy_qp = c4iw_destroy_qp,
+	.destroy_srq = c4iw_destroy_srq,
+	.fill_res_entry = fill_res_entry,
+	.get_dev_fw_str = get_dev_fw_str,
+	.get_dma_mr = c4iw_get_dma_mr,
+	.get_hw_stats = c4iw_get_mib,
+	.get_port_immutable = c4iw_port_immutable,
+	.iw_accept = c4iw_accept_cr,
+	.iw_add_ref = c4iw_qp_add_ref,
+	.iw_connect = c4iw_connect,
+	.iw_create_listen = c4iw_create_listen,
+	.iw_destroy_listen = c4iw_destroy_listen,
+	.iw_get_qp = c4iw_get_qp,
+	.iw_reject = c4iw_reject_cr,
+	.iw_rem_ref = c4iw_qp_rem_ref,
+	.map_mr_sg = c4iw_map_mr_sg,
+	.mmap = c4iw_mmap,
+	.modify_qp = c4iw_ib_modify_qp,
+	.modify_srq = c4iw_modify_srq,
+	.poll_cq = c4iw_poll_cq,
+	.post_recv = c4iw_post_receive,
+	.post_send = c4iw_post_send,
+	.post_srq_recv = c4iw_post_srq_recv,
+	.query_device = c4iw_query_device,
+	.query_gid = c4iw_query_gid,
+	.query_pkey = c4iw_query_pkey,
+	.query_port = c4iw_query_port,
+	.query_qp = c4iw_ib_query_qp,
+	.reg_user_mr = c4iw_reg_user_mr,
+	.req_notify_cq = c4iw_arm_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq),
+	INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
+};
+
+static int set_netdevs(struct ib_device *ib_dev, struct c4iw_rdev *rdev)
 {
 	int ret;
 	int i;
+
+	for (i = 0; i < rdev->lldi.nports; i++) {
+		ret = ib_device_set_netdev(ib_dev, rdev->lldi.ports[i],
+					   i + 1);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+void c4iw_register_device(struct work_struct *work)
+{
+	int ret;
 	struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work);
 	struct c4iw_dev *dev = ctx->dev;
 
 	pr_debug("c4iw_dev %p\n", dev);
-	strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
 	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
 	memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
-	dev->ibdev.owner = THIS_MODULE;
 	dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
 	if (fastreg_support)
 		dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
@@ -571,77 +573,20 @@
 	dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
 	dev->ibdev.num_comp_vectors =  dev->rdev.lldi.nciq;
 	dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
-	dev->ibdev.query_device = c4iw_query_device;
-	dev->ibdev.query_port = c4iw_query_port;
-	dev->ibdev.query_pkey = c4iw_query_pkey;
-	dev->ibdev.query_gid = c4iw_query_gid;
-	dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext;
-	dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext;
-	dev->ibdev.mmap = c4iw_mmap;
-	dev->ibdev.alloc_pd = c4iw_allocate_pd;
-	dev->ibdev.dealloc_pd = c4iw_deallocate_pd;
-	dev->ibdev.create_qp = c4iw_create_qp;
-	dev->ibdev.modify_qp = c4iw_ib_modify_qp;
-	dev->ibdev.query_qp = c4iw_ib_query_qp;
-	dev->ibdev.destroy_qp = c4iw_destroy_qp;
-	dev->ibdev.create_srq = c4iw_create_srq;
-	dev->ibdev.modify_srq = c4iw_modify_srq;
-	dev->ibdev.destroy_srq = c4iw_destroy_srq;
-	dev->ibdev.create_cq = c4iw_create_cq;
-	dev->ibdev.destroy_cq = c4iw_destroy_cq;
-	dev->ibdev.poll_cq = c4iw_poll_cq;
-	dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
-	dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
-	dev->ibdev.dereg_mr = c4iw_dereg_mr;
-	dev->ibdev.alloc_mw = c4iw_alloc_mw;
-	dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
-	dev->ibdev.alloc_mr = c4iw_alloc_mr;
-	dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
-	dev->ibdev.req_notify_cq = c4iw_arm_cq;
-	dev->ibdev.post_send = c4iw_post_send;
-	dev->ibdev.post_recv = c4iw_post_receive;
-	dev->ibdev.post_srq_recv = c4iw_post_srq_recv;
-	dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
-	dev->ibdev.get_hw_stats = c4iw_get_mib;
-	dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
-	dev->ibdev.get_port_immutable = c4iw_port_immutable;
-	dev->ibdev.get_dev_fw_str = get_dev_fw_str;
-	dev->ibdev.get_netdev = get_netdev;
 
-	dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
-	if (!dev->ibdev.iwcm) {
-		ret = -ENOMEM;
-		goto err_dealloc_ctx;
-	}
+	memcpy(dev->ibdev.iw_ifname, dev->rdev.lldi.ports[0]->name,
+	       sizeof(dev->ibdev.iw_ifname));
 
-	dev->ibdev.iwcm->connect = c4iw_connect;
-	dev->ibdev.iwcm->accept = c4iw_accept_cr;
-	dev->ibdev.iwcm->reject = c4iw_reject_cr;
-	dev->ibdev.iwcm->create_listen = c4iw_create_listen;
-	dev->ibdev.iwcm->destroy_listen = c4iw_destroy_listen;
-	dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
-	dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
-	dev->ibdev.iwcm->get_qp = c4iw_get_qp;
-	dev->ibdev.res.fill_res_entry = fill_res_entry;
-	memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
-	       sizeof(dev->ibdev.iwcm->ifname));
-
-	dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
-	ret = ib_register_device(&dev->ibdev, NULL);
+	rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
+	ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
+	ret = set_netdevs(&dev->ibdev, &dev->rdev);
 	if (ret)
-		goto err_kfree_iwcm;
-
-	for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
-		ret = device_create_file(&dev->ibdev.dev,
-					 c4iw_class_attributes[i]);
-		if (ret)
-			goto err_unregister_device;
-	}
+		goto err_dealloc_ctx;
+	ret = ib_register_device(&dev->ibdev, "cxgb4_%d");
+	if (ret)
+		goto err_dealloc_ctx;
 	return;
-err_unregister_device:
-	ib_unregister_device(&dev->ibdev);
-err_kfree_iwcm:
-	kfree(dev->ibdev.iwcm);
+
 err_dealloc_ctx:
 	pr_err("%s - Failed registering iwarp device: %d\n",
 	       pci_name(ctx->lldi.pdev), ret);
@@ -651,13 +596,7 @@
 
 void c4iw_unregister_device(struct c4iw_dev *dev)
 {
-	int i;
-
 	pr_debug("c4iw_dev %p\n", dev);
-	for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
-		device_remove_file(&dev->ibdev.dev,
-				   c4iw_class_attributes[i]);
 	ib_unregister_device(&dev->ibdev);
-	kfree(dev->ibdev.iwcm);
 	return;
 }
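
The provider.c rework replaces the open-coded registration with the core helpers: verbs callbacks move into the const c4iw_dev_ops table applied with ib_set_device_ops(), the separately kmalloc'ed iwcm structure gives way to the iw_* ops and iw_ifname fields, and the sysfs attributes are grouped so the core registers them as part of ib_register_device(). A minimal sketch of the sysfs half, with a purely hypothetical attribute:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	/* Hypothetical read-only value, shown only for the shape of the API. */
	return sprintf(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(example);

static struct attribute *example_attrs[] = {
	&dev_attr_example.attr,
	NULL
};

static const struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

The group is handed to rdma_set_device_sysfs_group() before ib_register_device(), so the error path no longer needs to unwind individual device_create_file() calls.
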
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 347fe18..bbcac53 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/module.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "iw_cxgb4.h"
 
@@ -56,18 +57,18 @@
 
 static int max_fr_immd = T4_MAX_FR_IMMD;
 module_param(max_fr_immd, int, 0644);
-MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate");
+MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate");
 
 static int alloc_ird(struct c4iw_dev *dev, u32 ird)
 {
 	int ret = 0;
 
-	spin_lock_irq(&dev->lock);
+	xa_lock_irq(&dev->qps);
 	if (ird <= dev->avail_ird)
 		dev->avail_ird -= ird;
 	else
 		ret = -ENOMEM;
-	spin_unlock_irq(&dev->lock);
+	xa_unlock_irq(&dev->qps);
 
 	if (ret)
 		dev_warn(&dev->rdev.lldi.pdev->dev,
@@ -78,9 +79,9 @@
 
 static void free_ird(struct c4iw_dev *dev, int ird)
 {
-	spin_lock_irq(&dev->lock);
+	xa_lock_irq(&dev->qps);
 	dev->avail_ird += ird;
-	spin_unlock_irq(&dev->lock);
+	xa_unlock_irq(&dev->qps);
 }
 
 static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
@@ -99,7 +100,7 @@
 static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
 {
 	dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue,
-			  pci_unmap_addr(sq, mapping));
+			  dma_unmap_addr(sq, mapping));
 }
 
 static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
@@ -132,7 +133,7 @@
 	if (!sq->queue)
 		return -ENOMEM;
 	sq->phys_addr = virt_to_phys(sq->queue);
-	pci_unmap_addr_set(sq, mapping, sq->dma_addr);
+	dma_unmap_addr_set(sq, mapping, sq->dma_addr);
 	return 0;
 }
 
@@ -273,18 +274,18 @@
 			 (unsigned long long)virt_to_phys(wq->sq.queue),
 			 wq->rq.queue,
 			 (unsigned long long)virt_to_phys(wq->rq.queue));
-		memset(wq->rq.queue, 0, wq->rq.memsize);
 		dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
 	}
 
 	wq->db = rdev->lldi.db_reg;
 
-	wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
+	wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
+					 CXGB4_BAR2_QTYPE_EGRESS,
 					 &wq->sq.bar2_qid,
 					 user ? &wq->sq.bar2_pa : NULL);
 	if (need_rq)
 		wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
-						 T4_BAR2_QTYPE_EGRESS,
+						 CXGB4_BAR2_QTYPE_EGRESS,
 						 &wq->rq.bar2_qid,
 						 user ? &wq->rq.bar2_pa : NULL);
 
@@ -301,7 +302,7 @@
 	wq->rq.msn = 1;
 
 	/* build fw_ri_res_wr */
-	wr_len = sizeof *res_wr + 2 * sizeof *res;
+	wr_len = sizeof(*res_wr) + 2 * sizeof(*res);
 	if (need_rq)
 		wr_len += sizeof(*res);
 	skb = alloc_skb(wr_len, GFP_KERNEL);
@@ -437,7 +438,7 @@
 			rem -= len;
 		}
 	}
-	len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp);
+	len = roundup(plen + sizeof(*immdp), 16) - (plen + sizeof(*immdp));
 	if (len)
 		memset(dstp, 0, len);
 	immdp->op = FW_RI_DATA_IMMD;
@@ -526,7 +527,7 @@
 					 T4_MAX_SEND_INLINE, &plen);
 			if (ret)
 				return ret;
-			size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
+			size = sizeof(wqe->send) + sizeof(struct fw_ri_immd) +
 			       plen;
 		} else {
 			ret = build_isgl((__be64 *)sq->queue,
@@ -535,7 +536,7 @@
 					 wr->sg_list, wr->num_sge, &plen);
 			if (ret)
 				return ret;
-			size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
+			size = sizeof(wqe->send) + sizeof(struct fw_ri_isgl) +
 			       wr->num_sge * sizeof(struct fw_ri_sge);
 		}
 	} else {
@@ -543,7 +544,7 @@
 		wqe->send.u.immd_src[0].r1 = 0;
 		wqe->send.u.immd_src[0].r2 = 0;
 		wqe->send.u.immd_src[0].immdlen = 0;
-		size = sizeof wqe->send + sizeof(struct fw_ri_immd);
+		size = sizeof(wqe->send) + sizeof(struct fw_ri_immd);
 		plen = 0;
 	}
 	*len16 = DIV_ROUND_UP(size, 16);
@@ -577,7 +578,7 @@
 					 T4_MAX_WRITE_INLINE, &plen);
 			if (ret)
 				return ret;
-			size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
+			size = sizeof(wqe->write) + sizeof(struct fw_ri_immd) +
 			       plen;
 		} else {
 			ret = build_isgl((__be64 *)sq->queue,
@@ -586,7 +587,7 @@
 					 wr->sg_list, wr->num_sge, &plen);
 			if (ret)
 				return ret;
-			size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
+			size = sizeof(wqe->write) + sizeof(struct fw_ri_isgl) +
 			       wr->num_sge * sizeof(struct fw_ri_sge);
 		}
 	} else {
@@ -594,7 +595,7 @@
 		wqe->write.u.immd_src[0].r1 = 0;
 		wqe->write.u.immd_src[0].r2 = 0;
 		wqe->write.u.immd_src[0].immdlen = 0;
-		size = sizeof wqe->write + sizeof(struct fw_ri_immd);
+		size = sizeof(wqe->write) + sizeof(struct fw_ri_immd);
 		plen = 0;
 	}
 	*len16 = DIV_ROUND_UP(size, 16);
@@ -631,7 +632,10 @@
 
 	wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
 	wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
-	wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
+	if (wr->next->opcode == IB_WR_SEND)
+		wcwr->stag_inv = 0;
+	else
+		wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
 	wcwr->r2 = 0;
 	wcwr->r3 = 0;
 
@@ -678,7 +682,7 @@
 	}
 	wqe->read.r2 = 0;
 	wqe->read.r5 = 0;
-	*len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
+	*len16 = DIV_ROUND_UP(sizeof(wqe->read), 16);
 	return 0;
 }
 
@@ -725,7 +729,10 @@
 
 	/* SEND_WITH_INV swsqe */
 	swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
-	swsqe->opcode = FW_RI_SEND_WITH_INV;
+	if (wr->next->opcode == IB_WR_SEND)
+		swsqe->opcode = FW_RI_SEND;
+	else
+		swsqe->opcode = FW_RI_SEND_WITH_INV;
 	swsqe->idx = qhp->wq.sq.pidx;
 	swsqe->complete = 0;
 	swsqe->signaled = send_signaled;
@@ -758,8 +765,8 @@
 			 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
 	if (ret)
 		return ret;
-	*len16 = DIV_ROUND_UP(sizeof wqe->recv +
-			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
+	*len16 = DIV_ROUND_UP(
+		sizeof(wqe->recv) + wr->num_sge * sizeof(struct fw_ri_sge), 16);
 	return 0;
 }
 
@@ -878,49 +885,21 @@
 {
 	wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
 	wqe->inv.r2 = 0;
-	*len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
+	*len16 = DIV_ROUND_UP(sizeof(wqe->inv), 16);
 	return 0;
 }
 
-static void free_qp_work(struct work_struct *work)
-{
-	struct c4iw_ucontext *ucontext;
-	struct c4iw_qp *qhp;
-	struct c4iw_dev *rhp;
-
-	qhp = container_of(work, struct c4iw_qp, free_work);
-	ucontext = qhp->ucontext;
-	rhp = qhp->rhp;
-
-	pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
-	destroy_qp(&rhp->rdev, &qhp->wq,
-		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
-
-	if (ucontext)
-		c4iw_put_ucontext(ucontext);
-	c4iw_put_wr_wait(qhp->wr_waitp);
-	kfree(qhp);
-}
-
-static void queue_qp_free(struct kref *kref)
-{
-	struct c4iw_qp *qhp;
-
-	qhp = container_of(kref, struct c4iw_qp, kref);
-	pr_debug("qhp %p\n", qhp);
-	queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
-}
-
 void c4iw_qp_add_ref(struct ib_qp *qp)
 {
 	pr_debug("ib_qp %p\n", qp);
-	kref_get(&to_c4iw_qp(qp)->kref);
+	refcount_inc(&to_c4iw_qp(qp)->qp_refcnt);
 }
 
 void c4iw_qp_rem_ref(struct ib_qp *qp)
 {
 	pr_debug("ib_qp %p\n", qp);
-	kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
+	if (refcount_dec_and_test(&to_c4iw_qp(qp)->qp_refcnt))
+		complete(&to_c4iw_qp(qp)->qp_rel_comp);
 }
 
 static void add_to_fc_list(struct list_head *head, struct list_head *entry)
@@ -933,7 +912,7 @@
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&qhp->rhp->lock, flags);
+	xa_lock_irqsave(&qhp->rhp->qps, flags);
 	spin_lock(&qhp->lock);
 	if (qhp->rhp->db_state == NORMAL)
 		t4_ring_sq_db(&qhp->wq, inc, NULL);
@@ -942,7 +921,7 @@
 		qhp->wq.sq.wq_pidx_inc += inc;
 	}
 	spin_unlock(&qhp->lock);
-	spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+	xa_unlock_irqrestore(&qhp->rhp->qps, flags);
 	return 0;
 }
 
@@ -950,7 +929,7 @@
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&qhp->rhp->lock, flags);
+	xa_lock_irqsave(&qhp->rhp->qps, flags);
 	spin_lock(&qhp->lock);
 	if (qhp->rhp->db_state == NORMAL)
 		t4_ring_rq_db(&qhp->wq, inc, NULL);
@@ -959,7 +938,7 @@
 		qhp->wq.rq.wq_pidx_inc += inc;
 	}
 	spin_unlock(&qhp->lock);
-	spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+	xa_unlock_irqrestore(&qhp->rhp->qps, flags);
 	return 0;
 }
 
@@ -1132,9 +1111,9 @@
 	/*
 	 * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is
 	 * the response for small NVMEe-oF READ requests.  If the chain is
 	 * the response for small NVMe-oF READ requests.  If the chain is
-	 * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths
-	 * meet the requirements of the fw_ri_write_cmpl_wr work request,
-	 * then build and post the write_cmpl WR.  If any of the tests
+	 * exactly a WRITE->SEND_WITH_INV or a WRITE->SEND and the sgl depths
+	 * and lengths meet the requirements of the fw_ri_write_cmpl_wr work
+	 * request, then build and post the write_cmpl WR. If any of the tests
 	 * below are not true, then we continue on with the traditional WRITE
 	 * and SEND WRs.
 	 */
@@ -1144,7 +1123,8 @@
 	    wr && wr->next && !wr->next->next &&
 	    wr->opcode == IB_WR_RDMA_WRITE &&
 	    wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
-	    wr->next->opcode == IB_WR_SEND_WITH_INV &&
+	    (wr->next->opcode == IB_WR_SEND ||
+	    wr->next->opcode == IB_WR_SEND_WITH_INV) &&
 	    wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
 	    wr->next->num_sge == 1 && num_wrs >= 2) {
 		post_write_cmpl(qhp, wr);
@@ -1599,7 +1579,7 @@
 		FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
 
 	wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
-	wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
+	wqe->u.terminate.immdlen = cpu_to_be32(sizeof(*term));
 	term = (struct terminate_message *)wqe->u.terminate.termmsg;
 	if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
 		term->layer_etype = qhp->attr.layer_etype;
@@ -1744,16 +1724,15 @@
 static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
 {
 	pr_debug("p2p_type = %d\n", p2p_type);
-	memset(&init->u, 0, sizeof init->u);
+	memset(&init->u, 0, sizeof(init->u));
 	switch (p2p_type) {
 	case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
 		init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
 		init->u.write.stag_sink = cpu_to_be32(1);
 		init->u.write.to_sink = cpu_to_be64(1);
 		init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
-		init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write +
-						   sizeof(struct fw_ri_immd),
-						   16);
+		init->u.write.len16 = DIV_ROUND_UP(
+			sizeof(init->u.write) + sizeof(struct fw_ri_immd), 16);
 		break;
 	case FW_RI_INIT_P2PTYPE_READ_REQ:
 		init->u.write.opcode = FW_RI_RDMA_READ_WR;
@@ -1761,7 +1740,7 @@
 		init->u.read.to_src_lo = cpu_to_be32(1);
 		init->u.read.stag_sink = cpu_to_be32(1);
 		init->u.read.to_sink_lo = cpu_to_be32(1);
-		init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16);
+		init->u.read.len16 = DIV_ROUND_UP(sizeof(init->u.read), 16);
 		break;
 	}
 }
@@ -1775,7 +1754,7 @@
 	pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp,
 		 qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
 
-	skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
+	skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
 	if (!skb) {
 		ret = -ENOMEM;
 		goto out;
@@ -1969,10 +1948,10 @@
 			qhp->attr.layer_etype = attrs->layer_etype;
 			qhp->attr.ecode = attrs->ecode;
 			ep = qhp->ep;
+			c4iw_get_ep(&ep->com);
+			disconnect = 1;
 			if (!internal) {
-				c4iw_get_ep(&qhp->ep->com);
 				terminate = 1;
-				disconnect = 1;
 			} else {
 				terminate = qhp->attr.send_term;
 				ret = rdma_fini(rhp, qhp, ep);
@@ -2088,14 +2067,16 @@
 	return ret;
 }
 
-int c4iw_destroy_qp(struct ib_qp *ib_qp)
+int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_qp *qhp;
+	struct c4iw_ucontext *ucontext;
 	struct c4iw_qp_attributes attrs;
 
 	qhp = to_c4iw_qp(ib_qp);
 	rhp = qhp->rhp;
+	ucontext = qhp->ucontext;
 
 	attrs.next_state = C4IW_QP_STATE_ERROR;
 	if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
@@ -2104,17 +2085,26 @@
 		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
 	wait_event(qhp->wait, !qhp->ep);
 
-	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
-
-	spin_lock_irq(&rhp->lock);
+	xa_lock_irq(&rhp->qps);
+	__xa_erase(&rhp->qps, qhp->wq.sq.qid);
 	if (!list_empty(&qhp->db_fc_entry))
 		list_del_init(&qhp->db_fc_entry);
-	spin_unlock_irq(&rhp->lock);
+	xa_unlock_irq(&rhp->qps);
 	free_ird(rhp, qhp->attr.max_ird);
 
 	c4iw_qp_rem_ref(ib_qp);
 
+	wait_for_completion(&qhp->qp_rel_comp);
+
 	pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid);
+	pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
+
+	destroy_qp(&rhp->rdev, &qhp->wq,
+		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
+
+	c4iw_put_wr_wait(qhp->wr_waitp);
+
+	kfree(qhp);
 	return 0;
 }
 
@@ -2128,7 +2118,8 @@
 	struct c4iw_cq *rchp;
 	struct c4iw_create_qp_resp uresp;
 	unsigned int sqsize, rqsize = 0;
-	struct c4iw_ucontext *ucontext;
+	struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct c4iw_ucontext, ibucontext);
 	int ret;
 	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
 	struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
@@ -2162,8 +2153,6 @@
 	if (sqsize < 8)
 		sqsize = 8;
 
-	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
-
 	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
 	if (!qhp)
 		return ERR_PTR(-ENOMEM);
@@ -2225,10 +2214,10 @@
 	spin_lock_init(&qhp->lock);
 	mutex_init(&qhp->mutex);
 	init_waitqueue_head(&qhp->wait);
-	kref_init(&qhp->kref);
-	INIT_WORK(&qhp->free_work, free_qp_work);
+	init_completion(&qhp->qp_rel_comp);
+	refcount_set(&qhp->qp_refcnt, 1);
 
-	ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+	ret = xa_insert_irq(&rhp->qps, qhp->wq.sq.qid, qhp, GFP_KERNEL);
 	if (ret)
 		goto err_destroy_qp;
 
@@ -2297,7 +2286,7 @@
 			ucontext->key += PAGE_SIZE;
 		}
 		spin_unlock(&ucontext->mmap_lock);
-		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 		if (ret)
 			goto err_free_ma_sync_key;
 		sq_key_mm->key = uresp.sq_key;
@@ -2330,7 +2319,6 @@
 			insert_mmap(ucontext, ma_sync_key_mm);
 		}
 
-		c4iw_get_ucontext(ucontext);
 		qhp->ucontext = ucontext;
 	}
 	if (!attrs->srq) {
@@ -2365,7 +2353,7 @@
 err_free_sq_key:
 	kfree(sq_key_mm);
 err_remove_handle:
-	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
+	xa_erase_irq(&rhp->qps, qhp->wq.sq.qid);
 err_destroy_qp:
 	destroy_qp(&rhp->rdev, &qhp->wq,
 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
@@ -2382,7 +2370,7 @@
 	struct c4iw_dev *rhp;
 	struct c4iw_qp *qhp;
 	enum c4iw_qp_attr_mask mask = 0;
-	struct c4iw_qp_attributes attrs;
+	struct c4iw_qp_attributes attrs = {};
 
 	pr_debug("ib_qp %p\n", ibqp);
 
@@ -2394,7 +2382,6 @@
 	if (!attr_mask)
 		return 0;
 
-	memset(&attrs, 0, sizeof attrs);
 	qhp = to_c4iw_qp(ibqp);
 	rhp = qhp->rhp;
 
@@ -2478,8 +2465,8 @@
 {
 	struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
 
-	memset(attr, 0, sizeof *attr);
-	memset(init_attr, 0, sizeof *init_attr);
+	memset(attr, 0, sizeof(*attr));
+	memset(init_attr, 0, sizeof(*init_attr));
 	attr->qp_state = to_ib_qp_state(qhp->attr.state);
 	init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
 	init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
@@ -2521,7 +2508,7 @@
 
 	dma_free_coherent(&rdev->lldi.pdev->dev,
 			  wq->memsize, wq->queue,
-			pci_unmap_addr(wq, mapping));
+			dma_unmap_addr(wq, mapping));
 	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
 	kfree(wq->sw_rq);
 	c4iw_put_qpid(rdev, wq->qid, uctx);
@@ -2563,16 +2550,14 @@
 	wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
 		T4_RQT_ENTRY_SHIFT;
 
-	wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
-				       wq->memsize, &wq->dma_addr,
-			GFP_KERNEL);
+	wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, wq->memsize,
+				       &wq->dma_addr, GFP_KERNEL);
 	if (!wq->queue)
 		goto err_free_rqtpool;
 
-	memset(wq->queue, 0, wq->memsize);
-	pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+	dma_unmap_addr_set(wq, mapping, wq->dma_addr);
 
-	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
+	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
 				      &wq->bar2_qid,
 			user ? &wq->bar2_pa : NULL);
 
@@ -2590,7 +2575,7 @@
 	/* build fw_ri_res_wr */
 	wr_len = sizeof(*res_wr) + sizeof(*res);
 
-	skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+	skb = alloc_skb(wr_len, GFP_KERNEL);
 	if (!skb)
 		goto err_free_queue;
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
@@ -2649,7 +2634,7 @@
 err_free_queue:
 	dma_free_coherent(&rdev->lldi.pdev->dev,
 			  wq->memsize, wq->queue,
-			pci_unmap_addr(wq, mapping));
+			dma_unmap_addr(wq, mapping));
 err_free_rqtpool:
 	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
 err_free_pending_wrs:
@@ -2681,11 +2666,12 @@
 	}
 }
 
-struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
+int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
 			       struct ib_udata *udata)
 {
+	struct ib_pd *pd = ib_srq->pd;
 	struct c4iw_dev *rhp;
-	struct c4iw_srq *srq;
+	struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
 	struct c4iw_pd *php;
 	struct c4iw_create_srq_resp uresp;
 	struct c4iw_ucontext *ucontext;
@@ -2700,11 +2686,11 @@
 	rhp = php->rhp;
 
 	if (!rhp->rdev.lldi.vr->srq.size)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
-		return ERR_PTR(-E2BIG);
+		return -E2BIG;
 	if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
-		return ERR_PTR(-E2BIG);
+		return -E2BIG;
 
 	/*
 	 * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
@@ -2712,17 +2698,12 @@
 	rqsize = attrs->attr.max_wr + 1;
 	rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
 
-	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
-
-	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
-	if (!srq)
-		return ERR_PTR(-ENOMEM);
+	ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
+					     ibucontext);
 
 	srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
-	if (!srq->wr_waitp) {
-		ret = -ENOMEM;
-		goto err_free_srq;
-	}
+	if (!srq->wr_waitp)
+		return -ENOMEM;
 
 	srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
 	if (srq->idx < 0) {
@@ -2756,15 +2737,11 @@
 	if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
 		srq->flags = T4_SRQ_LIMIT_SUPPORT;
 
-	ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid);
-	if (ret)
-		goto err_free_queue;
-
 	if (udata) {
 		srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
 		if (!srq_key_mm) {
 			ret = -ENOMEM;
-			goto err_remove_handle;
+			goto err_free_queue;
 		}
 		srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
 		if (!srq_db_key_mm) {
@@ -2802,29 +2779,25 @@
 			(unsigned long)srq->wq.memsize, attrs->attr.max_wr);
 
 	spin_lock_init(&srq->lock);
-	return &srq->ibsrq;
+	return 0;
+
 err_free_srq_db_key_mm:
 	kfree(srq_db_key_mm);
 err_free_srq_key_mm:
 	kfree(srq_key_mm);
-err_remove_handle:
-	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
 err_free_queue:
 	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
 		       srq->wr_waitp);
 err_free_skb:
-	if (srq->destroy_skb)
-		kfree_skb(srq->destroy_skb);
+	kfree_skb(srq->destroy_skb);
 err_free_srq_idx:
 	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
 err_free_wr_wait:
 	c4iw_put_wr_wait(srq->wr_waitp);
-err_free_srq:
-	kfree(srq);
-	return ERR_PTR(ret);
+	return ret;
 }
 
-int c4iw_destroy_srq(struct ib_srq *ibsrq)
+void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_srq *srq;
@@ -2834,14 +2807,10 @@
 	rhp = srq->rhp;
 
 	pr_debug("%s id %d\n", __func__, srq->wq.qid);
-
-	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
-	ucontext = ibsrq->uobject ?
-		to_c4iw_ucontext(ibsrq->uobject->context) : NULL;
+	ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
+					     ibucontext);
 	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
 		       srq->wr_waitp);
 	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
 	c4iw_put_wr_wait(srq->wr_waitp);
-	kfree(srq);
-	return 0;
 }
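
The qp.c changes above drop the kref plus free_workq teardown: a refcount_t paired with a completion lets c4iw_destroy_qp() wait in place for the last reference before freeing the QP, instead of deferring the free to a work item. A generic sketch of that pattern, with hypothetical names rather than the driver's own types:

#include <linux/completion.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct obj {
	refcount_t refcnt;
	struct completion rel_comp;
};

static void obj_init(struct obj *o)
{
	refcount_set(&o->refcnt, 1);	/* initial reference */
	init_completion(&o->rel_comp);
}

static void obj_get(struct obj *o)
{
	refcount_inc(&o->refcnt);
}

static void obj_put(struct obj *o)
{
	/* The last put signals whoever is waiting in obj_destroy(). */
	if (refcount_dec_and_test(&o->refcnt))
		complete(&o->rel_comp);
}

static void obj_destroy(struct obj *o)
{
	obj_put(o);			/* drop the initial reference */
	wait_for_completion(&o->rel_comp);
	kfree(o);
}

The trade-off is that destroy now sleeps until outstanding users drop their references, which works here because QP destroy runs in process context.
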
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 57ed26b..5c95c78 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -126,7 +126,7 @@
 		rdev->stats.qid.cur += rdev->qpmask + 1;
 		mutex_unlock(&rdev->stats.lock);
 		for (i = qid+1; i & rdev->qpmask; i++) {
-			entry = kmalloc(sizeof *entry, GFP_KERNEL);
+			entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 			if (!entry)
 				goto out;
 			entry->qid = i;
@@ -137,13 +137,13 @@
 		 * now put the same ids on the qp list since they all
 		 * map to the same db/gts page.
 		 */
-		entry = kmalloc(sizeof *entry, GFP_KERNEL);
+		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 		if (!entry)
 			goto out;
 		entry->qid = qid;
 		list_add_tail(&entry->entry, &uctx->qpids);
 		for (i = qid+1; i & rdev->qpmask; i++) {
-			entry = kmalloc(sizeof *entry, GFP_KERNEL);
+			entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 			if (!entry)
 				goto out;
 			entry->qid = i;
@@ -165,7 +165,7 @@
 {
 	struct c4iw_qid_list *entry;
 
-	entry = kmalloc(sizeof *entry, GFP_KERNEL);
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
 		return;
 	pr_debug("qid 0x%x\n", qid);
@@ -200,7 +200,7 @@
 		rdev->stats.qid.cur += rdev->qpmask + 1;
 		mutex_unlock(&rdev->stats.lock);
 		for (i = qid+1; i & rdev->qpmask; i++) {
-			entry = kmalloc(sizeof *entry, GFP_KERNEL);
+			entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 			if (!entry)
 				goto out;
 			entry->qid = i;
@@ -211,13 +211,13 @@
 		 * now put the same ids on the cq list since they all
 		 * map to the same db/gts page.
 		 */
-		entry = kmalloc(sizeof *entry, GFP_KERNEL);
+		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 		if (!entry)
 			goto out;
 		entry->qid = qid;
 		list_add_tail(&entry->entry, &uctx->cqids);
 		for (i = qid; i & rdev->qpmask; i++) {
-			entry = kmalloc(sizeof *entry, GFP_KERNEL);
+			entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 			if (!entry)
 				goto out;
 			entry->qid = i;
@@ -239,7 +239,7 @@
 {
 	struct c4iw_qid_list *entry;
 
-	entry = kmalloc(sizeof *entry, GFP_KERNEL);
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
 		return;
 	pr_debug("qid 0x%x\n", qid);
diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
index 9a7520e..f82d46e 100644
--- a/drivers/infiniband/hw/cxgb4/restrack.c
+++ b/drivers/infiniband/hw/cxgb4/restrack.c
@@ -149,7 +149,7 @@
 	if (qhp->ucontext)
 		return 0;
 
-	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER);
 	if (!table_attr)
 		goto err;
 
@@ -216,7 +216,7 @@
 	if (!uep)
 		return 0;
 
-	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER);
 	if (!table_attr)
 		goto err_free_uep;
 
@@ -387,7 +387,7 @@
 	if (ibcq->uobject)
 		return 0;
 
-	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER);
 	if (!table_attr)
 		goto err;
 
@@ -447,7 +447,7 @@
 	if (!stag)
 		return 0;
 
-	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER);
 	if (!table_attr)
 		goto err;
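
The restrack.c hunks track a core netlink rename: nla_nest_start() now sets NLA_F_NESTED on the nest attribute, and nla_nest_start_noflag() preserves the old unflagged behaviour these dumps relied on. A short sketch of the nesting pattern, with hypothetical attribute types rather than the rdma_nldev ones:

#include <net/netlink.h>

#define EXAMPLE_NEST_ATTR	1	/* hypothetical attribute type */
#define EXAMPLE_U32_ATTR	2	/* hypothetical attribute type */

static int fill_example_table(struct sk_buff *msg, u32 value)
{
	struct nlattr *table_attr;

	table_attr = nla_nest_start_noflag(msg, EXAMPLE_NEST_ATTR);
	if (!table_attr)
		return -EMSGSIZE;

	if (nla_put_u32(msg, EXAMPLE_U32_ATTR, value))
		goto err;

	nla_nest_end(msg, table_attr);
	return 0;
err:
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}
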
 
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e42021f..b170817 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -35,6 +35,7 @@
 #include "t4_regs.h"
 #include "t4_values.h"
 #include "t4_msg.h"
+#include "t4_tcb.h"
 #include "t4fw_ri_api.h"
 
 #define T4_MAX_NUM_PD 65536
@@ -397,7 +398,7 @@
 struct t4_srq {
 	union t4_recv_wr *queue;
 	dma_addr_t dma_addr;
-	DECLARE_PCI_UNMAP_ADDR(mapping);
+	DEFINE_DMA_UNMAP_ADDR(mapping);
 	struct t4_swrqe *sw_rq;
 	void __iomem *bar2_va;
 	u64 bar2_pa;