Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index 4f385ec..fe2fc9e 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -36,6 +36,8 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/cred.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
 
 struct ib_addr {
 	union {
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 77c7908..2734c89 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -46,7 +46,6 @@
 #include <net/ip.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_pack.h>
-#include <net/ipv6.h>
 #include <net/net_namespace.h>
 
 /**
@@ -95,20 +94,18 @@
  * @timeout_ms: Amount of time to wait for the address resolution to complete.
  * @callback: Call invoked once address resolution has completed, timed out,
  *   or been canceled.  A status of 0 indicates success.
+ * @resolve_by_gid_attr:	Resolve the IP address based on the GID
+ *				attribute from rdma_dev_addr.
  * @context: User-specified context associated with the call.
  */
 int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
-		    struct rdma_dev_addr *addr, int timeout_ms,
+		    struct rdma_dev_addr *addr, unsigned long timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
-		    void *context);
+		    bool resolve_by_gid_attr, void *context);
 
 void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
-void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
-		    const struct net_device *dev,
-		    const unsigned char *dst_dev_addr);
-
 int rdma_addr_size(const struct sockaddr *addr);
 int rdma_addr_size_in6(struct sockaddr_in6 *addr);
 int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
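
A minimal caller sketch against the reworked rdma_resolve_ip() prototype above;
resolve_done(), struct my_req, and the 2000 ms timeout are hypothetical, not
part of this patch:

#include <rdma/ib_addr.h>
#include <linux/completion.h>

struct my_req {					/* hypothetical caller state */
	struct rdma_dev_addr dev_addr;
	struct completion done;
	int status;
};

static void resolve_done(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context)
{
	struct my_req *req = context;

	req->status = status;			/* 0 indicates success */
	complete(&req->done);
}

static int start_resolve(struct sockaddr *src, struct sockaddr *dst,
			 struct my_req *req)
{
	init_completion(&req->done);
	/* timeout is now unsigned long (in ms); the new bool selects
	 * resolution by the GID attribute instead of a route lookup. */
	return rdma_resolve_ip(src, dst, &req->dev_addr, 2000,
			       resolve_done, false, req);
}
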
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 62e990b..870b5e6 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -54,6 +54,10 @@
 		       void *),
 	void *context);
 
+int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
+			    u16 *vlan_id, u8 *smac);
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
+
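
A sketch of the two helpers added above; it assumes attr is a valid, referenced
RoCE GID entry, and only dereferences the returned netdev inside the RCU
read-side section:

#include <rdma/ib_cache.h>
#include <linux/if_ether.h>

static int show_gid_l2(const struct ib_gid_attr *attr)
{
	struct net_device *ndev;
	u16 vlan_id;
	u8 smac[ETH_ALEN];
	int ret;

	ret = rdma_read_gid_l2_fields(attr, &vlan_id, smac);
	if (ret)
		return ret;

	rcu_read_lock();
	ndev = rdma_read_gid_attr_ndev_rcu(attr);	/* ERR_PTR on failure */
	if (!IS_ERR(ndev))
		pr_info("gid on %s, vlan %u\n", ndev->name, vlan_id);
	rcu_read_unlock();
	return 0;
}
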
 /**
  * ib_get_cached_pkey - Returns a cached PKey table entry
  * @device: The device to query.
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index c10f4b5..49f4f75 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -583,7 +583,7 @@
 	struct sa_path_rec	*path;
 	const struct ib_gid_attr *sgid_attr;
 	__be64			service_id;
-	int			timeout_ms;
+	unsigned long		timeout_ms;
 	const void		*private_data;
 	u8			private_data_len;
 	u8			max_cm_retries;
diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h
index f62b842..f8982e4 100644
--- a/include/rdma/ib_fmr_pool.h
+++ b/include/rdma/ib_fmr_pool.h
@@ -88,6 +88,6 @@
 					 int                 list_len,
 					 u64                 io_virtual_address);
 
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
+void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
 
 #endif /* IB_FMR_POOL_H */
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 6e35416..9a90bd0 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -100,6 +100,8 @@
 	__be64 compare_data; /* potentially unaligned */
 } __packed;
 
+#include <rdma/tid_rdma_defs.h>
+
 union ib_ehdrs {
 	struct {
 		__be32 deth[2];
@@ -117,6 +119,16 @@
 	__be32 aeth;
 	__be32 ieth;
 	struct ib_atomic_eth atomic_eth;
+	/* TID RDMA headers */
+	union {
+		struct tid_rdma_read_req r_req;
+		struct tid_rdma_read_resp r_rsp;
+		struct tid_rdma_write_req w_req;
+		struct tid_rdma_write_resp w_rsp;
+		struct tid_rdma_write_data w_data;
+		struct tid_rdma_resync resync;
+		struct tid_rdma_ack ack;
+	} tid_rdma;
 }  __packed;
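
A minimal accessor sketch for the new TID RDMA extended headers; it assumes the
caller has a parsed struct ib_other_headers (defined just below, whose
union ib_ehdrs member is named u), and the helper name is hypothetical:

static inline struct tid_rdma_read_req *
packet_tid_read_req(struct ib_other_headers *ohdr)
{
	return &ohdr->u.tid_rdma.r_req;
}
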
 
 struct ib_other_headers {
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index f6ba366..eea946f 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -198,7 +198,7 @@
 	__be16			attr_offset;
 	__be16			reserved;
 	ib_sa_comp_mask		comp_mask;
-} __attribute__ ((packed));
+} __packed;
 
 struct ib_mad {
 	struct ib_mad_hdr	mad_hdr;
@@ -227,7 +227,7 @@
 	struct ib_rmpp_hdr	rmpp_hdr;
 	struct ib_sa_hdr	sa_hdr;
 	u8			data[IB_MGMT_SA_DATA];
-} __attribute__ ((packed));
+} __packed;
 
 struct ib_vendor_mad {
 	struct ib_mad_hdr	mad_hdr;
@@ -277,6 +277,7 @@
 	IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
 	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
 	IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+	IB_PORT_CAP_MASK2_SUP = 1 << 15,
 	IB_PORT_CM_SUP = 1 << 16,
 	IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
 	IB_PORT_REINIT_SUP = 1 << 18,
@@ -295,6 +296,15 @@
 	IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
 };
 
+enum ib_port_capability_mask2_bits {
+	IB_PORT_SET_NODE_DESC_SUP		= 1 << 0,
+	IB_PORT_EX_PORT_INFO_EX_SUP		= 1 << 1,
+	IB_PORT_VIRT_SUP			= 1 << 2,
+	IB_PORT_SWITCH_PORT_STATE_TABLE_SUP	= 1 << 3,
+	IB_PORT_LINK_WIDTH_2X_SUP		= 1 << 4,
+	IB_PORT_LINK_SPEED_HDR_SUP		= 1 << 5,
+};
+
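
A sketch of how the new CapabilityMask2 bits are meant to be consumed;
port_cap_flags2 is the u16 field this update adds to struct ib_port_attr in
ib_verbs.h, and the helper name is hypothetical:

static bool port_supports_2x_width(const struct ib_port_attr *attr)
{
	/* CapabilityMask2 is only valid when the port advertises it */
	if (!(attr->port_cap_flags & IB_PORT_CAP_MASK2_SUP))
		return false;
	return attr->port_cap_flags2 & IB_PORT_LINK_WIDTH_2X_SUP;
}
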
 #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
 
 struct opa_class_port_info {
@@ -606,12 +616,11 @@
 	void			*context;
 	u32			hi_tid;
 	u32			flags;
+	void			*security;
+	struct list_head	mad_agent_sec_list;
 	u8			port_num;
 	u8			rmpp_version;
-	void			*security;
 	bool			smp_allowed;
-	bool			lsm_nb_reg;
-	struct notifier_block   lsm_nb;
 };
 
 /**
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index b6ddf2a..1952097 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -449,28 +449,23 @@
 
 void ib_sa_cancel_query(int id, struct ib_sa_query *query);
 
-int ib_sa_path_rec_get(struct ib_sa_client *client,
-		       struct ib_device *device, u8 port_num,
-		       struct sa_path_rec *rec,
-		       ib_sa_comp_mask comp_mask,
-		       int timeout_ms, gfp_t gfp_mask,
-		       void (*callback)(int status,
-					struct sa_path_rec *resp,
+int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
+		       u8 port_num, struct sa_path_rec *rec,
+		       ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+		       gfp_t gfp_mask,
+		       void (*callback)(int status, struct sa_path_rec *resp,
 					void *context),
-		       void *context,
-		       struct ib_sa_query **query);
+		       void *context, struct ib_sa_query **query);
 
 int ib_sa_service_rec_query(struct ib_sa_client *client,
-			 struct ib_device *device, u8 port_num,
-			 u8 method,
-			 struct ib_sa_service_rec *rec,
-			 ib_sa_comp_mask comp_mask,
-			 int timeout_ms, gfp_t gfp_mask,
-			 void (*callback)(int status,
-					  struct ib_sa_service_rec *resp,
-					  void *context),
-			 void *context,
-			 struct ib_sa_query **sa_query);
+			    struct ib_device *device, u8 port_num, u8 method,
+			    struct ib_sa_service_rec *rec,
+			    ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+			    gfp_t gfp_mask,
+			    void (*callback)(int status,
+					     struct ib_sa_service_rec *resp,
+					     void *context),
+			    void *context, struct ib_sa_query **sa_query);
 
 struct ib_sa_multicast {
 	struct ib_sa_mcmember_rec rec;
@@ -573,12 +568,11 @@
 			      struct ib_device *device, u8 port_num,
 			      struct ib_sa_guidinfo_rec *rec,
 			      ib_sa_comp_mask comp_mask, u8 method,
-			      int timeout_ms, gfp_t gfp_mask,
+			      unsigned long timeout_ms, gfp_t gfp_mask,
 			      void (*callback)(int status,
 					       struct ib_sa_guidinfo_rec *resp,
 					       void *context),
-			      void *context,
-			      struct ib_sa_query **sa_query);
+			      void *context, struct ib_sa_query **sa_query);
 
 bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
 				    struct ib_device *device,
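
A hypothetical path-record query against the reflowed prototype; sa_client is
assumed to be registered via ib_sa_register_client(), and the callback and
comp_mask choice are illustrative:

static void path_done(int status, struct sa_path_rec *resp, void *context)
{
	struct sa_path_rec *out = context;

	if (!status)
		*out = *resp;
}

static int query_path(struct ib_sa_client *sa_client, struct ib_device *dev,
		      u8 port_num, struct sa_path_rec *rec,
		      struct sa_path_rec *out, struct ib_sa_query **query)
{
	/* timeout_ms is now unsigned long */
	return ib_sa_path_rec_get(sa_client, dev, port_num, rec,
				  IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID,
				  3000, GFP_KERNEL, path_done, out, query);
}
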
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
index b439e98..7be0028 100644
--- a/include/rdma/ib_smi.h
+++ b/include/rdma/ib_smi.h
@@ -61,7 +61,7 @@
 	u8	data[IB_SMP_DATA_SIZE];
 	u8	initial_path[IB_SMP_MAX_PATH_HOPS];
 	u8	return_path[IB_SMP_MAX_PATH_HOPS];
-} __attribute__ ((packed));
+} __packed;
 
 #define IB_SMP_DIRECTION			cpu_to_be16(0x8000)
 
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a1fd638..a91b2af 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -36,65 +36,57 @@
 #include <linux/list.h>
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
+#include <rdma/ib_verbs.h>
 
 struct ib_ucontext;
 struct ib_umem_odp;
 
 struct ib_umem {
-	struct ib_ucontext     *context;
+	struct ib_device       *ibdev;
+	struct mm_struct       *owning_mm;
 	size_t			length;
 	unsigned long		address;
-	int			page_shift;
-	int                     writable;
-	int                     hugetlb;
+	u32 writable : 1;
+	u32 is_odp : 1;
 	struct work_struct	work;
-	struct mm_struct       *mm;
-	unsigned long		diff;
-	struct ib_umem_odp     *odp_data;
 	struct sg_table sg_head;
 	int             nmap;
-	int             npages;
+	unsigned int    sg_nents;
 };
 
 /* Returns the offset of the umem start relative to the first page. */
 static inline int ib_umem_offset(struct ib_umem *umem)
 {
-	return umem->address & (BIT(umem->page_shift) - 1);
-}
-
-/* Returns the first page of an ODP umem. */
-static inline unsigned long ib_umem_start(struct ib_umem *umem)
-{
-	return umem->address - ib_umem_offset(umem);
-}
-
-/* Returns the address of the page after the last one of an ODP umem. */
-static inline unsigned long ib_umem_end(struct ib_umem *umem)
-{
-	return ALIGN(umem->address + umem->length, BIT(umem->page_shift));
+	return umem->address & ~PAGE_MASK;
 }
 
 static inline size_t ib_umem_num_pages(struct ib_umem *umem)
 {
-	return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift;
+	return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
+		ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
+	       PAGE_SHIFT;
 }
 
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
-struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 			    size_t size, int access, int dmasync);
 void ib_umem_release(struct ib_umem *umem);
 int ib_umem_page_count(struct ib_umem *umem);
 int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 		      size_t length);
+unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+				     unsigned long pgsz_bitmap,
+				     unsigned long virt);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
 #include <linux/err.h>
 
-static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
+static inline struct ib_umem *ib_umem_get(struct ib_udata *udata,
 					  unsigned long addr, size_t size,
-					  int access, int dmasync) {
+					  int access, int dmasync)
+{
 	return ERR_PTR(-EINVAL);
 }
 static inline void ib_umem_release(struct ib_umem *umem) { }
@@ -103,6 +95,12 @@
 		      		    size_t length) {
 	return -EINVAL;
 }
+static inline int ib_umem_find_best_pgsz(struct ib_umem *umem,
+					 unsigned long pgsz_bitmap,
+					 unsigned long virt) {
+	return -EINVAL;
+}
+
 #endif /* CONFIG_INFINIBAND_USER_MEM */
 
 #endif /* IB_UMEM_H */
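
A sketch of the driver-side flow under the reworked API: ib_umem_get() now
takes the calling verb's ib_udata instead of an ib_ucontext, and
ib_umem_find_best_pgsz() picks the largest HW-supported page size for the
mapping. pgsz_bitmap and the wrapper are assumptions:

static struct ib_umem *pin_for_mr(struct ib_udata *udata, u64 start,
				  u64 length, int access_flags,
				  unsigned long pgsz_bitmap, u64 virt_addr,
				  unsigned long *page_size)
{
	struct ib_umem *umem;

	umem = ib_umem_get(udata, start, length, access_flags, 0);
	if (IS_ERR(umem))
		return umem;

	/* returns 0 when no page size in pgsz_bitmap fits this mapping */
	*page_size = ib_umem_find_best_pgsz(umem, pgsz_bitmap, virt_addr);
	if (!*page_size) {
		ib_umem_release(umem);
		return ERR_PTR(-EINVAL);
	}
	return umem;
}
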
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 381cdf5..253df1a 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -37,12 +37,10 @@
 #include <rdma/ib_verbs.h>
 #include <linux/interval_tree.h>
 
-struct umem_odp_node {
-	u64 __subtree_last;
-	struct rb_node rb;
-};
-
 struct ib_umem_odp {
+	struct ib_umem umem;
+	struct ib_ucontext_per_mm *per_mm;
+
 	/*
 	 * An array of the pages included in the on-demand paging umem.
 	 * Indices of pages that are currently not mapped into the device will
@@ -64,33 +62,49 @@
 	struct mutex		umem_mutex;
 	void			*private; /* for the HW driver to use. */
 
-	/* When false, use the notifier counter in the ucontext struct. */
-	bool mn_counters_active;
 	int notifiers_seq;
 	int notifiers_count;
-
-	/* A linked list of umems that don't have private mmu notifier
-	 * counters yet. */
-	struct list_head no_private_counters;
-	struct ib_umem		*umem;
+	int npages;
 
 	/* Tree tracking */
-	struct umem_odp_node	interval_tree;
+	struct interval_tree_node interval_tree;
+
+	/*
+	 * An implicit odp umem cannot be DMA mapped, has 0 length, and serves
+	 * only as an anchor for the driver to hold onto the per_mm. FIXME:
+	 * This should be removed and drivers should work with the per_mm
+	 * directly.
+	 */
+	bool is_implicit_odp;
 
 	struct completion	notifier_completion;
 	int			dying;
+	unsigned int		page_shift;
 	struct work_struct	work;
 };
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
+{
+	return container_of(umem, struct ib_umem_odp, umem);
+}
 
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
-		    int access);
-struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
-				  unsigned long addr,
-				  size_t size);
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
+{
+	return umem_odp->interval_tree.start;
+}
 
-void ib_umem_odp_release(struct ib_umem *umem);
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp)
+{
+	return umem_odp->interval_tree.last + 1;
+}
+
+static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
+{
+	return (ib_umem_end(umem_odp) - ib_umem_start(umem_odp)) >>
+	       umem_odp->page_shift;
+}
 
 /*
  * The lower 2 bits of the DMA address signal the R/W permissions for
@@ -105,13 +119,33 @@
 
 #define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
 
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
-			      u64 access_mask, unsigned long current_seq);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+struct ib_ucontext_per_mm {
+	struct mmu_notifier mn;
+	struct pid *tgid;
+
+	struct rb_root_cached umem_tree;
+	/* Protects umem_tree */
+	struct rw_semaphore umem_rwsem;
+};
+
+struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
+				    size_t size, int access);
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
+					       int access);
+struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem,
+					    unsigned long addr, size_t size);
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
+
+int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
+			      u64 bcnt, u64 access_mask,
+			      unsigned long current_seq);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
 				 u64 bound);
 
-typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end,
 			      void *cookie);
 /*
  * Call the callback on each ib_umem in the range. Returns the logical or of
@@ -126,49 +160,45 @@
  * Find first region intersecting with address range.
  * Return NULL if not found
  */
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
-				       u64 addr, u64 length);
+static inline struct ib_umem_odp *
+rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length)
+{
+	struct interval_tree_node *node;
 
-static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+	node = interval_tree_iter_first(root, addr, addr + length - 1);
+	if (!node)
+		return NULL;
+	return container_of(node, struct ib_umem_odp, interval_tree);
+
+}
+
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
 					     unsigned long mmu_seq)
 {
 	/*
 	 * This code is strongly based on the KVM code from
 	 * mmu_notifier_retry. Should be called with
-	 * the relevant locks taken (item->odp_data->umem_mutex
+	 * the relevant locks taken (umem_odp->umem_mutex
 	 * and the ucontext umem_mutex semaphore locked for read).
 	 */
 
-	/* Do not allow page faults while the new ib_umem hasn't seen a state
-	 * with zero notifiers yet, and doesn't have its own valid set of
-	 * private counters. */
-	if (!item->odp_data->mn_counters_active)
+	if (unlikely(umem_odp->notifiers_count))
 		return 1;
-
-	if (unlikely(item->odp_data->notifiers_count))
-		return 1;
-	if (item->odp_data->notifiers_seq != mmu_seq)
+	if (umem_odp->notifiers_seq != mmu_seq)
 		return 1;
 	return 0;
 }
 
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
-static inline int ib_umem_odp_get(struct ib_ucontext *context,
-				  struct ib_umem *umem,
-				  int access)
-{
-	return -EINVAL;
-}
-
-static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
-						unsigned long addr,
-						size_t size)
+static inline struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata,
+						  unsigned long addr,
+						  size_t size, int access)
 {
 	return ERR_PTR(-EINVAL);
 }
 
-static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
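
A sketch of the fault path ib_umem_mmu_notifier_retry() is built for, mirroring
the KVM-style sequence check its comment describes; the wrapper,
user_va/bcnt/access_mask, and the HW-programming step are assumptions:

static int fault_range(struct ib_umem_odp *umem_odp, u64 user_va, u64 bcnt,
		       u64 access_mask)
{
	unsigned long current_seq;
	int ret;

again:
	current_seq = READ_ONCE(umem_odp->notifiers_seq);
	smp_rmb();		/* pair with the invalidation side */

	ret = ib_umem_odp_map_dma_pages(umem_odp, user_va, bcnt,
					access_mask, current_seq);
	if (ret < 0)
		return ret;

	mutex_lock(&umem_odp->umem_mutex);
	if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
		/* raced with an invalidation; map the range again */
		mutex_unlock(&umem_odp->umem_mutex);
		goto again;
	}
	/* ... program the device translation tables here ... */
	mutex_unlock(&umem_odp->umem_mutex);
	return ret;		/* number of pages mapped */
}
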
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ec299fc..e7e733a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -41,14 +41,11 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
-#include <linux/mm.h>
 #include <linux/dma-mapping.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/rwsem.h>
-#include <linux/scatterlist.h>
 #include <linux/workqueue.h>
-#include <linux/socket.h>
 #include <linux/irq_poll.h>
 #include <uapi/linux/if_ether.h>
 #include <net/ipv6.h>
@@ -56,21 +53,98 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
-
+#include <linux/refcount.h>
 #include <linux/if_link.h>
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
 #include <linux/uaccess.h>
 #include <linux/cgroup_rdma.h>
+#include <linux/irqflags.h>
+#include <linux/preempt.h>
+#include <linux/dim.h>
 #include <uapi/rdma/ib_user_verbs.h>
+#include <rdma/rdma_counter.h>
 #include <rdma/restrack.h>
+#include <rdma/signature.h>
 #include <uapi/rdma/rdma_user_ioctl.h>
 #include <uapi/rdma/ib_user_ioctl_verbs.h>
 
 #define IB_FW_VERSION_NAME_MAX	ETHTOOL_FWVERS_LEN
 
+struct ib_umem_odp;
+
 extern struct workqueue_struct *ib_wq;
 extern struct workqueue_struct *ib_comp_wq;
+extern struct workqueue_struct *ib_comp_unbound_wq;
+
+__printf(3, 4) __cold
+void ibdev_printk(const char *level, const struct ib_device *ibdev,
+		  const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_alert(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_crit(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_err(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_warn(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define ibdev_dbg(__dev, format, args...)                       \
+	dynamic_ibdev_dbg(__dev, format, ##args)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...)           \
+do {                                                                    \
+	static DEFINE_RATELIMIT_STATE(_rs,                              \
+				      DEFAULT_RATELIMIT_INTERVAL,       \
+				      DEFAULT_RATELIMIT_BURST);         \
+	if (__ratelimit(&_rs))                                          \
+		ibdev_level(ibdev, fmt, ##__VA_ARGS__);                 \
+} while (0)
+
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...)                          \
+do {                                                                    \
+	static DEFINE_RATELIMIT_STATE(_rs,                              \
+				      DEFAULT_RATELIMIT_INTERVAL,       \
+				      DEFAULT_RATELIMIT_BURST);         \
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);                 \
+	if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs))      \
+		__dynamic_ibdev_dbg(&descriptor, ibdev, fmt,            \
+				    ##__VA_ARGS__);                     \
+} while (0)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
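
Usage sketch for the device-relative loggers added above; ibdev, port_num, cqn,
and nreq are hypothetical driver state:

	ibdev_info(ibdev, "port %u active\n", port_num);
	ibdev_warn_ratelimited(ibdev, "CQ overrun on cq%u\n", cqn);
	ibdev_dbg(ibdev, "posted %d wrs\n", nreq);
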
 
 union ib_gid {
 	u8	raw[16];
@@ -92,7 +166,7 @@
 
 #define ROCE_V2_UDP_DPORT      4791
 struct ib_gid_attr {
-	struct net_device	*ndev;
+	struct net_device __rcu	*ndev;
 	struct ib_device	*device;
 	union ib_gid		gid;
 	enum ib_gid_type	gid_type;
@@ -100,16 +174,6 @@
 	u8			port_num;
 };
 
-enum rdma_node_type {
-	/* IB values map to NodeInfo:NodeType. */
-	RDMA_NODE_IB_CA 	= 1,
-	RDMA_NODE_IB_SWITCH,
-	RDMA_NODE_IB_ROUTER,
-	RDMA_NODE_RNIC,
-	RDMA_NODE_USNIC,
-	RDMA_NODE_USNIC_UDP,
-};
-
 enum {
 	/* set the local administered indication */
 	IB_SA_WELL_KNOWN_GUID	= BIT_ULL(57) | 2,
@@ -119,7 +183,8 @@
 	RDMA_TRANSPORT_IB,
 	RDMA_TRANSPORT_IWARP,
 	RDMA_TRANSPORT_USNIC,
-	RDMA_TRANSPORT_USNIC_UDP
+	RDMA_TRANSPORT_USNIC_UDP,
+	RDMA_TRANSPORT_UNSPECIFIED,
 };
 
 enum rdma_protocol_type {
@@ -130,7 +195,7 @@
 };
 
 __attribute_const__ enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type);
+rdma_node_get_transport(unsigned int node_type);
 
 enum rdma_network_type {
 	RDMA_NETWORK_IB,
@@ -229,7 +294,7 @@
 	 */
 	IB_DEVICE_CROSS_CHANNEL			= (1 << 27),
 	IB_DEVICE_MANAGED_FLOW_STEERING		= (1 << 29),
-	IB_DEVICE_SIGNATURE_HANDOVER		= (1 << 30),
+	IB_DEVICE_INTEGRITY_HANDOVER		= (1 << 30),
 	IB_DEVICE_ON_DEMAND_PAGING		= (1ULL << 31),
 	IB_DEVICE_SG_GAPS_REG			= (1ULL << 32),
 	IB_DEVICE_VIRTUAL_FUNCTION		= (1ULL << 33),
@@ -238,17 +303,7 @@
 	IB_DEVICE_RDMA_NETDEV_OPA_VNIC		= (1ULL << 35),
 	/* The device supports padding incoming writes to cacheline. */
 	IB_DEVICE_PCI_WRITE_END_PADDING		= (1ULL << 36),
-};
-
-enum ib_signature_prot_cap {
-	IB_PROT_T10DIF_TYPE_1 = 1,
-	IB_PROT_T10DIF_TYPE_2 = 1 << 1,
-	IB_PROT_T10DIF_TYPE_3 = 1 << 2,
-};
-
-enum ib_signature_guard_cap {
-	IB_GUARD_T10DIF_CRC	= 1,
-	IB_GUARD_T10DIF_CSUM	= 1 << 1,
+	IB_DEVICE_ALLOW_USER_UNREG		= (1ULL << 37),
 };
 
 enum ib_atomic_cap {
@@ -268,6 +323,7 @@
 	IB_ODP_SUPPORT_WRITE	= 1 << 2,
 	IB_ODP_SUPPORT_READ	= 1 << 3,
 	IB_ODP_SUPPORT_ATOMIC	= 1 << 4,
+	IB_ODP_SUPPORT_SRQ_RECV	= 1 << 5,
 };
 
 struct ib_odp_caps {
@@ -276,6 +332,7 @@
 		uint32_t  rc_odp_caps;
 		uint32_t  uc_odp_caps;
 		uint32_t  ud_odp_caps;
+		uint32_t  xrc_odp_caps;
 	} per_transport_caps;
 };
 
@@ -290,8 +347,8 @@
 };
 
 enum ib_tm_cap_flags {
-	/*  Support tag matching on RC transport */
-	IB_TM_CAP_RC		    = 1 << 0,
+	/*  Support tag matching with rendezvous offload for RC transport */
+	IB_TM_CAP_RNDV_RC = 1 << 0,
 };
 
 struct ib_tm_caps {
@@ -309,7 +366,7 @@
 
 struct ib_cq_init_attr {
 	unsigned int	cqe;
-	int		comp_vector;
+	u32		comp_vector;
 	u32		flags;
 };
 
@@ -374,6 +431,7 @@
 	int			max_srq_wr;
 	int			max_srq_sge;
 	unsigned int		max_fast_reg_page_list_len;
+	unsigned int		max_pi_fast_reg_page_list_len;
 	u16			max_pkeys;
 	u8			local_ca_ack_delay;
 	int			sig_prot_cap;
@@ -432,8 +490,19 @@
 	IB_PORT_ACTIVE_DEFER	= 5
 };
 
+enum ib_port_phys_state {
+	IB_PORT_PHYS_STATE_SLEEP = 1,
+	IB_PORT_PHYS_STATE_POLLING = 2,
+	IB_PORT_PHYS_STATE_DISABLED = 3,
+	IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
+	IB_PORT_PHYS_STATE_LINK_UP = 5,
+	IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
+	IB_PORT_PHYS_STATE_PHY_TEST = 7,
+};
+
 enum ib_port_width {
 	IB_WIDTH_1X	= 1,
+	IB_WIDTH_2X	= 16,
 	IB_WIDTH_4X	= 2,
 	IB_WIDTH_8X	= 4,
 	IB_WIDTH_12X	= 8
@@ -443,6 +512,7 @@
 {
 	switch (width) {
 	case IB_WIDTH_1X:  return  1;
+	case IB_WIDTH_2X:  return  2;
 	case IB_WIDTH_4X:  return  4;
 	case IB_WIDTH_8X:  return  8;
 	case IB_WIDTH_12X: return 12;
@@ -592,6 +662,7 @@
 	u8			active_width;
 	u8			active_speed;
 	u8                      phys_state;
+	u16			port_cap_flags2;
 };
 
 enum ib_device_modify_flags {
@@ -729,7 +800,11 @@
 	IB_RATE_25_GBPS  = 15,
 	IB_RATE_100_GBPS = 16,
 	IB_RATE_200_GBPS = 17,
-	IB_RATE_300_GBPS = 18
+	IB_RATE_300_GBPS = 18,
+	IB_RATE_28_GBPS  = 19,
+	IB_RATE_50_GBPS  = 20,
+	IB_RATE_400_GBPS = 21,
+	IB_RATE_600_GBPS = 22,
 };
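
The new rate values are expected to plug into the existing conversion helpers
declared further down this header, e.g. ib_rate_to_mbps(IB_RATE_600_GBPS)
yielding 600000; a hedged sketch with a hypothetical helper:

static bool rate_at_least_50g(enum ib_rate rate)
{
	return ib_rate_to_mbps(rate) >= ib_rate_to_mbps(IB_RATE_50_GBPS);
}
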
 
 /**
@@ -752,118 +827,26 @@
  * enum ib_mr_type - memory region type
  * @IB_MR_TYPE_MEM_REG:       memory region that is used for
  *                            normal registration
- * @IB_MR_TYPE_SIGNATURE:     memory region that is used for
- *                            signature operations (data-integrity
- *                            capable regions)
  * @IB_MR_TYPE_SG_GAPS:       memory region that is capable to
  *                            register any arbitrary sg lists (without
  *                            the normal mr constraints - see
  *                            ib_map_mr_sg)
+ * @IB_MR_TYPE_DM:            memory region that is used for device
+ *                            memory registration
+ * @IB_MR_TYPE_USER:          memory region that is used for the user-space
+ *                            application
+ * @IB_MR_TYPE_DMA:           memory region that is used for DMA operations
+ *                            without address translations (VA=PA)
+ * @IB_MR_TYPE_INTEGRITY:     memory region that is used for
+ *                            data integrity operations
  */
 enum ib_mr_type {
 	IB_MR_TYPE_MEM_REG,
-	IB_MR_TYPE_SIGNATURE,
 	IB_MR_TYPE_SG_GAPS,
-};
-
-/**
- * Signature types
- * IB_SIG_TYPE_NONE: Unprotected.
- * IB_SIG_TYPE_T10_DIF: Type T10-DIF
- */
-enum ib_signature_type {
-	IB_SIG_TYPE_NONE,
-	IB_SIG_TYPE_T10_DIF,
-};
-
-/**
- * Signature T10-DIF block-guard types
- * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
- * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
- */
-enum ib_t10_dif_bg_type {
-	IB_T10DIF_CRC,
-	IB_T10DIF_CSUM
-};
-
-/**
- * struct ib_t10_dif_domain - Parameters specific for T10-DIF
- *     domain.
- * @bg_type: T10-DIF block guard type (CRC|CSUM)
- * @pi_interval: protection information interval.
- * @bg: seed of guard computation.
- * @app_tag: application tag of guard block
- * @ref_tag: initial guard block reference tag.
- * @ref_remap: Indicate wethear the reftag increments each block
- * @app_escape: Indicate to skip block check if apptag=0xffff
- * @ref_escape: Indicate to skip block check if reftag=0xffffffff
- * @apptag_check_mask: check bitmask of application tag.
- */
-struct ib_t10_dif_domain {
-	enum ib_t10_dif_bg_type bg_type;
-	u16			pi_interval;
-	u16			bg;
-	u16			app_tag;
-	u32			ref_tag;
-	bool			ref_remap;
-	bool			app_escape;
-	bool			ref_escape;
-	u16			apptag_check_mask;
-};
-
-/**
- * struct ib_sig_domain - Parameters for signature domain
- * @sig_type: specific signauture type
- * @sig: union of all signature domain attributes that may
- *     be used to set domain layout.
- */
-struct ib_sig_domain {
-	enum ib_signature_type sig_type;
-	union {
-		struct ib_t10_dif_domain dif;
-	} sig;
-};
-
-/**
- * struct ib_sig_attrs - Parameters for signature handover operation
- * @check_mask: bitmask for signature byte check (8 bytes)
- * @mem: memory domain layout desciptor.
- * @wire: wire domain layout desciptor.
- */
-struct ib_sig_attrs {
-	u8			check_mask;
-	struct ib_sig_domain	mem;
-	struct ib_sig_domain	wire;
-};
-
-enum ib_sig_err_type {
-	IB_SIG_BAD_GUARD,
-	IB_SIG_BAD_REFTAG,
-	IB_SIG_BAD_APPTAG,
-};
-
-/**
- * Signature check masks (8 bytes in total) according to the T10-PI standard:
- *  -------- -------- ------------
- * | GUARD  | APPTAG |   REFTAG   |
- * |  2B    |  2B    |    4B      |
- *  -------- -------- ------------
- */
-enum {
-	IB_SIG_CHECK_GUARD	= 0xc0,
-	IB_SIG_CHECK_APPTAG	= 0x30,
-	IB_SIG_CHECK_REFTAG	= 0x0f,
-};
-
-/**
- * struct ib_sig_err - signature error descriptor
- */
-struct ib_sig_err {
-	enum ib_sig_err_type	err_type;
-	u32			expected;
-	u32			actual;
-	u64			sig_err_offset;
-	u32			key;
+	IB_MR_TYPE_DM,
+	IB_MR_TYPE_USER,
+	IB_MR_TYPE_DMA,
+	IB_MR_TYPE_INTEGRITY,
 };
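
A sketch of the IB_MR_TYPE_INTEGRITY flow that replaces the deleted
signature-MR machinery: allocate a PI-capable MR with ib_alloc_mr_integrity()
and map data plus metadata through ib_map_mr_sg_pi(); the wrapper, scatterlist
counts, and 4K page size are assumptions:

#include <linux/sizes.h>

static struct ib_mr *alloc_pi_mr(struct ib_pd *pd,
				 struct scatterlist *data_sg, int data_nents,
				 struct scatterlist *meta_sg, int meta_nents)
{
	struct ib_mr *mr;
	int n;

	mr = ib_alloc_mr_integrity(pd, data_nents, meta_nents);
	if (IS_ERR(mr))
		return mr;

	/* the domain layout is described via mr->sig_attrs before
	 * posting an IB_WR_REG_MR_INTEGRITY work request */
	n = ib_map_mr_sg_pi(mr, data_sg, data_nents, NULL,
			    meta_sg, meta_nents, NULL, SZ_4K);
	if (n < 0) {
		ib_dereg_mr(mr);
		return ERR_PTR(n);
	}
	return mr;
}
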
 
 enum ib_mr_status_check {
@@ -1120,7 +1103,7 @@
 	IB_QP_CREATE_MANAGED_SEND               = 1 << 3,
 	IB_QP_CREATE_MANAGED_RECV               = 1 << 4,
 	IB_QP_CREATE_NETIF_QP			= 1 << 5,
-	IB_QP_CREATE_SIGNATURE_EN		= 1 << 6,
+	IB_QP_CREATE_INTEGRITY_EN		= 1 << 6,
 	/* FREE					= 1 << 7, */
 	IB_QP_CREATE_SCATTER_FCS		= 1 << 8,
 	IB_QP_CREATE_CVLAN_STRIPPING		= 1 << 9,
@@ -1137,7 +1120,9 @@
  */
 
 struct ib_qp_init_attr {
+	/* Consumer's event_handler callback must not block */
 	void                  (*event_handler)(struct ib_event *, void *);
+
 	void		       *qp_context;
 	struct ib_cq	       *send_cq;
 	struct ib_cq	       *recv_cq;
@@ -1146,7 +1131,7 @@
 	struct ib_qp_cap	cap;
 	enum ib_sig_type	sq_sig_type;
 	enum ib_qp_type		qp_type;
-	enum ib_qp_create_flags	create_flags;
+	u32			create_flags;
 
 	/*
 	 * Only needed for special QP types, or when using the RW API.
@@ -1297,7 +1282,7 @@
 
 	/* These are kernel only and can not be issued by userspace */
 	IB_WR_REG_MR = 0x20,
-	IB_WR_REG_SIG_MR,
+	IB_WR_REG_MR_INTEGRITY,
 
 	/* reserve values for low level drivers' internal use.
 	 * These values will not be used at all in the ib core layer.
@@ -1407,20 +1392,6 @@
 	return container_of(wr, struct ib_reg_wr, wr);
 }
 
-struct ib_sig_handover_wr {
-	struct ib_send_wr	wr;
-	struct ib_sig_attrs    *sig_attrs;
-	struct ib_mr	       *sig_mr;
-	int			access_flags;
-	struct ib_sge	       *prot;
-};
-
-static inline const struct ib_sig_handover_wr *
-sig_handover_wr(const struct ib_send_wr *wr)
-{
-	return container_of(wr, struct ib_sig_handover_wr, wr);
-}
-
 struct ib_recv_wr {
 	struct ib_recv_wr      *next;
 	union {
@@ -1491,29 +1462,15 @@
 	 * it is set when we are closing the file descriptor and indicates
 	 * that mm_sem may be locked.
 	 */
-	int			closing;
+	bool closing;
 
 	bool cleanup_retryable;
 
-	struct pid             *tgid;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	struct rb_root_cached   umem_tree;
-	/*
-	 * Protects .umem_rbroot and tree, as well as odp_mrs_count and
-	 * mmu notifiers registration.
-	 */
-	struct rw_semaphore	umem_rwsem;
-	void (*invalidate_range)(struct ib_umem *umem,
-				 unsigned long start, unsigned long end);
-
-	struct mmu_notifier	mn;
-	atomic_t		notifier_count;
-	/* A list of umems that don't have private mmu notifier counters yet. */
-	struct list_head	no_private_counters;
-	int                     odp_mrs_count;
-#endif
-
 	struct ib_rdmacg_object	cg_obj;
+	/*
+	 * Implementation details of the RDMA core, don't use in drivers:
+	 */
+	struct rdma_restrack_entry res;
 };
 
 struct ib_uobject {
@@ -1576,9 +1533,10 @@
 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
 
 enum ib_poll_context {
-	IB_POLL_DIRECT,		/* caller context, no hw completions */
-	IB_POLL_SOFTIRQ,	/* poll from softirq context */
-	IB_POLL_WORKQUEUE,	/* poll from workqueue */
+	IB_POLL_DIRECT,		   /* caller context, no hw completions */
+	IB_POLL_SOFTIRQ,	   /* poll from softirq context */
+	IB_POLL_WORKQUEUE,	   /* poll from workqueue */
+	IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
 };
 
 struct ib_cq {
@@ -1595,6 +1553,8 @@
 		struct irq_poll		iop;
 		struct work_struct	work;
 	};
+	struct workqueue_struct *comp_wq;
+	struct dim *dim;
 	/*
 	 * Implementation details of the RDMA core, don't use in drivers:
 	 */
@@ -1779,10 +1739,14 @@
 	struct ib_qp_security  *qp_sec;
 	u8			port;
 
+	bool			integrity_en;
 	/*
 	 * Implementation details of the RDMA core, don't use in drivers:
 	 */
 	struct rdma_restrack_entry     res;
+
+	/* The counter the qp is bound to */
+	struct rdma_counter    *counter;
 };
 
 struct ib_dm {
@@ -1801,6 +1765,7 @@
 	u64		   iova;
 	u64		   length;
 	unsigned int	   page_size;
+	enum ib_mr_type	   type;
 	bool		   need_inval;
 	union {
 		struct ib_uobject	*uobject;	/* user */
@@ -1808,7 +1773,7 @@
 	};
 
 	struct ib_dm      *dm;
-
+	struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
 	/*
 	 * Implementation details of the RDMA core, don't use in drivers:
 	 */
@@ -2182,11 +2147,8 @@
 struct ib_cache {
 	rwlock_t                lock;
 	struct ib_event_handler event_handler;
-	struct ib_port_cache   *ports;
 };
 
-struct iw_cm_verbs;
-
 struct ib_port_immutable {
 	int                           pkey_tbl_len;
 	int                           gid_tbl_len;
@@ -2194,6 +2156,23 @@
 	u32                           max_mad_size;
 };
 
+struct ib_port_data {
+	struct ib_device *ib_dev;
+
+	struct ib_port_immutable immutable;
+
+	spinlock_t pkey_list_lock;
+	struct list_head pkey_list;
+
+	struct ib_port_cache cache;
+
+	spinlock_t netdev_lock;
+	struct net_device __rcu *netdev;
+	struct hlist_node ndev_hash_link;
+	struct rdma_port_counter port_counter;
+	struct rdma_hw_stats *hw_stats;
+};
+
 /* rdma netdev type - specifies protocol type */
 enum rdma_netdev_t {
 	RDMA_NETDEV_OPA_VNIC,
@@ -2229,10 +2208,14 @@
 			    union ib_gid *gid, u16 mlid);
 };
 
-struct ib_port_pkey_list {
-	/* Lock to hold while modifying the list. */
-	spinlock_t                    list_lock;
-	struct list_head              pkey_list;
+struct rdma_netdev_alloc_params {
+	size_t sizeof_priv;
+	unsigned int txqs;
+	unsigned int rxqs;
+	void *param;
+
+	int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num,
+				      struct net_device *netdev, void *param);
 };
 
 struct ib_counters {
@@ -2249,33 +2232,253 @@
 };
 
 struct uverbs_attr_bundle;
+struct iw_cm_id;
+struct iw_cm_conn_param;
 
-struct ib_device {
-	/* Do not access @dma_device directly from ULP nor from HW drivers. */
-	struct device                *dma_device;
+#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member)                      \
+	.size_##ib_struct =                                                    \
+		(sizeof(struct drv_struct) +                                   \
+		 BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) +      \
+		 BUILD_BUG_ON_ZERO(                                            \
+			 !__same_type(((struct drv_struct *)NULL)->member,     \
+				      struct ib_struct)))
 
-	char                          name[IB_DEVICE_NAME_MAX];
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp)                         \
+	((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
 
-	struct list_head              event_handler_list;
-	spinlock_t                    event_handler_lock;
+#define rdma_zalloc_drv_obj(ib_dev, ib_type)                                   \
+	rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
 
-	spinlock_t                    client_data_lock;
-	struct list_head              core_list;
-	/* Access to the client_data_list is protected by the client_data_lock
-	 * spinlock and the lists_rwsem read-write semaphore */
-	struct list_head              client_data_list;
+#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
 
-	struct ib_cache               cache;
+/**
+ * struct ib_device_ops - InfiniBand device operations
+ * This structure defines all the InfiniBand device operations; providers
+ * need to define the supported operations, otherwise they will be set to null.
+ */
+struct ib_device_ops {
+	struct module *owner;
+	enum rdma_driver_id driver_id;
+	u32 uverbs_abi_ver;
+	unsigned int uverbs_no_driver_id_binding:1;
+
+	int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
+			 const struct ib_send_wr **bad_send_wr);
+	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+			 const struct ib_recv_wr **bad_recv_wr);
+	void (*drain_rq)(struct ib_qp *qp);
+	void (*drain_sq)(struct ib_qp *qp);
+	int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+	int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
+	int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+	int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
+	int (*post_srq_recv)(struct ib_srq *srq,
+			     const struct ib_recv_wr *recv_wr,
+			     const struct ib_recv_wr **bad_recv_wr);
+	int (*process_mad)(struct ib_device *device, int process_mad_flags,
+			   u8 port_num, const struct ib_wc *in_wc,
+			   const struct ib_grh *in_grh,
+			   const struct ib_mad_hdr *in_mad, size_t in_mad_size,
+			   struct ib_mad_hdr *out_mad, size_t *out_mad_size,
+			   u16 *out_mad_pkey_index);
+	int (*query_device)(struct ib_device *device,
+			    struct ib_device_attr *device_attr,
+			    struct ib_udata *udata);
+	int (*modify_device)(struct ib_device *device, int device_modify_mask,
+			     struct ib_device_modify *device_modify);
+	void (*get_dev_fw_str)(struct ib_device *device, char *str);
+	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
+						     int comp_vector);
+	int (*query_port)(struct ib_device *device, u8 port_num,
+			  struct ib_port_attr *port_attr);
+	int (*modify_port)(struct ib_device *device, u8 port_num,
+			   int port_modify_mask,
+			   struct ib_port_modify *port_modify);
 	/**
-	 * port_immutable is indexed by port number
+	 * The following mandatory functions are used only at device
+	 * registration.  Keep functions such as these at the end of this
+	 * structure to avoid cache line misses when accessing struct ib_device
+	 * in fast paths.
 	 */
-	struct ib_port_immutable     *port_immutable;
+	int (*get_port_immutable)(struct ib_device *device, u8 port_num,
+				  struct ib_port_immutable *immutable);
+	enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
+					       u8 port_num);
+	/**
+	 * When calling get_netdev, the HW vendor's driver should return the
+	 * net device of device @device at port @port_num or NULL if such
+	 * a net device doesn't exist. The vendor driver should call dev_hold
+	 * on this net device. The HW vendor's device driver must guarantee
+	 * that this function returns NULL before the net device has finished
+	 * NETDEV_UNREGISTER state.
+	 */
+	struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
+	/**
+	 * rdma netdev operation
+	 *
+	 * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
+	 * must return -EOPNOTSUPP if it doesn't support the specified type.
+	 */
+	struct net_device *(*alloc_rdma_netdev)(
+		struct ib_device *device, u8 port_num, enum rdma_netdev_t type,
+		const char *name, unsigned char name_assign_type,
+		void (*setup)(struct net_device *));
 
-	int			      num_comp_vectors;
-
-	struct ib_port_pkey_list     *port_pkey_list;
-
-	struct iw_cm_verbs	     *iwcm;
+	int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
+				      enum rdma_netdev_t type,
+				      struct rdma_netdev_alloc_params *params);
+	/**
+	 * query_gid should return the GID value for @device when the @port_num
+	 * link layer is either IB or iWARP. It is a no-op if the @port_num
+	 * port uses the RoCE link layer.
+	 */
+	int (*query_gid)(struct ib_device *device, u8 port_num, int index,
+			 union ib_gid *gid);
+	/**
+	 * When calling add_gid, the HW vendor's driver should add the gid
+	 * of device of port at gid index available at @attr. Meta-info of
+	 * that gid (for example, the network device related to this gid) is
+	 * available at @attr. @context allows the HW vendor driver to store
+	 * extra information together with a GID entry. The HW vendor driver may
+	 * allocate memory to contain this information and store it in @context
+	 * when a new GID entry is written to. Params are consistent until the
+	 * next call of add_gid or delete_gid. The function should return 0 on
+	 * success or error otherwise. The function could be called
+	 * concurrently for different ports. This function is only called when
+	 * roce_gid_table is used.
+	 */
+	int (*add_gid)(const struct ib_gid_attr *attr, void **context);
+	/**
+	 * When calling del_gid, the HW vendor's driver should delete the
+	 * gid of device @device at gid index gid_index of port port_num
+	 * available in @attr.
+	 * Upon the deletion of a GID entry, the HW vendor must free any
+	 * allocated memory. The caller will clear @context afterwards.
+	 * This function is only called when roce_gid_table is used.
+	 */
+	int (*del_gid)(const struct ib_gid_attr *attr, void **context);
+	int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
+			  u16 *pkey);
+	int (*alloc_ucontext)(struct ib_ucontext *context,
+			      struct ib_udata *udata);
+	void (*dealloc_ucontext)(struct ib_ucontext *context);
+	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
+	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+	void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+	int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
+			 u32 flags, struct ib_udata *udata);
+	int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+	int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+	void (*destroy_ah)(struct ib_ah *ah, u32 flags);
+	int (*create_srq)(struct ib_srq *srq,
+			  struct ib_srq_init_attr *srq_init_attr,
+			  struct ib_udata *udata);
+	int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
+			  enum ib_srq_attr_mask srq_attr_mask,
+			  struct ib_udata *udata);
+	int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+	void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
+	struct ib_qp *(*create_qp)(struct ib_pd *pd,
+				   struct ib_qp_init_attr *qp_init_attr,
+				   struct ib_udata *udata);
+	int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+			 int qp_attr_mask, struct ib_udata *udata);
+	int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+			int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+	int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
+	int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
+			 struct ib_udata *udata);
+	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+	void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+	struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
+	struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
+				     u64 virt_addr, int mr_access_flags,
+				     struct ib_udata *udata);
+	int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
+			     u64 virt_addr, int mr_access_flags,
+			     struct ib_pd *pd, struct ib_udata *udata);
+	int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
+	struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
+				  u32 max_num_sg, struct ib_udata *udata);
+	struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd,
+					    u32 max_num_data_sg,
+					    u32 max_num_meta_sg);
+	int (*advise_mr)(struct ib_pd *pd,
+			 enum ib_uverbs_advise_mr_advice advice, u32 flags,
+			 struct ib_sge *sg_list, u32 num_sge,
+			 struct uverbs_attr_bundle *attrs);
+	int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+			 unsigned int *sg_offset);
+	int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
+			       struct ib_mr_status *mr_status);
+	struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
+				  struct ib_udata *udata);
+	int (*dealloc_mw)(struct ib_mw *mw);
+	struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags,
+				    struct ib_fmr_attr *fmr_attr);
+	int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len,
+			    u64 iova);
+	int (*unmap_fmr)(struct list_head *fmr_list);
+	int (*dealloc_fmr)(struct ib_fmr *fmr);
+	void (*invalidate_range)(struct ib_umem_odp *umem_odp,
+				 unsigned long start, unsigned long end);
+	int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+	int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+	struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
+				      struct ib_udata *udata);
+	int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+	struct ib_flow *(*create_flow)(struct ib_qp *qp,
+				       struct ib_flow_attr *flow_attr,
+				       int domain, struct ib_udata *udata);
+	int (*destroy_flow)(struct ib_flow *flow_id);
+	struct ib_flow_action *(*create_flow_action_esp)(
+		struct ib_device *device,
+		const struct ib_flow_action_attrs_esp *attr,
+		struct uverbs_attr_bundle *attrs);
+	int (*destroy_flow_action)(struct ib_flow_action *action);
+	int (*modify_flow_action_esp)(
+		struct ib_flow_action *action,
+		const struct ib_flow_action_attrs_esp *attr,
+		struct uverbs_attr_bundle *attrs);
+	int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+				 int state);
+	int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+			     struct ifla_vf_info *ivf);
+	int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+			    struct ifla_vf_stats *stats);
+	int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+			   int type);
+	struct ib_wq *(*create_wq)(struct ib_pd *pd,
+				   struct ib_wq_init_attr *init_attr,
+				   struct ib_udata *udata);
+	void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+	int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
+			 u32 wq_attr_mask, struct ib_udata *udata);
+	struct ib_rwq_ind_table *(*create_rwq_ind_table)(
+		struct ib_device *device,
+		struct ib_rwq_ind_table_init_attr *init_attr,
+		struct ib_udata *udata);
+	int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+	struct ib_dm *(*alloc_dm)(struct ib_device *device,
+				  struct ib_ucontext *context,
+				  struct ib_dm_alloc_attr *attr,
+				  struct uverbs_attr_bundle *attrs);
+	int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
+	struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
+				   struct ib_dm_mr_attr *attr,
+				   struct uverbs_attr_bundle *attrs);
+	struct ib_counters *(*create_counters)(
+		struct ib_device *device, struct uverbs_attr_bundle *attrs);
+	int (*destroy_counters)(struct ib_counters *counters);
+	int (*read_counters)(struct ib_counters *counters,
+			     struct ib_counters_read_attr *counters_read_attr,
+			     struct uverbs_attr_bundle *attrs);
+	int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg,
+			    int data_sg_nents, unsigned int *data_sg_offset,
+			    struct scatterlist *meta_sg, int meta_sg_nents,
+			    unsigned int *meta_sg_offset);
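
A sketch of a provider wiring the consolidated ops table, including the
object-size machinery introduced earlier in this hunk; struct my_pd, mydev,
and the my_*() handlers are hypothetical (matching the prototypes above), and
registration goes through the existing ib_set_device_ops():

struct my_pd {
	struct ib_pd ibpd;	/* embedded member named in INIT_RDMA_OBJ_SIZE */
	u32 pdn;
};

static const struct ib_device_ops my_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_UNKNOWN,
	.uverbs_abi_ver = 1,

	.alloc_pd = my_alloc_pd,
	.dealloc_pd = my_dealloc_pd,
	.query_port = my_query_port,

	/* core allocates struct my_pd and passes the driver its ibpd */
	INIT_RDMA_OBJ_SIZE(ib_pd, my_pd, ibpd),
};

/* at device registration time: */
ib_set_device_ops(&mydev->ibdev, &my_dev_ops);
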
 
 	/**
 	 * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
@@ -2283,8 +2486,8 @@
 	 *   core when the device is removed.  A lifespan of -1 in the return
 	 *   struct tells the core to set a default lifespan.
 	 */
-	struct rdma_hw_stats      *(*alloc_hw_stats)(struct ib_device *device,
-						     u8 port_num);
+	struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
+						u8 port_num);
 	/**
 	 * get_hw_stats - Fill in the counter value(s) in the stats struct.
 	 * @index - The index in the value array we wish to have updated, or
@@ -2297,261 +2500,121 @@
 	 * Drivers are allowed to update all counters in lieu of just the
 	 *   one given in index at their option
 	 */
-	int		           (*get_hw_stats)(struct ib_device *device,
-						   struct rdma_hw_stats *stats,
-						   u8 port, int index);
-	int		           (*query_device)(struct ib_device *device,
-						   struct ib_device_attr *device_attr,
-						   struct ib_udata *udata);
-	int		           (*query_port)(struct ib_device *device,
-						 u8 port_num,
-						 struct ib_port_attr *port_attr);
-	enum rdma_link_layer	   (*get_link_layer)(struct ib_device *device,
-						     u8 port_num);
-	/* When calling get_netdev, the HW vendor's driver should return the
-	 * net device of device @device at port @port_num or NULL if such
-	 * a net device doesn't exist. The vendor driver should call dev_hold
-	 * on this net device. The HW vendor's device driver must guarantee
-	 * that this function returns NULL before the net device has finished
-	 * NETDEV_UNREGISTER state.
+	int (*get_hw_stats)(struct ib_device *device,
+			    struct rdma_hw_stats *stats, u8 port, int index);
+	/*
+	 * This function is called once for each port when an ib device is
+	 * registered.
 	 */
-	struct net_device	  *(*get_netdev)(struct ib_device *device,
-						 u8 port_num);
-	/* query_gid should be return GID value for @device, when @port_num
-	 * link layer is either IB or iWarp. It is no-op if @port_num port
-	 * is RoCE link layer.
-	 */
-	int		           (*query_gid)(struct ib_device *device,
-						u8 port_num, int index,
-						union ib_gid *gid);
-	/* When calling add_gid, the HW vendor's driver should add the gid
-	 * of device of port at gid index available at @attr. Meta-info of
-	 * that gid (for example, the network device related to this gid) is
-	 * available at @attr. @context allows the HW vendor driver to store
-	 * extra information together with a GID entry. The HW vendor driver may
-	 * allocate memory to contain this information and store it in @context
-	 * when a new GID entry is written to. Params are consistent until the
-	 * next call of add_gid or delete_gid. The function should return 0 on
-	 * success or error otherwise. The function could be called
-	 * concurrently for different ports. This function is only called when
-	 * roce_gid_table is used.
-	 */
-	int		           (*add_gid)(const struct ib_gid_attr *attr,
-					      void **context);
-	/* When calling del_gid, the HW vendor's driver should delete the
-	 * gid of device @device at gid index gid_index of port port_num
-	 * available in @attr.
-	 * Upon the deletion of a GID entry, the HW vendor must free any
-	 * allocated memory. The caller will clear @context afterwards.
-	 * This function is only called when roce_gid_table is used.
-	 */
-	int		           (*del_gid)(const struct ib_gid_attr *attr,
-					      void **context);
-	int		           (*query_pkey)(struct ib_device *device,
-						 u8 port_num, u16 index, u16 *pkey);
-	int		           (*modify_device)(struct ib_device *device,
-						    int device_modify_mask,
-						    struct ib_device_modify *device_modify);
-	int		           (*modify_port)(struct ib_device *device,
-						  u8 port_num, int port_modify_mask,
-						  struct ib_port_modify *port_modify);
-	struct ib_ucontext *       (*alloc_ucontext)(struct ib_device *device,
-						     struct ib_udata *udata);
-	int                        (*dealloc_ucontext)(struct ib_ucontext *context);
-	int                        (*mmap)(struct ib_ucontext *context,
-					   struct vm_area_struct *vma);
-	struct ib_pd *             (*alloc_pd)(struct ib_device *device,
-					       struct ib_ucontext *context,
-					       struct ib_udata *udata);
-	int                        (*dealloc_pd)(struct ib_pd *pd);
-	struct ib_ah *             (*create_ah)(struct ib_pd *pd,
-						struct rdma_ah_attr *ah_attr,
-						struct ib_udata *udata);
-	int                        (*modify_ah)(struct ib_ah *ah,
-						struct rdma_ah_attr *ah_attr);
-	int                        (*query_ah)(struct ib_ah *ah,
-					       struct rdma_ah_attr *ah_attr);
-	int                        (*destroy_ah)(struct ib_ah *ah);
-	struct ib_srq *            (*create_srq)(struct ib_pd *pd,
-						 struct ib_srq_init_attr *srq_init_attr,
-						 struct ib_udata *udata);
-	int                        (*modify_srq)(struct ib_srq *srq,
-						 struct ib_srq_attr *srq_attr,
-						 enum ib_srq_attr_mask srq_attr_mask,
-						 struct ib_udata *udata);
-	int                        (*query_srq)(struct ib_srq *srq,
-						struct ib_srq_attr *srq_attr);
-	int                        (*destroy_srq)(struct ib_srq *srq);
-	int                        (*post_srq_recv)(struct ib_srq *srq,
-						    const struct ib_recv_wr *recv_wr,
-						    const struct ib_recv_wr **bad_recv_wr);
-	struct ib_qp *             (*create_qp)(struct ib_pd *pd,
-						struct ib_qp_init_attr *qp_init_attr,
-						struct ib_udata *udata);
-	int                        (*modify_qp)(struct ib_qp *qp,
-						struct ib_qp_attr *qp_attr,
-						int qp_attr_mask,
-						struct ib_udata *udata);
-	int                        (*query_qp)(struct ib_qp *qp,
-					       struct ib_qp_attr *qp_attr,
-					       int qp_attr_mask,
-					       struct ib_qp_init_attr *qp_init_attr);
-	int                        (*destroy_qp)(struct ib_qp *qp);
-	int                        (*post_send)(struct ib_qp *qp,
-						const struct ib_send_wr *send_wr,
-						const struct ib_send_wr **bad_send_wr);
-	int                        (*post_recv)(struct ib_qp *qp,
-						const struct ib_recv_wr *recv_wr,
-						const struct ib_recv_wr **bad_recv_wr);
-	struct ib_cq *             (*create_cq)(struct ib_device *device,
-						const struct ib_cq_init_attr *attr,
-						struct ib_ucontext *context,
-						struct ib_udata *udata);
-	int                        (*modify_cq)(struct ib_cq *cq, u16 cq_count,
-						u16 cq_period);
-	int                        (*destroy_cq)(struct ib_cq *cq);
-	int                        (*resize_cq)(struct ib_cq *cq, int cqe,
-						struct ib_udata *udata);
-	int                        (*poll_cq)(struct ib_cq *cq, int num_entries,
-					      struct ib_wc *wc);
-	int                        (*peek_cq)(struct ib_cq *cq, int wc_cnt);
-	int                        (*req_notify_cq)(struct ib_cq *cq,
-						    enum ib_cq_notify_flags flags);
-	int                        (*req_ncomp_notif)(struct ib_cq *cq,
-						      int wc_cnt);
-	struct ib_mr *             (*get_dma_mr)(struct ib_pd *pd,
-						 int mr_access_flags);
-	struct ib_mr *             (*reg_user_mr)(struct ib_pd *pd,
-						  u64 start, u64 length,
-						  u64 virt_addr,
-						  int mr_access_flags,
-						  struct ib_udata *udata);
-	int			   (*rereg_user_mr)(struct ib_mr *mr,
-						    int flags,
-						    u64 start, u64 length,
-						    u64 virt_addr,
-						    int mr_access_flags,
-						    struct ib_pd *pd,
-						    struct ib_udata *udata);
-	int                        (*dereg_mr)(struct ib_mr *mr);
-	struct ib_mr *		   (*alloc_mr)(struct ib_pd *pd,
-					       enum ib_mr_type mr_type,
-					       u32 max_num_sg);
-	int                        (*map_mr_sg)(struct ib_mr *mr,
-						struct scatterlist *sg,
-						int sg_nents,
-						unsigned int *sg_offset);
-	struct ib_mw *             (*alloc_mw)(struct ib_pd *pd,
-					       enum ib_mw_type type,
-					       struct ib_udata *udata);
-	int                        (*dealloc_mw)(struct ib_mw *mw);
-	struct ib_fmr *	           (*alloc_fmr)(struct ib_pd *pd,
-						int mr_access_flags,
-						struct ib_fmr_attr *fmr_attr);
-	int		           (*map_phys_fmr)(struct ib_fmr *fmr,
-						   u64 *page_list, int list_len,
-						   u64 iova);
-	int		           (*unmap_fmr)(struct list_head *fmr_list);
-	int		           (*dealloc_fmr)(struct ib_fmr *fmr);
-	int                        (*attach_mcast)(struct ib_qp *qp,
-						   union ib_gid *gid,
-						   u16 lid);
-	int                        (*detach_mcast)(struct ib_qp *qp,
-						   union ib_gid *gid,
-						   u16 lid);
-	int                        (*process_mad)(struct ib_device *device,
-						  int process_mad_flags,
-						  u8 port_num,
-						  const struct ib_wc *in_wc,
-						  const struct ib_grh *in_grh,
-						  const struct ib_mad_hdr *in_mad,
-						  size_t in_mad_size,
-						  struct ib_mad_hdr *out_mad,
-						  size_t *out_mad_size,
-						  u16 *out_mad_pkey_index);
-	struct ib_xrcd *	   (*alloc_xrcd)(struct ib_device *device,
-						 struct ib_ucontext *ucontext,
-						 struct ib_udata *udata);
-	int			   (*dealloc_xrcd)(struct ib_xrcd *xrcd);
-	struct ib_flow *	   (*create_flow)(struct ib_qp *qp,
-						  struct ib_flow_attr
-						  *flow_attr,
-						  int domain,
-						  struct ib_udata *udata);
-	int			   (*destroy_flow)(struct ib_flow *flow_id);
-	int			   (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
-						      struct ib_mr_status *mr_status);
-	void			   (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
-	void			   (*drain_rq)(struct ib_qp *qp);
-	void			   (*drain_sq)(struct ib_qp *qp);
-	int			   (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
-							int state);
-	int			   (*get_vf_config)(struct ib_device *device, int vf, u8 port,
-						   struct ifla_vf_info *ivf);
-	int			   (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
-						   struct ifla_vf_stats *stats);
-	int			   (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
-						  int type);
-	struct ib_wq *		   (*create_wq)(struct ib_pd *pd,
-						struct ib_wq_init_attr *init_attr,
-						struct ib_udata *udata);
-	int			   (*destroy_wq)(struct ib_wq *wq);
-	int			   (*modify_wq)(struct ib_wq *wq,
-						struct ib_wq_attr *attr,
-						u32 wq_attr_mask,
-						struct ib_udata *udata);
-	struct ib_rwq_ind_table *  (*create_rwq_ind_table)(struct ib_device *device,
-							   struct ib_rwq_ind_table_init_attr *init_attr,
-							   struct ib_udata *udata);
-	int                        (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
-	struct ib_flow_action *	   (*create_flow_action_esp)(struct ib_device *device,
-							     const struct ib_flow_action_attrs_esp *attr,
-							     struct uverbs_attr_bundle *attrs);
-	int			   (*destroy_flow_action)(struct ib_flow_action *action);
-	int			   (*modify_flow_action_esp)(struct ib_flow_action *action,
-							     const struct ib_flow_action_attrs_esp *attr,
-							     struct uverbs_attr_bundle *attrs);
-	struct ib_dm *             (*alloc_dm)(struct ib_device *device,
-					       struct ib_ucontext *context,
-					       struct ib_dm_alloc_attr *attr,
-					       struct uverbs_attr_bundle *attrs);
-	int                        (*dealloc_dm)(struct ib_dm *dm);
-	struct ib_mr *             (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
-						struct ib_dm_mr_attr *attr,
-						struct uverbs_attr_bundle *attrs);
-	struct ib_counters *	(*create_counters)(struct ib_device *device,
-						   struct uverbs_attr_bundle *attrs);
-	int	(*destroy_counters)(struct ib_counters	*counters);
-	int	(*read_counters)(struct ib_counters *counters,
-				 struct ib_counters_read_attr *counters_read_attr,
-				 struct uverbs_attr_bundle *attrs);
-
+	int (*init_port)(struct ib_device *device, u8 port_num,
+			 struct kobject *port_sysfs);
 	/**
-	 * rdma netdev operation
-	 *
-	 * Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it
-	 * doesn't support the specified rdma netdev type.
+	 * Allows rdma drivers to add their own restrack attributes.
 	 */
-	struct net_device *(*alloc_rdma_netdev)(
-					struct ib_device *device,
-					u8 port_num,
-					enum rdma_netdev_t type,
-					const char *name,
-					unsigned char name_assign_type,
-					void (*setup)(struct net_device *));
+	int (*fill_res_entry)(struct sk_buff *msg,
+			      struct rdma_restrack_entry *entry);
 
-	struct module               *owner;
-	struct device                dev;
-	struct kobject               *ports_parent;
-	struct list_head             port_list;
+	/* Device lifecycle callbacks */
+	/*
+	 * Called after the device becomes registered, before clients are
+	 * attached.
+	 */
+	int (*enable_driver)(struct ib_device *dev);
+	/*
+	 * This is called as part of ib_dealloc_device().
+	 */
+	void (*dealloc_driver)(struct ib_device *dev);
 
-	enum {
-		IB_DEV_UNINITIALIZED,
-		IB_DEV_REGISTERED,
-		IB_DEV_UNREGISTERED
-	}                            reg_state;
+	/* iWarp CM callbacks */
+	void (*iw_add_ref)(struct ib_qp *qp);
+	void (*iw_rem_ref)(struct ib_qp *qp);
+	struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn);
+	int (*iw_connect)(struct iw_cm_id *cm_id,
+			  struct iw_cm_conn_param *conn_param);
+	int (*iw_accept)(struct iw_cm_id *cm_id,
+			 struct iw_cm_conn_param *conn_param);
+	int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata,
+			 u8 pdata_len);
+	int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
+	int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
+	/**
+	 * counter_bind_qp - Bind a QP to a counter.
+	 * @counter: The counter to be bound. If counter->id is zero then
+	 *   the driver needs to allocate a new counter and set counter->id.
+	 */
+	int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
+	/**
+	 * counter_unbind_qp - Unbind the qp from the dynamically-allocated
+	 *   counter and bind it to the default one
+	 */
+	int (*counter_unbind_qp)(struct ib_qp *qp);
+	/**
+	 * counter_dealloc - Deallocate the hw counter
+	 */
+	int (*counter_dealloc)(struct rdma_counter *counter);
+	/**
+	 * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
+	 * the driver-initialized data.
+	 */
+	struct rdma_hw_stats *(*counter_alloc_stats)(
+		struct rdma_counter *counter);
+	/**
+	 * counter_update_stats - Query the stats value of this counter
+	 */
+	int (*counter_update_stats)(struct rdma_counter *counter);
 
-	int			     uverbs_abi_ver;
+	DECLARE_RDMA_OBJ_SIZE(ib_ah);
+	DECLARE_RDMA_OBJ_SIZE(ib_cq);
+	DECLARE_RDMA_OBJ_SIZE(ib_pd);
+	DECLARE_RDMA_OBJ_SIZE(ib_srq);
+	DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
+};
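/*
 * Editor's example (illustrative sketch, not part of this patch): with the
 * verbs callbacks consolidated into struct ib_device_ops, a driver declares
 * one static const ops table and installs it with ib_set_device_ops(),
 * declared further down in this header.  All "mydrv" names are hypothetical;
 * INIT_RDMA_OBJ_SIZE() is assumed to be the initializer counterpart of
 * DECLARE_RDMA_OBJ_SIZE() defined elsewhere in ib_verbs.h.
 */
struct mydrv_pd {
	struct ib_pd ibpd;	/* core object embedded first in the driver object */
};

static int mydrv_enable(struct ib_device *ibdev)
{
	/* runs after registration, before clients attach */
	return 0;
}

static void mydrv_dealloc(struct ib_device *ibdev)
{
	/* runs from ib_dealloc_device(); undo whatever setup allocated */
}

static const struct ib_device_ops mydrv_dev_ops = {
	.enable_driver = mydrv_enable,
	.dealloc_driver = mydrv_dealloc,
	INIT_RDMA_OBJ_SIZE(ib_pd, mydrv_pd, ibpd),
};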
+
+struct ib_core_device {
+	/* device must be the first element in this structure as long as
+	 * the union of ib_core_device and device exists in ib_device.
+	 */
+	struct device dev;
+	possible_net_t rdma_net;
+	struct kobject *ports_kobj;
+	struct list_head port_list;
+	struct ib_device *owner; /* reach back to owner ib_device */
+};
+
+struct rdma_restrack_root;
+struct ib_device {
+	/* Do not access @dma_device directly from ULP nor from HW drivers. */
+	struct device                *dma_device;
+	struct ib_device_ops	     ops;
+	char                          name[IB_DEVICE_NAME_MAX];
+	struct rcu_head rcu_head;
+
+	struct list_head              event_handler_list;
+	spinlock_t                    event_handler_lock;
+
+	struct rw_semaphore	      client_data_rwsem;
+	struct xarray                 client_data;
+	struct mutex                  unregistration_lock;
+
+	struct ib_cache               cache;
+	/**
+	 * port_data is indexed by port number
+	 */
+	struct ib_port_data *port_data;
+
+	int			      num_comp_vectors;
+
+	union {
+		struct device		dev;
+		struct ib_core_device	coredev;
+	};
+
+	/* First group is for device attributes; the second group is for
+	 * driver-provided attributes (optional).
+	 * The array is NULL-terminated.
+	 */
+	const struct attribute_group	*groups[3];
+
 	u64			     uverbs_cmd_mask;
 	u64			     uverbs_ex_cmd_mask;
 
@@ -2559,6 +2622,10 @@
 	__be64			     node_guid;
 	u32			     local_dma_lkey;
 	u16                          is_switch:1;
+	/* Indicates kernel verbs support, should not be used in drivers */
+	u16                          kverbs_provider:1;
+	/* CQ adaptive moderation (RDMA DIM) */
+	u16                          use_cq_dim:1;
 	u8                           node_type;
 	u8                           phys_port_cnt;
 	struct ib_device_attr        attrs;
@@ -2570,30 +2637,39 @@
 #endif
 
 	u32                          index;
+	struct rdma_restrack_root *res;
+
+	const struct uapi_definition   *driver_def;
+
 	/*
-	 * Implementation details of the RDMA core, don't use in drivers
+	 * Positive refcount indicates that the device is currently
+	 * registered and cannot be unregistered.
 	 */
-	struct rdma_restrack_root     res;
+	refcount_t refcount;
+	struct completion unreg_completion;
+	struct work_struct unregistration_work;
 
-	/**
-	 * The following mandatory functions are used only at device
-	 * registration.  Keep functions such as these at the end of this
-	 * structure to avoid cache line misses when accessing struct ib_device
-	 * in fast paths.
-	 */
-	int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
-	void (*get_dev_fw_str)(struct ib_device *, char *str);
-	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
-						     int comp_vector);
+	const struct rdma_link_ops *link_ops;
 
-	const struct uverbs_object_tree_def *const *driver_specs;
-	enum rdma_driver_id		driver_id;
+	/* Protects compat_devs xarray modifications */
+	struct mutex compat_devs_mutex;
+	/* Maintains compat devices for each net namespace */
+	struct xarray compat_devs;
+
+	/* Used by iWarp CM */
+	char iw_ifname[IFNAMSIZ];
+	u32 iw_driver_flags;
 };
 
+struct ib_client_nl_info;
 struct ib_client {
-	char  *name;
+	const char *name;
 	void (*add)   (struct ib_device *);
 	void (*remove)(struct ib_device *, void *client_data);
+	void (*rename)(struct ib_device *dev, void *client_data);
+	int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
+			   struct ib_client_nl_info *res);
+	int (*get_global_nl_info)(struct ib_client_nl_info *res);
 
 	/* Returns the net_dev belonging to this ib_client and matching the
 	 * given parameters.
@@ -2617,25 +2693,114 @@
 			const union ib_gid *gid,
 			const struct sockaddr *addr,
 			void *client_data);
-	struct list_head list;
+
+	refcount_t uses;
+	struct completion uses_zero;
+	u32 client_id;
+
+	/* kverbs are not required by the client */
+	u8 no_kverbs_req:1;
 };
 
-struct ib_device *ib_alloc_device(size_t size);
+/*
+ * IB block DMA iterator
+ *
+ * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
+ * to a HW supported page size.
+ */
+struct ib_block_iter {
+	/* internal states */
+	struct scatterlist *__sg;	/* sg holding the current aligned block */
+	dma_addr_t __dma_addr;		/* unaligned DMA address of this block */
+	unsigned int __sg_nents;	/* number of SG entries */
+	unsigned int __sg_advance;	/* number of bytes to advance in sg in next step */
+	unsigned int __pg_bit;		/* alignment of current block */
+};
+
+struct ib_device *_ib_alloc_device(size_t size);
+#define ib_alloc_device(drv_struct, member)                                    \
+	container_of(_ib_alloc_device(sizeof(struct drv_struct) +              \
+				      BUILD_BUG_ON_ZERO(offsetof(              \
+					      struct drv_struct, member))),    \
+		     struct drv_struct, member)
+
 void ib_dealloc_device(struct ib_device *device);
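/*
 * Editor's example (illustrative sketch): ib_alloc_device() now takes the
 * driver's container type and the name of its embedded struct ib_device
 * member, and hands back the container.  The member must sit at offset zero;
 * the BUILD_BUG_ON_ZERO() above enforces that at compile time.  "mydrv_dev"
 * and "ibdev" are hypothetical names.
 */
struct mydrv_dev {
	struct ib_device ibdev;	/* must be the first member */
	int private_state;	/* driver-private fields follow */
};

static struct mydrv_dev *mydrv_alloc(void)
{
	struct mydrv_dev *mdev = ib_alloc_device(mydrv_dev, ibdev);

	if (!mdev)
		return NULL;
	/* ... init, then ib_register_device(&mdev->ibdev, "mydrv%d") ... */
	return mdev;
}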
 
 void ib_get_device_fw_str(struct ib_device *device, char *str);
 
-int ib_register_device(struct ib_device *device,
-		       int (*port_callback)(struct ib_device *,
-					    u8, struct kobject *));
+int ib_register_device(struct ib_device *device, const char *name);
 void ib_unregister_device(struct ib_device *device);
+void ib_unregister_driver(enum rdma_driver_id driver_id);
+void ib_unregister_device_and_put(struct ib_device *device);
+void ib_unregister_device_queued(struct ib_device *ib_dev);
 
 int ib_register_client   (struct ib_client *client);
 void ib_unregister_client(struct ib_client *client);
 
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
+void __rdma_block_iter_start(struct ib_block_iter *biter,
+			     struct scatterlist *sglist,
+			     unsigned int nents,
+			     unsigned long pgsz);
+bool __rdma_block_iter_next(struct ib_block_iter *biter);
+
+/**
+ * rdma_block_iter_dma_address - get the aligned dma address of the current
+ * block held by the block iterator.
+ * @biter: block iterator holding the memory block
+ */
+static inline dma_addr_t
+rdma_block_iter_dma_address(struct ib_block_iter *biter)
+{
+	return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
+}
+
+/**
+ * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
+ * @sglist: sglist to iterate over
+ * @biter: block iterator holding the memory block
+ * @nents: maximum number of sg entries to iterate over
+ * @pgsz: best HW supported page size to use
+ *
+ * Callers may use rdma_block_iter_dma_address() to get each
+ * block's aligned DMA address.
+ */
+#define rdma_for_each_block(sglist, biter, nents, pgsz)		\
+	for (__rdma_block_iter_start(biter, sglist, nents,	\
+				     pgsz);			\
+	     __rdma_block_iter_next(biter);)
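/*
 * Editor's example (illustrative sketch): walking a mapped scatterlist in
 * blocks of a HW-supported page size, e.g. to program MR translation
 * entries.  "mydrv_fill_pas" and its arguments are hypothetical.
 */
static void mydrv_fill_pas(struct scatterlist *sgl, unsigned int nents,
			   unsigned long pgsz, u64 *pas)
{
	struct ib_block_iter biter;
	int i = 0;

	rdma_for_each_block(sgl, &biter, nents, pgsz)
		pas[i++] = rdma_block_iter_dma_address(&biter);
}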
+
+/**
+ * ib_get_client_data - Get IB client context
+ * @device: Device to get context for
+ * @client: Client to get context for
+ *
+ * ib_get_client_data() returns the client context data set with
+ * ib_set_client_data(). This can only be called while the client is
+ * registered to the device; once the ib_client remove() callback returns,
+ * this cannot be called.
+ */
+static inline void *ib_get_client_data(struct ib_device *device,
+				       struct ib_client *client)
+{
+	return xa_load(&device->client_data, client->client_id);
+}
 void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
 			 void *data);
+void ib_set_device_ops(struct ib_device *device,
+		       const struct ib_device_ops *ops);
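/*
 * Editor's example (illustrative sketch): a minimal ib_client.  Per-device
 * context now lives in the device's client_data xarray, so
 * ib_get_client_data() is valid only between add() and the return of
 * remove().  Requires <linux/slab.h>; all "myclient" names are hypothetical.
 */
static struct ib_client myclient;

static void myclient_add(struct ib_device *device)
{
	void *ctx = kzalloc(64, GFP_KERNEL);	/* hypothetical per-device state */

	if (ctx)
		ib_set_client_data(device, &myclient, ctx);
}

static void myclient_remove(struct ib_device *device, void *client_data)
{
	kfree(client_data);	/* what add() stored via ib_set_client_data() */
}

static struct ib_client myclient = {
	.name = "myclient",	/* note: now const char * */
	.add = myclient_add,
	.remove = myclient_remove,
};
/* paired with ib_register_client(&myclient) / ib_unregister_client() */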
+
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+		      unsigned long pfn, unsigned long size, pgprot_t prot);
+#else
+static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
+				    struct vm_area_struct *vma,
+				    unsigned long pfn, unsigned long size,
+				    pgprot_t prot)
+{
+	return -EINVAL;
+}
+#endif
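/*
 * Editor's example (illustrative sketch): a driver mmap() verb forwarding a
 * device page to userspace.  Deriving the pfn from vm_pgoff is a
 * hypothetical convention of this sketch.
 */
static int mydrv_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
	unsigned long pfn = vma->vm_pgoff;	/* hypothetical: offset encodes pfn */

	return rdma_user_mmap_io(ucontext, vma, pfn, PAGE_SIZE,
				 pgprot_noncached(vma->vm_page_prot));
}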
 
 static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
 {
@@ -2720,7 +2885,6 @@
  * @next_state: Next QP state
  * @type: QP type
  * @mask: Mask of supplied QP attributes
- * @ll : link layer of port
  *
  * This function is a helper function that a low-level driver's
  * modify_qp method can use to validate the consumer's input.  It
@@ -2729,8 +2893,7 @@
  * and that the attribute mask supplied is allowed for the transition.
  */
 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
-			enum ib_qp_type type, enum ib_qp_attr_mask mask,
-			enum rdma_link_layer ll);
+			enum ib_qp_type type, enum ib_qp_attr_mask mask);
 
 void ib_register_event_handler(struct ib_event_handler *event_handler);
 void ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -2770,6 +2933,16 @@
 }
 
 /**
+ * rdma_for_each_port - Iterate over all valid port numbers of the IB device
+ * @device: The struct ib_device * to iterate over
+ * @iter: The unsigned int in which to store the port number
+ */
+#define rdma_for_each_port(device, iter)                                       \
+	for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type(   \
+						     unsigned int, iter)));    \
+	     iter <= rdma_end_port(device); (iter)++)
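/*
 * Editor's example (illustrative sketch): rdma_for_each_port() replaces
 * open-coded rdma_start_port()/rdma_end_port() loops; @iter must be an
 * unsigned int or the __same_type() check breaks the build.
 */
static void mydrv_scan_ports(struct ib_device *device)
{
	unsigned int port;

	rdma_for_each_port(device, port)
		if (rdma_protocol_roce(device, port))
			pr_debug("port %u runs RoCE\n", port);
}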
+
+/**
  * rdma_end_port - Return the last valid port number for the device
  * specified
  *
@@ -2792,34 +2965,38 @@
 static inline bool rdma_is_grh_required(const struct ib_device *device,
 					u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags &
-		RDMA_CORE_PORT_IB_GRH_REQUIRED;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_PORT_IB_GRH_REQUIRED;
 }
 
 static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_IB;
 }
 
 static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags &
-		(RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
 }
 
 static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
 }
 
 static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_ROCE;
 }
 
 static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_IWARP;
 }
 
 static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
@@ -2830,12 +3007,14 @@
 
 static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_RAW_PACKET;
 }
 
 static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_USNIC;
 }
 
 /**
@@ -2852,7 +3031,8 @@
  */
 static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_MAD;
 }
 
 /**
@@ -2876,8 +3056,8 @@
  */
 static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
 {
-	return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD)
-		== RDMA_CORE_CAP_OPA_MAD;
+	return device->port_data[port_num].immutable.core_cap_flags &
+		RDMA_CORE_CAP_OPA_MAD;
 }
 
 /**
@@ -2902,7 +3082,8 @@
  */
 static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_SMI;
 }
 
 /**
@@ -2922,7 +3103,8 @@
  */
 static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_CM;
 }
 
 /**
@@ -2939,7 +3121,8 @@
  */
 static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IW_CM;
 }
 
 /**
@@ -2959,7 +3142,8 @@
  */
 static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_SA;
 }
 
 /**
@@ -2999,7 +3183,8 @@
  */
 static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_AF_IB;
 }
 
 /**
@@ -3020,7 +3205,8 @@
  */
 static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_ETH_AH;
 }
 
 /**
@@ -3034,7 +3220,7 @@
  */
 static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
 {
-	return (device->port_immutable[port_num].core_cap_flags &
+	return (device->port_data[port_num].immutable.core_cap_flags &
 		RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
 }
 
@@ -3052,7 +3238,7 @@
  */
 static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].max_mad_size;
+	return device->port_data[port_num].immutable.max_mad_size;
 }
 
 /**
@@ -3072,7 +3258,7 @@
 					   u8 port_num)
 {
 	return rdma_protocol_roce(device, port_num) &&
-		device->add_gid && device->del_gid;
+		device->ops.add_gid && device->ops.del_gid;
 }
 
 /*
@@ -3087,6 +3273,30 @@
 	return rdma_protocol_iwarp(dev, port_num);
 }
 
+/**
+ * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
+ *
+ * @addr: address
+ * @pgsz_bitmap: bitmap of HW supported page sizes
+ */
+static inline unsigned int rdma_find_pg_bit(unsigned long addr,
+					    unsigned long pgsz_bitmap)
+{
+	unsigned long align;
+	unsigned long pgsz;
+
+	align = addr & -addr;
+
+	/* Find page bit such that addr is aligned to the highest supported
+	 * HW page size
+	 */
+	pgsz = pgsz_bitmap & ~(-align << 1);
+	if (!pgsz)
+		return __ffs(pgsz_bitmap);
+
+	return __fls(pgsz);
+}
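/*
 * Editor's worked example: with pgsz_bitmap = SZ_4K | SZ_2M and
 * addr = 0x40200000, align = 0x200000, both bitmap bits survive the mask and
 * __fls() returns 21 (2M).  For addr = 0x40201000, align = 0x1000, only the
 * 4K bit survives and the result is 12.  If addr is less aligned than every
 * supported size, the __ffs() fallback picks the smallest supported page.
 */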
+
 int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
 			 int state);
 int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
@@ -3128,19 +3338,44 @@
 
 struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
 		const char *caller);
+
 #define ib_alloc_pd(device, flags) \
 	__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
-void ib_dealloc_pd(struct ib_pd *pd);
+
+/**
+ * ib_dealloc_pd_user - Deallocate kernel/user PD
+ * @pd: The protection domain
+ * @udata: Valid user data or NULL for kernel objects
+ */
+void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+
+/**
+ * ib_dealloc_pd - Deallocate kernel PD
+ * @pd: The protection domain
+ *
+ * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
+ */
+static inline void ib_dealloc_pd(struct ib_pd *pd)
+{
+	ib_dealloc_pd_user(pd, NULL);
+}
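/*
 * Editor's example (illustrative sketch): kernel ULPs keep the old calling
 * shape; ib_dealloc_pd() is now simply the udata == NULL wrapper.
 */
static int mydrv_pd_roundtrip(struct ib_device *device)
{
	struct ib_pd *pd = ib_alloc_pd(device, 0);

	if (IS_ERR(pd))
		return PTR_ERR(pd);
	/* ... use pd ... */
	ib_dealloc_pd(pd);
	return 0;
}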
+
+enum rdma_create_ah_flags {
+	/* In a sleepable context */
+	RDMA_CREATE_AH_SLEEPABLE = BIT(0),
+};
 
 /**
  * rdma_create_ah - Creates an address handle for the given address vector.
  * @pd: The protection domain associated with the address handle.
  * @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
  *
  * The address handle is used to reference a local or global destination
  * in all UD QP post sends.
  */
-struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr);
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+			     u32 flags);
 
 /**
  * rdma_create_user_ah - Creates an address handle for the given address vector.
@@ -3230,11 +3465,30 @@
  */
 int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
 
+enum rdma_destroy_ah_flags {
+	/* In a sleepable context */
+	RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
+};
+
 /**
- * rdma_destroy_ah - Destroys an address handle.
+ * rdma_destroy_ah_user - Destroys an address handle.
  * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ * @udata: Valid user data or NULL for kernel objects
  */
-int rdma_destroy_ah(struct ib_ah *ah);
+int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
+
+/**
+ * rdma_destroy_ah - Destroys a kernel address handle.
+ * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ *
+ * NOTE: for user ah use rdma_destroy_ah_user with valid udata!
+ */
+static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+	return rdma_destroy_ah_user(ah, flags, NULL);
+}
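/*
 * Editor's example (illustrative sketch): the new flags argument tells the
 * driver whether it may sleep; callers in atomic context pass 0 instead of
 * the *_SLEEPABLE flag.
 */
static int mydrv_ah_roundtrip(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
{
	struct ib_ah *ah = rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE);

	if (IS_ERR(ah))
		return PTR_ERR(ah);
	/* ... reference ah in UD post sends ... */
	return rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
}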
 
 /**
  * ib_create_srq - Creates a SRQ associated with the specified protection
@@ -3278,10 +3532,22 @@
 		 struct ib_srq_attr *srq_attr);
 
 /**
- * ib_destroy_srq - Destroys the specified SRQ.
+ * ib_destroy_srq_user - Destroys the specified SRQ.
  * @srq: The SRQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
  */
-int ib_destroy_srq(struct ib_srq *srq);
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_srq - Destroys the specified kernel SRQ.
+ * @srq: The SRQ to destroy.
+ *
+ * NOTE: for user srq use ib_destroy_srq_user with valid udata!
+ */
+static inline int ib_destroy_srq(struct ib_srq *srq)
+{
+	return ib_destroy_srq_user(srq, NULL);
+}
 
 /**
  * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
@@ -3296,19 +3562,39 @@
 {
 	const struct ib_recv_wr *dummy;
 
-	return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
+	return srq->device->ops.post_srq_recv(srq, recv_wr,
+					      bad_recv_wr ? : &dummy);
 }
 
 /**
- * ib_create_qp - Creates a QP associated with the specified protection
+ * ib_create_qp_user - Creates a QP associated with the specified protection
  *   domain.
  * @pd: The protection domain associated with the QP.
  * @qp_init_attr: A list of initial attributes required to create the
  *   QP.  If QP creation succeeds, then the attributes are updated to
  *   the actual capabilities of the created QP.
+ * @udata: Valid user data or NULL for kernel objects
  */
-struct ib_qp *ib_create_qp(struct ib_pd *pd,
-			   struct ib_qp_init_attr *qp_init_attr);
+struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
+				struct ib_qp_init_attr *qp_init_attr,
+				struct ib_udata *udata);
+
+/**
+ * ib_create_qp - Creates a kernel QP associated with the specified protection
+ *   domain.
+ * @pd: The protection domain associated with the QP.
+ * @qp_init_attr: A list of initial attributes required to create the
+ *   QP.  If QP creation succeeds, then the attributes are updated to
+ *   the actual capabilities of the created QP.
+ *
+ * NOTE: for user qp use ib_create_qp_user with valid udata!
+ */
+static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
+					 struct ib_qp_init_attr *qp_init_attr)
+{
+	return ib_create_qp_user(pd, qp_init_attr, NULL);
+}
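/*
 * Editor's example (illustrative sketch): kernel QP creation is unchanged
 * for ULPs; ib_create_qp() forwards to ib_create_qp_user() with
 * udata == NULL.  The attribute values are arbitrary.
 */
static struct ib_qp *mydrv_make_qp(struct ib_pd *pd, struct ib_cq *cq)
{
	struct ib_qp_init_attr attr = {
		.qp_type = IB_QPT_RC,
		.send_cq = cq,
		.recv_cq = cq,
		.cap = { .max_send_wr = 64, .max_recv_wr = 64,
			 .max_send_sge = 1, .max_recv_sge = 1 },
	};

	return ib_create_qp(pd, &attr);
}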
 
 /**
  * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
@@ -3358,8 +3644,20 @@
 /**
- * ib_destroy_qp - Destroys the specified QP.
+ * ib_destroy_qp_user - Destroys the specified QP.
  * @qp: The QP to destroy.
+ * @udata: Valid udata or NULL for kernel objects
  */
-int ib_destroy_qp(struct ib_qp *qp);
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
+
+/**
+ * ib_destroy_qp - Destroys the specified kernel QP.
+ * @qp: The QP to destroy.
+ *
+ * NOTE: for user qp use ib_destroy_qp_user with valid udata!
+ */
+static inline int ib_destroy_qp(struct ib_qp *qp)
+{
+	return ib_destroy_qp_user(qp, NULL);
+}
 
 /**
  * ib_open_qp - Obtain a reference to an existing sharable QP.
@@ -3399,7 +3697,7 @@
 {
 	const struct ib_send_wr *dummy;
 
-	return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
+	return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy);
 }
 
 /**
@@ -3416,16 +3714,88 @@
 {
 	const struct ib_recv_wr *dummy;
 
-	return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
+	return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
 }
 
-struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
-			    int nr_cqe, int comp_vector,
-			    enum ib_poll_context poll_ctx, const char *caller);
-#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \
-	__ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME)
+struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
+				 int nr_cqe, int comp_vector,
+				 enum ib_poll_context poll_ctx,
+				 const char *caller, struct ib_udata *udata);
 
-void ib_free_cq(struct ib_cq *cq);
+/**
+ * ib_alloc_cq_user - Allocate kernel/user CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ * @udata: Valid user data or NULL for kernel objects
+ */
+static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
+					     void *private, int nr_cqe,
+					     int comp_vector,
+					     enum ib_poll_context poll_ctx,
+					     struct ib_udata *udata)
+{
+	return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+				  KBUILD_MODNAME, udata);
+}
+
+/**
+ * ib_alloc_cq - Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ *
+ * NOTE: for user cq use ib_alloc_cq_user with valid udata!
+ */
+static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+					int nr_cqe, int comp_vector,
+					enum ib_poll_context poll_ctx)
+{
+	return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+				NULL);
+}
+
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+				int nr_cqe, enum ib_poll_context poll_ctx,
+				const char *caller);
+
+/**
+ * ib_alloc_cq_any - Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+					    void *private, int nr_cqe,
+					    enum ib_poll_context poll_ctx)
+{
+	return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+				 KBUILD_MODNAME);
+}
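/*
 * Editor's example (illustrative sketch): ib_alloc_cq_any() lets the core
 * choose the completion vector instead of the caller; pair it with
 * ib_free_cq() below when done.
 */
static struct ib_cq *mydrv_make_cq(struct ib_device *dev, void *cq_priv)
{
	return ib_alloc_cq_any(dev, cq_priv, 128, IB_POLL_SOFTIRQ);
}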
+
+/**
+ * ib_free_cq_user - Free kernel/user CQ
+ * @cq: The CQ to free
+ * @udata: Valid user data or NULL for kernel objects
+ */
+void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_free_cq - Free kernel CQ
+ * @cq: The CQ to free
+ *
+ * NOTE: for user cq use ib_free_cq_user with valid udata!
+ */
+static inline void ib_free_cq(struct ib_cq *cq)
+{
+	ib_free_cq_user(cq, NULL);
+}
+
 int ib_process_cq_direct(struct ib_cq *cq, int budget);
 
 /**
@@ -3469,10 +3839,22 @@
 int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 
 /**
- * ib_destroy_cq - Destroys the specified CQ.
+ * ib_destroy_cq_user - Destroys the specified CQ.
  * @cq: The CQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
  */
-int ib_destroy_cq(struct ib_cq *cq);
+int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_cq - Destroys the specified kernel CQ.
+ * @cq: The CQ to destroy.
+ *
+ * NOTE: for user cq use ib_destroy_cq_user with valid udata!
+ */
+static inline void ib_destroy_cq(struct ib_cq *cq)
+{
+	ib_destroy_cq_user(cq, NULL);
+}
 
 /**
  * ib_poll_cq - poll a CQ for completion(s)
@@ -3489,7 +3871,7 @@
 static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
 			     struct ib_wc *wc)
 {
-	return cq->device->poll_cq(cq, num_entries, wc);
+	return cq->device->ops.poll_cq(cq, num_entries, wc);
 }
 
 /**
@@ -3522,7 +3904,7 @@
 static inline int ib_req_notify_cq(struct ib_cq *cq,
 				   enum ib_cq_notify_flags flags)
 {
-	return cq->device->req_notify_cq(cq, flags);
+	return cq->device->ops.req_notify_cq(cq, flags);
 }
 
 /**
@@ -3534,8 +3916,8 @@
  */
 static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
 {
-	return cq->device->req_ncomp_notif ?
-		cq->device->req_ncomp_notif(cq, wc_cnt) :
+	return cq->device->ops.req_ncomp_notif ?
+		cq->device->ops.req_ncomp_notif(cq, wc_cnt) :
 		-ENOSYS;
 }
 
@@ -3652,32 +4034,18 @@
 {
 	dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
 }
-/**
- * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
- * @dev: The device for which the DMA addresses were created
- * @sg: The scatter/gather entry
- *
- * Note: this function is obsolete. To do: change all occurrences of
- * ib_sg_dma_address() into sg_dma_address().
- */
-static inline u64 ib_sg_dma_address(struct ib_device *dev,
-				    struct scatterlist *sg)
-{
-	return sg_dma_address(sg);
-}
 
 /**
- * ib_sg_dma_len - Return the DMA length from a scatter/gather entry
- * @dev: The device for which the DMA addresses were created
- * @sg: The scatter/gather entry
+ * ib_dma_max_seg_size - Return the size limit of a single DMA transfer
+ * @dev: The device to query
  *
- * Note: this function is obsolete. To do: change all occurrences of
- * ib_sg_dma_len() into sg_dma_len().
+ * The returned value represents a size in bytes.
  */
-static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
-					 struct scatterlist *sg)
+static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
 {
-	return sg_dma_len(sg);
+	struct device_dma_parameters *p = dev->dma_device->dma_parms;
+
+	return p ? p->max_segment_size : UINT_MAX;
 }
 
 /**
@@ -3740,17 +4108,41 @@
 }
 
 /**
- * ib_dereg_mr - Deregisters a memory region and removes it from the
+ * ib_dereg_mr_user - Deregisters a memory region and removes it from the
+ *   HCA translation table.
+ * @mr: The memory region to deregister.
+ * @udata: Valid user data or NULL for kernel object
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
+ */
+int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata);
+
+/**
+ * ib_dereg_mr - Deregisters a kernel memory region and removes it from the
  *   HCA translation table.
  * @mr: The memory region to deregister.
  *
  * This function can fail, if the memory region has memory windows bound to it.
+ *
+ * NOTE: for user mr use ib_dereg_mr_user with valid udata!
  */
-int ib_dereg_mr(struct ib_mr *mr);
+static inline int ib_dereg_mr(struct ib_mr *mr)
+{
+	return ib_dereg_mr_user(mr, NULL);
+}
 
-struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
-			  enum ib_mr_type mr_type,
-			  u32 max_num_sg);
+struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
+			       u32 max_num_sg, struct ib_udata *udata);
+
+static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
+					enum ib_mr_type mr_type, u32 max_num_sg)
+{
+	return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL);
+}
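/*
 * Editor's example (illustrative sketch): a kernel ULP allocating a
 * fast-registration MR; a scatterlist is mapped into it later with
 * ib_map_mr_sg(), declared further down.
 */
static struct ib_mr *mydrv_make_mr(struct ib_pd *pd)
{
	return ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32 /* max_num_sg */);
}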
+
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
+				    u32 max_num_data_sg,
+				    u32 max_num_meta_sg);
 
 /**
  * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
@@ -3799,7 +4191,7 @@
 				  u64 *page_list, int list_len,
 				  u64 iova)
 {
-	return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
+	return fmr->device->ops.map_phys_fmr(fmr, page_list, list_len, iova);
 }
 
 /**
@@ -3848,8 +4240,9 @@
 /**
  * ib_dealloc_xrcd - Deallocates an XRC domain.
  * @xrcd: The XRC domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
  */
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
 
 static inline int ib_check_mr_access(int flags)
 {
@@ -3893,12 +4286,39 @@
 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
 		       struct ib_mr_status *mr_status);
 
+/**
+ * ib_device_try_get - Hold a registration lock
+ * @device: The device to lock
+ *
+ * A device under an active registration lock cannot become unregistered. It
+ * is only possible to obtain a registration lock on a device that is fully
+ * registered, otherwise this function returns false.
+ *
+ * The registration lock is only necessary for actions which require the
+ * device to still be registered. Uses that only require the device pointer to
+ * be valid should use get_device(&ibdev->dev) to hold the memory.
+ */
+static inline bool ib_device_try_get(struct ib_device *dev)
+{
+	return refcount_inc_not_zero(&dev->refcount);
+}
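/*
 * Editor's example (illustrative sketch): bracket work that must only run on
 * a still-registered device with the try-get/put pair.
 */
static void mydrv_poke(struct ib_device *dev)
{
	if (!ib_device_try_get(dev))
		return;		/* device already unregistered */
	/* ... touch registration-dependent state ... */
	ib_device_put(dev);
}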
+
+void ib_device_put(struct ib_device *device);
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+					  enum rdma_driver_id driver_id);
+struct ib_device *ib_device_get_by_name(const char *name,
+					enum rdma_driver_id driver_id);
 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
 					    u16 pkey, const union ib_gid *gid,
 					    const struct sockaddr *addr);
+int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
+			 unsigned int port);
+struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
+
 struct ib_wq *ib_create_wq(struct ib_pd *pd,
 			   struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq);
+int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
 int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
 		 u32 wq_attr_mask);
 struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
@@ -3908,6 +4328,10 @@
 
 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
 		 unsigned int *sg_offset, unsigned int page_size);
+int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
+		    int data_sg_nents, unsigned int *data_sg_offset,
+		    struct scatterlist *meta_sg, int meta_sg_nents,
+		    unsigned int *meta_sg_offset, unsigned int page_size);
 
 static inline int
 ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
@@ -4152,27 +4576,13 @@
 ib_get_vector_affinity(struct ib_device *device, int comp_vector)
 {
 	if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
-	    !device->get_vector_affinity)
+	    !device->ops.get_vector_affinity)
 		return NULL;
 
-	return device->get_vector_affinity(device, comp_vector);
+	return device->ops.get_vector_affinity(device, comp_vector);
 
 }
 
-static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
-			       struct ib_qp *qp, struct ib_device *device)
-{
-	uobj->object = ibflow;
-	ibflow->uobject = uobj;
-
-	if (qp) {
-		atomic_inc(&qp->usecnt);
-		ibflow->qp = qp;
-	}
-
-	ibflow->device = device;
-}
-
 /**
  * rdma_roce_rescan_device - Rescan all of the network devices in the system
  * and add their gids, as needed, to the relevant RoCE devices.
@@ -4181,8 +4591,70 @@
  */
 void rdma_roce_rescan_device(struct ib_device *ibdev);
 
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
 
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
-			       struct uverbs_attr_bundle *attrs);
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
+
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+				     enum rdma_netdev_t type, const char *name,
+				     unsigned char name_assign_type,
+				     void (*setup)(struct net_device *));
+
+int rdma_init_netdev(struct ib_device *device, u8 port_num,
+		     enum rdma_netdev_t type, const char *name,
+		     unsigned char name_assign_type,
+		     void (*setup)(struct net_device *),
+		     struct net_device *netdev);
+
+/**
+ * rdma_set_device_sysfs_group - Set the device attribute group to hold
+ *				 driver-specific sysfs entries for the
+ *				 infiniband class.
+ *
+ * @device:	device pointer for which the attributes are to be created
+ * @group:	Pointer to the group which should be added when the device
+ *		is registered with sysfs.
+ * rdma_set_device_sysfs_group() allows existing drivers to expose one
+ * sysfs attribute group per device.
+ *
+ * NOTE: New drivers should not make use of this API; instead, new device
+ * parameters should be exposed via netlink commands. This API and mechanism
+ * exist only for existing drivers.
+ */
+static inline void
+rdma_set_device_sysfs_group(struct ib_device *dev,
+			    const struct attribute_group *group)
+{
+	dev->groups[1] = group;
+}
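/*
 * Editor's example (illustrative sketch): an existing driver exposing one
 * legacy sysfs group; the "hca_type" attribute is hypothetical.
 */
static ssize_t hca_type_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "MYDRV\n");
}
static DEVICE_ATTR_RO(hca_type);

static struct attribute *mydrv_class_attrs[] = {
	&dev_attr_hca_type.attr,
	NULL,
};

static const struct attribute_group mydrv_attr_group = {
	.attrs = mydrv_class_attrs,
};

/* call rdma_set_device_sysfs_group(ibdev, &mydrv_attr_group) before
 * ib_register_device().
 */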
+
+/**
+ * rdma_device_to_ibdev - Get ib_device pointer from device pointer
+ *
+ * @device:	device pointer from which to retrieve the ib_device pointer
+ *
+ * rdma_device_to_ibdev() retrieves the ib_device pointer from a core
+ * device pointer.
+ */
+static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
+{
+	struct ib_core_device *coredev =
+		container_of(device, struct ib_core_device, dev);
+
+	return coredev->owner;
+}
+
+/**
+ * rdma_device_to_drv_device - Helper macro to reach back to the driver's
+ *			       ib_device holder structure from a device pointer.
+ *
+ * NOTE: New drivers should not make use of this API; this API is only for
+ * existing drivers that have exposed sysfs entries using
+ * rdma_set_device_sysfs_group().
+ */
+#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member)           \
+	container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
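/*
 * Editor's example (illustrative sketch): inside a sysfs show() hooked up
 * via rdma_set_device_sysfs_group(), recover the driver structure from the
 * core struct device.  Reuses the hypothetical "mydrv_dev"/"ibdev" names
 * from the sketch above.
 */
static ssize_t board_id_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	struct mydrv_dev *mdev =
		rdma_device_to_drv_device(device, struct mydrv_dev, ibdev);

	return sprintf(buf, "%d\n", mdev->private_state);
}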
+
+bool rdma_dev_access_netns(const struct ib_device *device,
+			   const struct net *net);
 #endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 5cd7701..5aa8a9c 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -94,7 +94,8 @@
 	void (*add_ref)(struct iw_cm_id *);
 	void (*rem_ref)(struct iw_cm_id *);
 	u8  tos;
-	bool mapped;
+	bool tos_set:1;
+	bool mapped:1;
 };
 
 struct iw_cm_conn_param {
@@ -105,28 +106,16 @@
 	u32 qpn;
 };
 
-struct iw_cm_verbs {
-	void		(*add_ref)(struct ib_qp *qp);
+enum iw_flags {
 
-	void		(*rem_ref)(struct ib_qp *qp);
-
-	struct ib_qp *	(*get_qp)(struct ib_device *device,
-				  int qpn);
-
-	int		(*connect)(struct iw_cm_id *cm_id,
-				   struct iw_cm_conn_param *conn_param);
-
-	int		(*accept)(struct iw_cm_id *cm_id,
-				  struct iw_cm_conn_param *conn_param);
-
-	int		(*reject)(struct iw_cm_id *cm_id,
-				  const void *pdata, u8 pdata_len);
-
-	int		(*create_listen)(struct iw_cm_id *cm_id,
-					 int backlog);
-
-	int		(*destroy_listen)(struct iw_cm_id *cm_id);
-	char		ifname[IFNAMSIZ];
+	/*
+	 * This flag allows the iwcm and iwpmd to still advertise
+	 * mappings even though the real and mapped port numbers are
+	 * the same.  Further, iwpmd will not bind any user socket to
+	 * reserve the port.  This is required for soft iwarp to
+	 * participate in the port-mapped iwarp space.
+	 */
+	IW_F_NO_PORT_MAP = (1 << 0),
 };
 
 /**
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
index fda3167..c895350 100644
--- a/include/rdma/iw_portmap.h
+++ b/include/rdma/iw_portmap.h
@@ -33,6 +33,9 @@
 #ifndef _IW_PORTMAP_H
 #define _IW_PORTMAP_H
 
+#include <linux/socket.h>
+#include <linux/netlink.h>
+
 #define IWPM_ULIBNAME_SIZE	32
 #define IWPM_DEVNAME_SIZE	32
 #define IWPM_IFNAME_SIZE	16
@@ -58,167 +61,31 @@
 	struct sockaddr_storage mapped_loc_addr;
 	struct sockaddr_storage rem_addr;
 	struct sockaddr_storage mapped_rem_addr;
+	u32 flags;
 };
 
-/**
- * iwpm_init - Allocate resources for the iwarp port mapper
- *
- * Should be called when network interface goes up.
- */
 int iwpm_init(u8);
-
-/**
- * iwpm_exit - Deallocate resources for the iwarp port mapper
- *
- * Should be called when network interface goes down.
- */
 int iwpm_exit(u8);
-
-/**
- * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
- *
- * Returns true if the pid is greater than zero, otherwise returns false
- */
 int iwpm_valid_pid(void);
-
-/**
- * iwpm_register_pid - Send a netlink query to userspace
- *                     to get the iwarp port mapper pid
- * @pm_msg: Contains driver info to send to the userspace port mapper
- * @nl_client: The index of the netlink client
- */
 int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client);
-
-/**
- * iwpm_add_mapping - Send a netlink add mapping request to
- *                    the userspace port mapper
- * @pm_msg: Contains the local ip/tcp address info to send
- * @nl_client: The index of the netlink client
- *
- * If the request is successful, the pm_msg stores
- * the port mapper response (mapped address info)
- */
 int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
-
-/**
- * iwpm_add_and_query_mapping - Send a netlink add and query mapping request
- *				 to the userspace port mapper
- * @pm_msg: Contains the local and remote ip/tcp address info to send
- * @nl_client: The index of the netlink client
- *
- * If the request is successful, the pm_msg stores the
- * port mapper response (mapped local and remote address info)
- */
 int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
-
-/**
- * iwpm_remove_mapping - Send a netlink remove mapping request
- *                       to the userspace port mapper
- *
- * @local_addr: Local ip/tcp address to remove
- * @nl_client: The index of the netlink client
- */
 int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client);
-
-/**
- * iwpm_register_pid_cb - Process the port mapper response to
- *                        iwpm_register_pid query
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- *
- * If successful, the function receives the userspace port mapper pid
- * which is used in future communication with the port mapper
- */
 int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_add_mapping_cb - Process the port mapper response to
- *                       iwpm_add_mapping request
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
 int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_add_and_query_mapping_cb - Process the port mapper response to
- *                                 iwpm_add_and_query_mapping request
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
 int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_remote_info_cb - Process remote connecting peer address info, which
- *                       the port mapper has received from the connecting peer
- *
- * @cb: Contains the received message (payload and netlink header)
- *
- * Stores the IPv4/IPv6 address info in a hash table
- */
 int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_mapping_error_cb - Process port mapper notification for error
- *
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
 int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_mapping_info_cb - Process a notification that the userspace
- *                        port mapper daemon is started
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- *
- * Using the received port mapper pid, send all the local mapping
- * info records to the userspace port mapper
- */
 int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_ack_mapping_info_cb - Process the port mapper ack for
- *                            the provided local mapping info records
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
 int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_get_remote_info - Get the remote connecting peer address info
- *
- * @mapped_loc_addr: Mapped local address of the listening peer
- * @mapped_rem_addr: Mapped remote address of the connecting peer
- * @remote_addr: To store the remote address of the connecting peer
- * @nl_client: The index of the netlink client
- *
- * The remote address info is retrieved and provided to the client in
- * the remote_addr. After that it is removed from the hash table
- */
 int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
 			struct sockaddr_storage *mapped_rem_addr,
 			struct sockaddr_storage *remote_addr, u8 nl_client);
-
-/**
- * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
- *                       info in a hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
- * @nl_client: The index of the netlink client
- */
 int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
-			struct sockaddr_storage *mapped_addr, u8 nl_client);
-
-/**
- * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
- *                       info from the hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
- *
- * Returns err code if mapping info is not found in the hash table,
- * otherwise returns 0
- */
+			struct sockaddr_storage *mapped_addr, u8 nl_client,
+			u32 map_flags);
 int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
 			struct sockaddr_storage *mapped_addr);
 
+int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb);
 #endif /* _IW_PORTMAP_H */
diff --git a/include/rdma/mr_pool.h b/include/rdma/mr_pool.h
index 986010b..e77123b 100644
--- a/include/rdma/mr_pool.h
+++ b/include/rdma/mr_pool.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 #ifndef _RDMA_MR_POOL_H
 #define _RDMA_MR_POOL_H 1
@@ -19,7 +11,7 @@
 void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr);
 
 int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
-		enum ib_mr_type type, u32 max_num_sg);
+		enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg);
 void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list);
 
 #endif /* _RDMA_MR_POOL_H */
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index b4f0ac0..bdbfe25 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -33,6 +33,8 @@
 #if !defined(OPA_PORT_INFO_H)
 #define OPA_PORT_INFO_H
 
+#include <rdma/opa_smi.h>
+
 #define OPA_PORT_LINK_MODE_NOP	0		/* No change */
 #define OPA_PORT_LINK_MODE_OPA	4		/* Port mode is OPA */
 
@@ -413,6 +415,6 @@
 	u8     local_port_num;
 	u8     reserved12;
 	u8     reserved13;                       /* was guid_cap */
-} __attribute__ ((packed));
+} __packed;
 
 #endif /* OPA_PORT_INFO_H */
diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h
index f789611..c7b2ef1 100644
--- a/include/rdma/opa_smi.h
+++ b/include/rdma/opa_smi.h
@@ -98,7 +98,7 @@
 
 struct opa_node_description {
 	u8 data[64];
-} __attribute__ ((packed));
+} __packed;
 
 struct opa_node_info {
 	u8      base_version;
@@ -114,7 +114,7 @@
 	__be32  revision;
 	u8      local_port_num;
 	u8      vendor_id[3];   /* network byte order */
-} __attribute__ ((packed));
+} __packed;
 
 #define OPA_PARTITION_TABLE_BLK_SIZE 32
 
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 5d71a7f..71f48cf 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -152,7 +152,11 @@
  * @ps: RDMA port space.
  * @qp_type: type of queue pair associated with the id.
  *
- * The id holds a reference on the network namespace until it is destroyed.
+ * Returns a new rdma_cm_id. The id holds a reference on the network
+ * namespace until it is destroyed.
+ *
+ * The event handler callback serializes on the id's mutex and is
+ * allowed to sleep.
  */
 #define rdma_create_id(net, event_handler, context, ps, qp_type) \
 	__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
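/*
 * Editor's example (illustrative sketch): because the handler serializes on
 * the id's mutex, it may sleep; GFP_KERNEL allocations are fine here.
 * "my_cm_handler" is hypothetical.
 */
static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	/* may block; runs once at a time per id */
	return 0;
}
/* id = rdma_create_id(&init_net, my_cm_handler, NULL, RDMA_PS_TCP, IB_QPT_RC); */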
@@ -192,7 +196,8 @@
  * @timeout_ms: Time to wait for resolution to complete.
  */
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-		      const struct sockaddr *dst_addr, int timeout_ms);
+		      const struct sockaddr *dst_addr,
+		      unsigned long timeout_ms);
 
 /**
  * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
@@ -202,7 +207,7 @@
  * Users must have first called rdma_resolve_addr to resolve a dst_addr
  * into an RDMA address before calling this routine.
  */
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms);
 
 /**
  * rdma_create_qp - Allocate a QP and associate it with the specified RDMA
@@ -369,6 +374,7 @@
  */
 int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
 
+int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout);
  /**
  * rdma_get_service_id - Return the IB service ID for a specified address.
  * @id: Communication identifier associated with the address.
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
new file mode 100644
index 0000000..eb99856
--- /dev/null
+++ b/include/rdma/rdma_counter.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_COUNTER_H_
+#define _RDMA_COUNTER_H_
+
+#include <linux/mutex.h>
+#include <linux/pid_namespace.h>
+
+#include <rdma/restrack.h>
+#include <rdma/rdma_netlink.h>
+
+struct ib_device;
+struct ib_qp;
+
+struct auto_mode_param {
+	int qp_type;
+};
+
+struct rdma_counter_mode {
+	enum rdma_nl_counter_mode mode;
+	enum rdma_nl_counter_mask mask;
+	struct auto_mode_param param;
+};
+
+struct rdma_port_counter {
+	struct rdma_counter_mode mode;
+	struct rdma_hw_stats *hstats;
+	unsigned int num_counters;
+	struct mutex lock;
+};
+
+struct rdma_counter {
+	struct rdma_restrack_entry	res;
+	struct ib_device		*device;
+	uint32_t			id;
+	struct kref			kref;
+	struct rdma_counter_mode	mode;
+	struct mutex			lock;
+	struct rdma_hw_stats		*stats;
+	u8				port;
+};
+
+void rdma_counter_init(struct ib_device *dev);
+void rdma_counter_release(struct ib_device *dev);
+int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
+			       bool on, enum rdma_nl_counter_mask mask);
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port);
+int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);
+
+int rdma_counter_query_stats(struct rdma_counter *counter);
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index);
+int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
+			  u32 qp_num, u32 counter_id);
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
+				u32 qp_num, u32 *counter_id);
+int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
+			    u32 qp_num, u32 counter_id);
+int rdma_counter_get_mode(struct ib_device *dev, u8 port,
+			  enum rdma_nl_counter_mode *mode,
+			  enum rdma_nl_counter_mask *mask);
+
+#endif /* _RDMA_COUNTER_H_ */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index c369703..ab22759 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -6,6 +6,12 @@
 #include <linux/netlink.h>
 #include <uapi/rdma/rdma_netlink.h>
 
+enum {
+	RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
+	RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
+	RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32,
+};
+
 struct rdma_nl_cbs {
 	int (*doit)(struct sk_buff *skb, struct nlmsghdr *nlh,
 		    struct netlink_ext_ack *extack);
@@ -70,33 +76,50 @@
 
 /**
  * Send the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
  * @skb: The netlink skb
  * @pid: Userspace netlink process ID
  * Returns 0 on success or a negative error code.
  */
-int rdma_nl_unicast(struct sk_buff *skb, u32 pid);
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid);
 
 /**
  * Send, with wait/1 retry, the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
  * @skb: The netlink skb
  * @pid: Userspace netlink process ID
  * Returns 0 on success or a negative error code.
  */
-int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid);
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid);
 
 /**
  * Send the supplied skb to a netlink group.
+ * @net: Net namespace in which to send the skb
  * @skb: The netlink skb
  * @group: Netlink group ID
  * @flags: allocation flags
  * Returns 0 on success or a negative error code.
  */
-int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+		      unsigned int group, gfp_t flags);
 
 /**
  * Check if there are any listeners to the netlink group
  * @group: the netlink group ID
- * Returns 0 on success or a negative for no listeners.
+ * Returns true if the group has listeners, false otherwise.
  */
-int rdma_nl_chk_listeners(unsigned int group);
+bool rdma_nl_chk_listeners(unsigned int group);
+
+struct rdma_link_ops {
+	struct list_head list;
+	const char *type;
+	int (*newlink)(const char *ibdev_name, struct net_device *ndev);
+};
+
+void rdma_link_register(struct rdma_link_ops *ops);
+void rdma_link_unregister(struct rdma_link_ops *ops);
+
+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
+#define MODULE_ALIAS_RDMA_CLIENT(type) MODULE_ALIAS("rdma-client-" type)
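/*
 * Editor's example (illustrative sketch): a driver wiring up
 * "rdma link add <name> type mylink netdev <dev>" support; all "mylink"
 * names are hypothetical.
 */
static int mylink_newlink(const char *ibdev_name, struct net_device *ndev)
{
	/* allocate and register an ib_device bound to ndev */
	return 0;
}

static struct rdma_link_ops mylink_ops = {
	.type = "mylink",
	.newlink = mylink_newlink,
};
/* rdma_link_register(&mylink_ops) at module init; MODULE_ALIAS_RDMA_LINK("mylink") */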
+
 #endif /* _RDMA_NETLINK_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e79229a..ac5a943 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -2,7 +2,7 @@
 #define DEF_RDMA_VT_H
 
 /*
- * Copyright(c) 2016 - 2018 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -59,7 +59,6 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_mad.h>
 #include <rdma/rdmavt_mr.h>
-#include <rdma/rdmavt_qp.h>
 
 #define RVT_MAX_PKEY_VALUES 16
 
@@ -72,6 +71,8 @@
 	struct list_head list;
 };
 
+struct rvt_qp;
+struct rvt_qpn_table;
 struct rvt_ibport {
 	struct rvt_qp __rcu *qp[2];
 	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
@@ -115,6 +116,7 @@
 	u64 n_unaligned;
 	u64 n_rc_dupreq;
 	u64 n_rc_seqnak;
+	u64 n_rc_crwaits;
 	u16 pkey_violations;
 	u16 qkey_violations;
 	u16 mkey_violations;
@@ -149,6 +151,10 @@
 
 #define RVT_CQN_MAX 16 /* maximum length of cq name */
 
+#define RVT_SGE_COPY_MEMCPY	0
+#define RVT_SGE_COPY_CACHELESS	1
+#define RVT_SGE_COPY_ADAPTIVE	2
+
 /*
  * Things that are driver specific, module parameters in hfi1 and qib
  */
@@ -161,6 +167,9 @@
 	 */
 	unsigned int lkey_table_size;
 	unsigned int qp_table_size;
+	unsigned int sge_copy_mode;
+	unsigned int wss_threshold;
+	unsigned int wss_clean_period;
 	int qpn_start;
 	int qpn_inc;
 	int qpn_res_start;
@@ -175,9 +184,15 @@
 	u32 max_mad_size;
 	u8 qos_shift;
 	u8 max_rdma_atomic;
+	u8 extra_rdma_atomic;
 	u8 reserved_operations;
 };
 
+/* User context */
+struct rvt_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
 /* Protection domain */
 struct rvt_pd {
 	struct ib_pd ibpd;
@@ -188,11 +203,37 @@
 struct rvt_ah {
 	struct ib_ah ibah;
 	struct rdma_ah_attr attr;
-	atomic_t refcount;
 	u8 vl;
 	u8 log_pmtu;
 };
 
+/*
+ * This structure is used by rvt_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct rvt_mmap_info {
+	struct list_head pending_mmaps;
+	struct ib_ucontext *context;
+	void *obj;
+	__u64 offset;
+	struct kref ref;
+	u32 size;
+};
+
+/* memory working set size */
+struct rvt_wss {
+	unsigned long *entries;
+	atomic_t total_count;
+	atomic_t clean_counter;
+	atomic_t clean_entry;
+
+	int threshold;
+	int num_entries;
+	long pages_mask;
+	unsigned int clean_period;
+};
+
 struct rvt_dev_info;
 struct rvt_swqe;
 struct rvt_driver_provided {
@@ -211,11 +252,18 @@
 	 * version requires the s_lock not to be held. The other assumes the
 	 * s_lock is held.
 	 */
-	void (*schedule_send)(struct rvt_qp *qp);
-	void (*schedule_send_no_lock)(struct rvt_qp *qp);
+	bool (*schedule_send)(struct rvt_qp *qp);
+	bool (*schedule_send_no_lock)(struct rvt_qp *qp);
 
-	/* Driver specific work request checking */
-	int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+	/*
+	 * Driver specific work request setup and checking.
+	 * This function is allowed to perform any setup, checks, or
+	 * adjustments required to the SWQE in order to be usable by
+	 * underlying protocols. This includes private data structure
+	 * allocations.
+	 */
+	int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+			 bool *call_send);
 
 	/*
 	 * Sometimes rdmavt needs to kick the driver's send progress. That is
@@ -223,9 +271,6 @@
 	 */
 	void (*do_send)(struct rvt_qp *qp);
 
-	/* Passed to ib core registration. Callback to create syfs files */
-	int (*port_callback)(struct ib_device *, u8, struct kobject *);
-
 	/*
 	 * Returns a pointer to the undelying hardware's PCI device. This is
 	 * used to display information as to what hardware is being referenced
@@ -242,6 +287,13 @@
 	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
 
 	/*
+	 * Init a structure allocated with qp_priv_alloc(). This should be
+	 * called after all qp fields have been initialized in rdmavt.
+	 */
+	int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			    struct ib_qp_init_attr *init_attr);
+
+	/*
 	 * Free the driver's private qp structure.
 	 */
 	void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
@@ -371,6 +423,9 @@
 	/* post send table */
 	const struct rvt_operation_params *post_parms;
 
+	/* opcode translation table */
+	const enum ib_wc_opcode *wc_opcode;
+
 	/* Driver specific helper functions */
 	struct rvt_driver_provided driver_f;
 
@@ -411,6 +466,8 @@
 	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
 	spinlock_t n_mcast_grps_lock;
 
+	/* Memory Working Set Size */
+	struct rvt_wss *wss;
 };
 
 /**
@@ -423,7 +480,14 @@
 				      const char *fmt, const char *name,
 				      const int unit)
 {
-	snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit);
+	/*
+	 * FIXME: rvt and its users want to touch the ibdev before
+	 * registration and have things like the name work. We don't have the
+	 * infrastructure in the core to support this directly today, hack it
+	 * to work by setting the name manually here.
+	 */
+	dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
+	strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
 }
 
 /**
@@ -434,7 +498,7 @@
  */
 static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
 {
-	return rdi->ibdev.name;
+	return dev_name(&rdi->ibdev.dev);
 }
 
 static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
@@ -452,16 +516,6 @@
 	return  container_of(ibdev, struct rvt_dev_info, ibdev);
 }
 
-static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct rvt_srq, ibsrq);
-}
-
-static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct rvt_qp, ibqp);
-}
-
 static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
 {
 	/*
@@ -476,7 +530,14 @@
  */
 static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
 {
-	return rdi->dparms.max_rdma_atomic + 1;
+	return rdi->dparms.max_rdma_atomic +
+		rdi->dparms.extra_rdma_atomic + 1;
+}
+
+static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi)
+{
+	return rdi->dparms.max_rdma_atomic +
+		rdi->dparms.extra_rdma_atomic;
 }
 
 /*
@@ -492,54 +553,9 @@
 		return rdi->ports[port_index]->pkey_table[index];
 }
 
-/**
- * rvt_lookup_qpn - return the QP with the given QPN
- * @ibp: the ibport
- * @qpn: the QP number to look up
- *
- * The caller must hold the rcu_read_lock(), and keep the lock until
- * the returned qp is no longer in use.
- */
-/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */
-static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
-					    struct rvt_ibport *rvp,
-					    u32 qpn) __must_hold(RCU)
-{
-	struct rvt_qp *qp = NULL;
-
-	if (unlikely(qpn <= 1)) {
-		qp = rcu_dereference(rvp->qp[qpn]);
-	} else {
-		u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits);
-
-		for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp;
-			qp = rcu_dereference(qp->next))
-			if (qp->ibqp.qp_num == qpn)
-				break;
-	}
-	return qp;
-}
-
-/**
- * rvt_mod_retry_timer - mod a retry timer
- * @qp - the QP
- * Modify a potentially already running retry timer
- */
-static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
-{
-	struct ib_qp *ibqp = &qp->ibqp;
-	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
-
-	lockdep_assert_held(&qp->s_lock);
-	qp->s_flags |= RVT_S_TIMER;
-	/* 4.096 usec. * (1 << qp->timeout) */
-	mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
-		  rdi->busy_jiffies);
-}
-
 struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
 void rvt_dealloc_device(struct rvt_dev_info *rdi);
-int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id);
+int rvt_register_device(struct rvt_dev_info *rvd);
 void rvt_unregister_device(struct rvt_dev_info *rvd);
 int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
 int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
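
For illustration, a sketch (not part of this patch) of a driver bring-up under
the two interface changes above: the name is set through rvt_set_ibdev_name(),
and rvt_register_device() no longer takes a driver_id. The "exdrv" name and
unit number are assumptions.

/* Illustrative only: register a hypothetical rdmavt driver instance. */
static int exdrv_register(struct rvt_dev_info *rdi, int unit)
{
	/* Sets the core device name and mirrors it into ibdev.name. */
	rvt_set_ibdev_name(rdi, "%s%d", "exdrv", unit);

	/* The driver_id argument was dropped from this call. */
	return rvt_register_device(rdi);
}
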
diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h
index 75dc65c..574eb72 100644
--- a/include/rdma/rdmavt_cq.h
+++ b/include/rdma/rdmavt_cq.h
@@ -53,6 +53,7 @@
 
 #include <linux/kthread.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
@@ -61,18 +62,27 @@
 #define RVT_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
 
 /*
+ * Define a read macro that applies an smp_load_acquire memory barrier
+ * when reading an index of a circular buffer that is mmapped to user space.
+ */
+#define RDMA_READ_UAPI_ATOMIC(member) smp_load_acquire(&(member).val)
+
+/*
+ * Define a write macro that uses an smp_store_release memory barrier
+ * when writing an index of a circular buffer that is mmapped to user space.
+ */
+#define RDMA_WRITE_UAPI_ATOMIC(member, x) smp_store_release(&(member).val, x)
+#include <rdma/rvt-abi.h>
+
+/*
  * This structure is used to contain the head pointer, tail pointer,
  * and completion queue entries as a single memory allocation so
  * it can be mmap'ed into user space.
  */
-struct rvt_cq_wc {
+struct rvt_k_cq_wc {
 	u32 head;               /* index of next entry to fill */
 	u32 tail;               /* index of next ib_poll_cq() entry */
-	union {
-		/* these are actually size ibcq.cqe + 1 */
-		struct ib_uverbs_wc uqueue[0];
-		struct ib_wc kqueue[0];
-	};
+	struct ib_wc kqueue[];
 };
 
 /*
@@ -84,10 +94,12 @@
 	spinlock_t lock; /* protect changes in this struct */
 	u8 notify;
 	u8 triggered;
+	u8 cq_full;
 	int comp_vector_cpu;
 	struct rvt_dev_info *rdi;
 	struct rvt_cq_wc *queue;
 	struct rvt_mmap_info *ip;
+	struct rvt_k_cq_wc *kqueue;
 };
 
 static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq)
@@ -95,6 +107,6 @@
 	return container_of(ibcq, struct rvt_cq, ibcq);
 }
 
-void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
+bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
 
 #endif          /* DEF_RDMAVT_INCCQH */
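
The two UAPI atomic macros above implement a release/acquire protocol on ring
indices shared with user space. A sketch of the consumer side (the emptiness
check itself is illustrative, not from this header):

/* Illustrative only: test a user-mapped CQ ring for emptiness. */
static bool example_cq_is_empty(struct rvt_cq *cq)
{
	/* The acquire load pairs with the producer's release store, so
	 * any CQE written before the head update is visible afterwards.
	 */
	u32 head = RDMA_READ_UAPI_ATOMIC(cq->queue->head);
	u32 tail = RDMA_READ_UAPI_ATOMIC(cq->queue->tail);

	return head == tail;
}
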
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 927f6d5..b550ae8 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -2,7 +2,7 @@
 #define DEF_RDMAVT_INCQP_H
 
 /*
- * Copyright(c) 2016 - 2018 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -52,6 +52,7 @@
 #include <rdma/ib_pack.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdmavt_cq.h>
+#include <rdma/rvt-abi.h>
 /*
  * Atomic bit definitions for r_aflags.
  */
@@ -83,7 +84,6 @@
  * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating
  *                  next send completion entry not via send DMA
  * RVT_S_WAIT_PIO - waiting for a send buffer to be available
- * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets
  * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available
  * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
  * RVT_S_WAIT_KMEM - waiting for kernel memory to be available
@@ -157,6 +157,22 @@
 #define RVT_SEND_RESERVE_USED           IB_SEND_RESERVED_START
 #define RVT_SEND_COMPLETION_ONLY	(IB_SEND_RESERVED_START << 1)
 
+/**
+ * rvt_ud_wr - IB UD work plus AH cache
+ * @wr: valid IB work request
+ * @attr: pointer to an allocated AH attribute
+ *
+ * Special case the UD WR so we can keep track of the AH attributes.
+ *
+ * NOTE: This data structure is strictly ordered: wr, then attr. I.e., the attr
+ * MUST come after wr.  The ib_ud_wr is sized and copied in rvt_post_one_wr.
+ * The copy assumes that wr is first.
+ */
+struct rvt_ud_wr {
+	struct ib_ud_wr wr;
+	struct rdma_ah_attr *attr;
+};
+
 /*
  * Send work request queue entry.
  * The size of the sg_list is determined when the QP is created and stored
@@ -165,7 +181,7 @@
 struct rvt_swqe {
 	union {
 		struct ib_send_wr wr;   /* don't use wr.sg_list */
-		struct ib_ud_wr ud_wr;
+		struct rvt_ud_wr ud_wr;
 		struct ib_reg_wr reg_wr;
 		struct ib_rdma_wr rdma_wr;
 		struct ib_atomic_wr atomic_wr;
@@ -174,36 +190,88 @@
 	u32 lpsn;               /* last packet sequence number */
 	u32 ssn;                /* send sequence number */
 	u32 length;             /* total length of data in sg_list */
+	void *priv;             /* driver dependent field */
 	struct rvt_sge sg_list[0];
 };
 
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+/**
+ * struct rvt_krwq - kernel receive work request queue
+ * @p_lock: lock to protect producer of the kernel buffer
+ * @head: index of next entry to fill
+ * @c_lock: lock to protect consumer of the kernel buffer
+ * @tail: index of next entry to pull
+ * @count: approximate count of total receive entries posted
+ * @rvt_rwqe: receive work request queue entries
+ *
+ * This structure is used to contain the head pointer,
+ * tail pointer and receive work queue entries for kernel
+ * mode users.
  */
-struct rvt_rwqe {
-	u64 wr_id;
-	u8 num_sge;
-	struct ib_sge sg_list[0];
+struct rvt_krwq {
+	spinlock_t p_lock;	/* protect producer */
+	u32 head;               /* new work requests posted to the head */
+
+	/* protect consumer */
+	spinlock_t c_lock ____cacheline_aligned_in_smp;
+	u32 tail;               /* receives pull requests from here. */
+	u32 count;		/* approx count of receive entries posted */
+	struct rvt_rwqe *curr_wq;
+	struct rvt_rwqe wq[];
 };
 
 /*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
+ * rvt_get_swqe_ah - Return the pointer to the struct rvt_ah
+ * @swqe: valid Send WQE
+ *
  */
-struct rvt_rwq {
-	u32 head;               /* new work requests posted to the head */
-	u32 tail;               /* receives pull requests from here. */
-	struct rvt_rwqe wq[0];
-};
+static inline struct rvt_ah *rvt_get_swqe_ah(struct rvt_swqe *swqe)
+{
+	return ibah_to_rvtah(swqe->ud_wr.wr.ah);
+}
+
+/**
+ * rvt_get_swqe_ah_attr - Return the cached ah attribute information
+ * @swqe: valid Send WQE
+ *
+ */
+static inline struct rdma_ah_attr *rvt_get_swqe_ah_attr(struct rvt_swqe *swqe)
+{
+	return swqe->ud_wr.attr;
+}
+
+/**
+ * rvt_get_swqe_remote_qpn - Access the remote QPN value
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u32 rvt_get_swqe_remote_qpn(struct rvt_swqe *swqe)
+{
+	return swqe->ud_wr.wr.remote_qpn;
+}
+
+/**
+ * rvt_get_swqe_remote_qkey - Access the remote qkey value
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u32 rvt_get_swqe_remote_qkey(struct rvt_swqe *swqe)
+{
+	return swqe->ud_wr.wr.remote_qkey;
+}
+
+/**
+ * rvt_get_swqe_pkey_index - Access the pkey index
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u16 rvt_get_swqe_pkey_index(struct rvt_swqe *swqe)
+{
+	return swqe->ud_wr.wr.pkey_index;
+}
 
 struct rvt_rq {
 	struct rvt_rwq *wq;
+	struct rvt_krwq *kwq;
 	u32 size;               /* size of RWQE array */
 	u8 max_sge;
 	/* protect changes in this struct */
@@ -211,20 +279,6 @@
 };
 
 /*
- * This structure is used by rvt_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct rvt_mmap_info {
-	struct list_head pending_mmaps;
-	struct ib_ucontext *context;
-	void *obj;
-	__u64 offset;
-	struct kref ref;
-	unsigned size;
-};
-
-/*
  * This structure holds the information that the send tasklet needs
  * to send a RDMA read response or atomic operation.
  */
@@ -235,6 +289,7 @@
 	u32 lpsn;
 	u8 opcode;
 	u8 sent;
+	void *priv;
 };
 
 #define	RC_QP_SCALING_INTERVAL	5
@@ -244,6 +299,7 @@
 #define RVT_OPERATION_ATOMIC_SGE  0x00000004
 #define RVT_OPERATION_LOCAL       0x00000008
 #define RVT_OPERATION_USE_RESERVE 0x00000010
+#define RVT_OPERATION_IGN_RNR_CNT 0x00000020
 
 #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
 
@@ -373,6 +429,7 @@
 	u8 s_rnr_retry;         /* requester RNR retry counter */
 	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
 	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
+	u8 s_acked_ack_queue;   /* index into s_ack_queue[] */
 
 	struct rvt_sge_state s_ack_rdma_sge;
 	struct timer_list s_timer;
@@ -395,6 +452,16 @@
 	u32 limit;
 };
 
+static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
+{
+	return container_of(ibsrq, struct rvt_srq, ibsrq);
+}
+
+static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct rvt_qp, ibqp);
+}
+
 #define RVT_QPN_MAX                 BIT(24)
 #define RVT_QPNMAP_ENTRIES          (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
 #define RVT_BITS_PER_PAGE           (PAGE_SIZE * BITS_PER_BYTE)
@@ -473,7 +540,7 @@
 static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n)
 {
 	return (struct rvt_rwqe *)
-		((char *)rq->wq->wq +
+		((char *)rq->kwq->curr_wq +
 		 (sizeof(struct rvt_rwqe) +
 		  rq->max_sge * sizeof(struct ib_sge)) * n);
 }
@@ -541,7 +608,7 @@
 /**
  * rvt_qp_wqe_unreserve - clean reserved operation
  * @qp - the rvt qp
- * @wqe - the send wqe
+ * @flags - send wqe flags
  *
  * This decrements the reserve use count.
  *
@@ -553,11 +620,9 @@
  * the compiler does not juggle the order of the s_last
  * ring index and the decrementing of s_reserved_used.
  */
-static inline void rvt_qp_wqe_unreserve(
-	struct rvt_qp *qp,
-	struct rvt_swqe *wqe)
+static inline void rvt_qp_wqe_unreserve(struct rvt_qp *qp, int flags)
 {
-	if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) {
+	if (unlikely(flags & RVT_SEND_RESERVE_USED)) {
 		atomic_dec(&qp->s_reserved_used);
 		/* insure no compiler re-order up to s_last change */
 		smp_mb__after_atomic();
@@ -566,42 +631,6 @@
 
 extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
 
-/**
- * rvt_qp_swqe_complete() - insert send completion
- * @qp - the qp
- * @wqe - the send wqe
- * @status - completion status
- *
- * Insert a send completion into the completion
- * queue if the qp indicates it should be done.
- *
- * See IBTA 10.7.3.1 for info on completion
- * control.
- */
-static inline void rvt_qp_swqe_complete(
-	struct rvt_qp *qp,
-	struct rvt_swqe *wqe,
-	enum ib_wc_opcode opcode,
-	enum ib_wc_status status)
-{
-	if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
-		return;
-	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
-	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-	     status != IB_WC_SUCCESS) {
-		struct ib_wc wc;
-
-		memset(&wc, 0, sizeof(wc));
-		wc.wr_id = wqe->wr.wr_id;
-		wc.status = status;
-		wc.opcode = opcode;
-		wc.qp = &qp->ibqp;
-		wc.byte_len = wqe->length;
-		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
-			     status != IB_WC_SUCCESS);
-	}
-}
-
 /*
  * Compare the lower 24 bits of the msn values.
  * Returns an integer <, ==, or > than zero.
@@ -629,6 +658,16 @@
 void rvt_get_credit(struct rvt_qp *qp, u32 aeth);
 
 /**
+ * rvt_restart_sge - rewind the sge state for a wqe
+ * @ss: the sge state pointer
+ * @wqe: the wqe to rewind
+ * @len: the data length from the start of the wqe in bytes
+ *
+ * Returns the remaining data length.
+ */
+u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
+
+/**
  * @qp - the qp pair
  * @len - the length
  *
@@ -664,19 +703,206 @@
 	return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL;
 }
 
+/**
+ * rvt_lookup_qpn - return the QP with the given QPN
+ * @ibp: the ibport
+ * @qpn: the QP number to look up
+ *
+ * The caller must hold the rcu_read_lock(), and keep the lock until
+ * the returned qp is no longer in use.
+ */
+static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
+					    struct rvt_ibport *rvp,
+					    u32 qpn) __must_hold(RCU)
+{
+	struct rvt_qp *qp = NULL;
+
+	if (unlikely(qpn <= 1)) {
+		qp = rcu_dereference(rvp->qp[qpn]);
+	} else {
+		u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits);
+
+		for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp;
+			qp = rcu_dereference(qp->next))
+			if (qp->ibqp.qp_num == qpn)
+				break;
+	}
+	return qp;
+}
+
+/**
+ * rvt_mod_retry_timer_ext - mod a retry timer
+ * @qp - the QP
+ * @shift - timeout shift to wait for multiple packets
+ * Modify a potentially already running retry timer
+ */
+static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	lockdep_assert_held(&qp->s_lock);
+	qp->s_flags |= RVT_S_TIMER;
+	/* 4.096 usec. * (1 << qp->timeout) */
+	mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies +
+		  (qp->timeout_jiffies << shift));
+}
+
+static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
+{
+	return rvt_mod_retry_timer_ext(qp, 0);
+}
+
+/**
+ * rvt_put_qp_swqe - drop refs held by swqe
+ * @qp: the send qp
+ * @wqe: the send wqe
+ *
+ * This drops any references held by the swqe
+ */
+static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	rvt_put_swqe(wqe);
+	if (qp->allowed_ops == IB_OPCODE_UD)
+		rdma_destroy_ah_attr(wqe->ud_wr.attr);
+}
+
+/**
+ * rvt_qp_swqe_incr - increment ring index
+ * @qp: the qp
+ * @val: the starting value
+ *
+ * Return: the new value wrapping as appropriate
+ */
+static inline u32
+rvt_qp_swqe_incr(struct rvt_qp *qp, u32 val)
+{
+	if (++val >= qp->s_size)
+		val = 0;
+	return val;
+}
+
+int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
+
+/**
+ * rvt_recv_cq - add a new entry to the completion queue
+ *		 from the receive queue
+ * @qp: the qp owning the receive queue
+ * @wc: work completion entry to add
+ * @solicited: true if @wc is solicited
+ *
+ * This is a wrapper around rvt_cq_enter() for the receive
+ * queue. If rvt_cq_enter() returns false, the cq is full and
+ * the qp is put into the error state.
+ */
+static inline void rvt_recv_cq(struct rvt_qp *qp, struct ib_wc *wc,
+			       bool solicited)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.recv_cq);
+
+	if (unlikely(!rvt_cq_enter(cq, wc, solicited)))
+		rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR);
+}
+
+/**
+ * rvt_send_cq - add a new entry to the completion queue
+ *		 from the send queue
+ * @qp: the qp owning the send queue
+ * @wc: work completion entry to add
+ * @solicited: true if @wc is solicited
+ *
+ * This is a wrapper around rvt_cq_enter() for the send
+ * queue. If rvt_cq_enter() returns false, the cq is full and
+ * the qp is put into the error state.
+ */
+static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc,
+			       bool solicited)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.send_cq);
+
+	if (unlikely(!rvt_cq_enter(cq, wc, solicited)))
+		rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR);
+}
+
+/**
+ * rvt_qp_complete_swqe - insert send completion
+ * @qp - the qp
+ * @wqe - the send wqe
+ * @opcode - wc operation (driver dependent)
+ * @status - completion status
+ *
+ * Update the s_last information, and then insert a send
+ * completion into the completion
+ * queue if the qp indicates it should be done.
+ *
+ * See IBTA 10.7.3.1 for info on completion
+ * control.
+ *
+ * Return: new last
+ */
+static inline u32
+rvt_qp_complete_swqe(struct rvt_qp *qp,
+		     struct rvt_swqe *wqe,
+		     enum ib_wc_opcode opcode,
+		     enum ib_wc_status status)
+{
+	bool need_completion;
+	u64 wr_id;
+	u32 byte_len, last;
+	int flags = wqe->wr.send_flags;
+
+	rvt_qp_wqe_unreserve(qp, flags);
+	rvt_put_qp_swqe(qp, wqe);
+
+	need_completion =
+		!(flags & RVT_SEND_RESERVE_USED) &&
+		(!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
+		(flags & IB_SEND_SIGNALED) ||
+		status != IB_WC_SUCCESS);
+	if (need_completion) {
+		wr_id = wqe->wr.wr_id;
+		byte_len = wqe->length;
+		/* above fields required before writing s_last */
+	}
+	last = rvt_qp_swqe_incr(qp, qp->s_last);
+	/* see rvt_qp_is_avail() */
+	smp_store_release(&qp->s_last, last);
+	if (need_completion) {
+		struct ib_wc w = {
+			.wr_id = wr_id,
+			.status = status,
+			.opcode = opcode,
+			.qp = &qp->ibqp,
+			.byte_len = byte_len,
+		};
+		rvt_send_cq(qp, &w, status != IB_WC_SUCCESS);
+	}
+	return last;
+}
+
 extern const int  ib_rvt_state_ops[];
 
 struct rvt_dev_info;
 int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only);
 void rvt_comm_est(struct rvt_qp *qp);
-int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
 void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
 unsigned long rvt_rnr_tbl_to_usec(u32 index);
 enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t);
 void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth);
 void rvt_del_timers_sync(struct rvt_qp *qp);
 void rvt_stop_rc_timers(struct rvt_qp *qp);
-void rvt_add_retry_timer(struct rvt_qp *qp);
+void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift);
+static inline void rvt_add_retry_timer(struct rvt_qp *qp)
+{
+	rvt_add_retry_timer_ext(qp, 0);
+}
+
+void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
+		  void *data, u32 length,
+		  bool release, bool copy_last);
+void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+		       enum ib_wc_status status);
+void rvt_ruc_loopback(struct rvt_qp *qp);
 
 /**
  * struct rvt_qp_iter - the iterator for QPs
@@ -700,6 +926,88 @@
 	int n;
 };
 
+/**
+ * ib_cq_tail - Return tail index of cq buffer
+ * @send_cq - The cq for send
+ *
+ * This is called in qp_iter_print to get the tail
+ * of the cq buffer.
+ */
+static inline u32 ib_cq_tail(struct ib_cq *send_cq)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(send_cq);
+
+	return ibcq_to_rvtcq(send_cq)->ip ?
+	       RDMA_READ_UAPI_ATOMIC(cq->queue->tail) :
+	       ibcq_to_rvtcq(send_cq)->kqueue->tail;
+}
+
+/**
+ * ib_cq_head - Return head index of cq buffer
+ * @send_cq - The cq for send
+ *
+ * This is called in qp_iter_print to get the head
+ * of the cq buffer.
+ */
+static inline u32 ib_cq_head(struct ib_cq *send_cq)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(send_cq);
+
+	return ibcq_to_rvtcq(send_cq)->ip ?
+	       RDMA_READ_UAPI_ATOMIC(cq->queue->head) :
+	       ibcq_to_rvtcq(send_cq)->kqueue->head;
+}
+
+/**
+ * rvt_free_rq - free memory allocated for rvt_rq struct
+ * @rq: receive queue data structure
+ *
+ * This function should only be called if the rvt_mmap_info()
+ * has not succeeded.
+ */
+static inline void rvt_free_rq(struct rvt_rq *rq)
+{
+	kvfree(rq->kwq);
+	rq->kwq = NULL;
+	vfree(rq->wq);
+	rq->wq = NULL;
+}
+
+/**
+ * rvt_to_iport - Get the ibport pointer
+ * @qp: the qp pointer
+ *
+ * This function returns the ibport pointer from the qp pointer.
+ */
+static inline struct rvt_ibport *rvt_to_iport(struct rvt_qp *qp)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+	return rdi->ports[qp->port_num - 1];
+}
+
+/**
+ * rvt_rc_credit_avail - Check if there are enough RC credits for the request
+ * @qp: the qp
+ * @wqe: the request
+ *
+ * This function returns false when there are not enough credits for the given
+ * request and true otherwise.
+ */
+static inline bool rvt_rc_credit_avail(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	lockdep_assert_held(&qp->s_lock);
+	if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
+	    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+		struct rvt_ibport *rvp = rvt_to_iport(qp);
+
+		qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+		rvp->n_rc_crwaits++;
+		return false;
+	}
+	return true;
+}
+
 struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
 				     u64 v,
 				     void (*cb)(struct rvt_qp *qp, u64 v));
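
To show how the relocated and new helpers compose, a sketch of a driver
send-completion path; the wrapper and the wc_opcode table lookup are
illustrative assumptions about typical use of the new members, not code from
this patch.

/* Illustrative only: complete a send WQE through rvt_qp_complete_swqe(). */
static void exdrv_send_done(struct rvt_qp *qp, struct rvt_swqe *wqe,
			    enum ib_wc_status status)
{
	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);

	/* Unreserves the WQE, drops its references, advances s_last with
	 * a release store, and enters a completion when required.
	 */
	rvt_qp_complete_swqe(qp, wqe, rdi->wc_opcode[wqe->wr.opcode],
			     status);
}
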
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 9654d33..83df1ec 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -7,12 +7,15 @@
 #define _RDMA_RESTRACK_H_
 
 #include <linux/typecheck.h>
-#include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/kref.h>
 #include <linux/completion.h>
 #include <linux/sched/task.h>
 #include <uapi/rdma/rdma_netlink.h>
+#include <linux/xarray.h>
+
+struct ib_device;
+struct sk_buff;
 
 /**
  * enum rdma_restrack_type - HW objects to track
@@ -39,36 +42,19 @@
 	 */
 	RDMA_RESTRACK_MR,
 	/**
+	 * @RDMA_RESTRACK_CTX: Verbs contexts (CTX)
+	 */
+	RDMA_RESTRACK_CTX,
+	/**
+	 * @RDMA_RESTRACK_COUNTER: Statistic Counter
+	 */
+	RDMA_RESTRACK_COUNTER,
+	/**
 	 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
 	 */
 	RDMA_RESTRACK_MAX
 };
 
-#define RDMA_RESTRACK_HASH_BITS	8
-struct rdma_restrack_entry;
-
-/**
- * struct rdma_restrack_root - main resource tracking management
- * entity, per-device
- */
-struct rdma_restrack_root {
-	/*
-	 * @rwsem: Read/write lock to protect lists
-	 */
-	struct rw_semaphore	rwsem;
-	/**
-	 * @hash: global database for all resources per-device
-	 */
-	DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS);
-	/**
-	 * @fill_res_entry: driver-specific fill function
-	 *
-	 * Allows rdma drivers to add their own restrack attributes.
-	 */
-	int (*fill_res_entry)(struct sk_buff *msg,
-			      struct rdma_restrack_entry *entry);
-};
-
 /**
  * struct rdma_restrack_entry - metadata per-entry
  */
@@ -105,42 +91,24 @@
 	 */
 	const char		*kern_name;
 	/**
-	 * @node: hash table entry
-	 */
-	struct hlist_node	node;
-	/**
 	 * @type: various objects in restrack database
 	 */
 	enum rdma_restrack_type	type;
+	/**
+	 * @user: user resource
+	 */
+	bool			user;
+	/**
+	 * @id: ID to expose to users
+	 */
+	u32 id;
 };
 
-/**
- * rdma_restrack_init() - initialize resource tracking
- * @res:  resource tracking root
- */
-void rdma_restrack_init(struct rdma_restrack_root *res);
+int rdma_restrack_count(struct ib_device *dev,
+			enum rdma_restrack_type type);
 
-/**
- * rdma_restrack_clean() - clean resource tracking
- * @res:  resource tracking root
- */
-void rdma_restrack_clean(struct rdma_restrack_root *res);
-
-/**
- * rdma_restrack_count() - the current usage of specific object
- * @res:  resource entry
- * @type: actual type of object to operate
- * @ns:   PID namespace
- */
-int rdma_restrack_count(struct rdma_restrack_root *res,
-			enum rdma_restrack_type type,
-			struct pid_namespace *ns);
-
-/**
- * rdma_restrack_add() - add object to the reource tracking database
- * @res:  resource entry
- */
-void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_kadd(struct rdma_restrack_entry *res);
+void rdma_restrack_uadd(struct rdma_restrack_entry *res);
 
 /**
  * rdma_restrack_del() - delete object from the reource tracking database
@@ -155,7 +123,7 @@
  */
 static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
 {
-	return !res->task;
+	return !res->user;
 }
 
 /**
@@ -173,16 +141,10 @@
 /**
  * rdma_restrack_set_task() - set the task for this resource
  * @res:  resource entry
- * @task: task struct
+ * @caller: kernel name, the current task will be used if the caller is NULL.
  */
-static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res,
-					  struct task_struct *task)
-{
-	if (res->task)
-		put_task_struct(res->task);
-	get_task_struct(task);
-	res->task = task;
-}
+void rdma_restrack_set_task(struct rdma_restrack_entry *res,
+			    const char *caller);
 
 /*
  * Helper functions for rdma drivers when filling out
@@ -194,4 +156,7 @@
 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value);
 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name,
 			       u64 value);
+struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev,
+						   enum rdma_restrack_type type,
+						   u32 id);
 #endif /* _RDMA_RESTRACK_H_ */
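
A sketch of the split add API that replaces rdma_restrack_add(): the owner of
the resource now selects the entry point. The helper and the QP type choice
are illustrative assumptions.

/* Illustrative only: register a resource with the tracking database. */
static void example_track(struct rdma_restrack_entry *res, bool user_owned)
{
	res->type = RDMA_RESTRACK_QP;
	if (user_owned)
		rdma_restrack_uadd(res);	/* marks res->user */
	else
		rdma_restrack_kadd(res);	/* kernel-owned, no task */
}
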
diff --git a/include/rdma/rw.h b/include/rdma/rw.h
index a3cbbc7..6ad9dc8 100644
--- a/include/rdma/rw.h
+++ b/include/rdma/rw.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 #ifndef _RDMA_RW_H
 #define _RDMA_RW_H
@@ -47,15 +39,6 @@
 			struct ib_send_wr	inv_wr;
 			struct ib_mr		*mr;
 		} *reg;
-
-		struct {
-			struct rdma_rw_reg_ctx	data;
-			struct rdma_rw_reg_ctx	prot;
-			struct ib_send_wr	sig_inv_wr;
-			struct ib_mr		*sig_mr;
-			struct ib_sge		sig_sge;
-			struct ib_sig_handover_wr sig_wr;
-		} *sig;
 	};
 };
 
diff --git a/include/rdma/signature.h b/include/rdma/signature.h
new file mode 100644
index 0000000..d16b0fc
--- /dev/null
+++ b/include/rdma/signature.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_SIGNATURE_H_
+#define _RDMA_SIGNATURE_H_
+
+#include <linux/types.h>
+
+enum ib_signature_prot_cap {
+	IB_PROT_T10DIF_TYPE_1 = 1,
+	IB_PROT_T10DIF_TYPE_2 = 1 << 1,
+	IB_PROT_T10DIF_TYPE_3 = 1 << 2,
+};
+
+enum ib_signature_guard_cap {
+	IB_GUARD_T10DIF_CRC	= 1,
+	IB_GUARD_T10DIF_CSUM	= 1 << 1,
+};
+
+/**
+ * enum ib_signature_type - Signature types
+ * @IB_SIG_TYPE_NONE: Unprotected.
+ * @IB_SIG_TYPE_T10_DIF: Type T10-DIF
+ */
+enum ib_signature_type {
+	IB_SIG_TYPE_NONE,
+	IB_SIG_TYPE_T10_DIF,
+};
+
+/**
+ * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types
+ * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
+ * @IB_T10DIF_CSUM: Corresponds to IP checksum rules.
+ */
+enum ib_t10_dif_bg_type {
+	IB_T10DIF_CRC,
+	IB_T10DIF_CSUM,
+};
+
+/**
+ * struct ib_t10_dif_domain - Parameters specific for T10-DIF
+ *     domain.
+ * @bg_type: T10-DIF block guard type (CRC|CSUM)
+ * @pi_interval: protection information interval.
+ * @bg: seed of guard computation.
+ * @app_tag: application tag of guard block
+ * @ref_tag: initial guard block reference tag.
+ * @ref_remap: Indicate whether the reftag increments with each block
+ * @app_escape: Indicate to skip block check if apptag=0xffff
+ * @ref_escape: Indicate to skip block check if reftag=0xffffffff
+ * @apptag_check_mask: check bitmask of application tag.
+ */
+struct ib_t10_dif_domain {
+	enum ib_t10_dif_bg_type bg_type;
+	u16			pi_interval;
+	u16			bg;
+	u16			app_tag;
+	u32			ref_tag;
+	bool			ref_remap;
+	bool			app_escape;
+	bool			ref_escape;
+	u16			apptag_check_mask;
+};
+
+/**
+ * struct ib_sig_domain - Parameters for signature domain
+ * @sig_type: specific signature type
+ * @sig: union of all signature domain attributes that may
+ *     be used to set domain layout.
+ */
+struct ib_sig_domain {
+	enum ib_signature_type sig_type;
+	union {
+		struct ib_t10_dif_domain dif;
+	} sig;
+};
+
+/**
+ * struct ib_sig_attrs - Parameters for signature handover operation
+ * @check_mask: bitmask for signature byte check (8 bytes)
+ * @mem: memory domain layout descriptor.
+ * @wire: wire domain layout descriptor.
+ * @meta_length: metadata length
+ */
+struct ib_sig_attrs {
+	u8			check_mask;
+	struct ib_sig_domain	mem;
+	struct ib_sig_domain	wire;
+	int			meta_length;
+};
+
+enum ib_sig_err_type {
+	IB_SIG_BAD_GUARD,
+	IB_SIG_BAD_REFTAG,
+	IB_SIG_BAD_APPTAG,
+};
+
+/*
+ * Signature check masks (8 bytes in total) according to the T10-PI standard:
+ *  -------- -------- ------------
+ * | GUARD  | APPTAG |   REFTAG   |
+ * |  2B    |  2B    |    4B      |
+ *  -------- -------- ------------
+ */
+enum {
+	IB_SIG_CHECK_GUARD = 0xc0,
+	IB_SIG_CHECK_APPTAG = 0x30,
+	IB_SIG_CHECK_REFTAG = 0x0f,
+};
+
+/*
+ * struct ib_sig_err - signature error descriptor
+ */
+struct ib_sig_err {
+	enum ib_sig_err_type	err_type;
+	u32			expected;
+	u32			actual;
+	u64			sig_err_offset;
+	u32			key;
+};
+
+#endif /* _RDMA_SIGNATURE_H_ */
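
As a sketch of how these definitions combine in practice (the 512-byte
protection interval and the check mask are illustrative defaults, not values
mandated by this header):

/* Illustrative only: request T10-DIF CRC protection on the wire. */
static void example_fill_sig_attrs(struct ib_sig_attrs *attrs, u32 first_lba)
{
	memset(attrs, 0, sizeof(*attrs));
	attrs->mem.sig_type = IB_SIG_TYPE_NONE;	/* memory side unprotected */
	attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
	attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
	attrs->wire.sig.dif.pi_interval = 512;
	attrs->wire.sig.dif.ref_tag = first_lba;
	attrs->wire.sig.dif.ref_remap = true;	/* reftag advances per block */
	attrs->check_mask = IB_SIG_CHECK_GUARD | IB_SIG_CHECK_REFTAG;
}
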
diff --git a/include/rdma/tid_rdma_defs.h b/include/rdma/tid_rdma_defs.h
new file mode 100644
index 0000000..08fe47c
--- /dev/null
+++ b/include/rdma/tid_rdma_defs.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#ifndef TID_RDMA_DEFS_H
+#define TID_RDMA_DEFS_H
+
+#include <rdma/ib_pack.h>
+
+struct tid_rdma_read_req {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	struct ib_reth reth;
+	__be32 tid_flow_psn;
+	__be32 tid_flow_qp;
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_read_resp {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 aeth;
+	__be32 reserved[4];
+	__be32 verbs_psn;
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_write_req {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	struct ib_reth reth;
+	__be32 reserved[2];
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_write_resp {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 aeth;
+	__be32 reserved[3];
+	__be32 tid_flow_psn;
+	__be32 tid_flow_qp;
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_write_data {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 reserved[6];
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_resync {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 reserved[6];
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_ack {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 aeth;
+	__be32 reserved[2];
+	__be32 tid_flow_psn;
+	__be32 verbs_psn;
+	__be32 tid_flow_qp;
+	__be32 verbs_qp;
+};
+
+/*
+ * TID RDMA Opcodes
+ */
+#define IB_OPCODE_TID_RDMA 0xe0
+enum {
+	IB_OPCODE_WRITE_REQ       = 0x0,
+	IB_OPCODE_WRITE_RESP      = 0x1,
+	IB_OPCODE_WRITE_DATA      = 0x2,
+	IB_OPCODE_WRITE_DATA_LAST = 0x3,
+	IB_OPCODE_READ_REQ        = 0x4,
+	IB_OPCODE_READ_RESP       = 0x5,
+	IB_OPCODE_RESYNC          = 0x6,
+	IB_OPCODE_ACK             = 0x7,
+
+	IB_OPCODE(TID_RDMA, WRITE_REQ),
+	IB_OPCODE(TID_RDMA, WRITE_RESP),
+	IB_OPCODE(TID_RDMA, WRITE_DATA),
+	IB_OPCODE(TID_RDMA, WRITE_DATA_LAST),
+	IB_OPCODE(TID_RDMA, READ_REQ),
+	IB_OPCODE(TID_RDMA, READ_RESP),
+	IB_OPCODE(TID_RDMA, RESYNC),
+	IB_OPCODE(TID_RDMA, ACK),
+};
+
+#define TID_OP(x) IB_OPCODE_TID_RDMA_##x
+
+/*
+ * Define TID RDMA specific WR opcodes. The ib_wr_opcode
+ * enum already provides some reserved values for use by
+ * low level drivers. Two of those are used but renamed
+ * to be more descriptive.
+ */
+#define IB_WR_TID_RDMA_WRITE IB_WR_RESERVED1
+#define IB_WR_TID_RDMA_READ  IB_WR_RESERVED2
+
+#endif /* TID_RDMA_DEFS_H */
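
The IB_OPCODE() pasting above yields values of the form 0xe0 | sub-opcode, so
IB_OPCODE_TID_RDMA_WRITE_REQ is 0xe0 and TID_OP(ACK) is 0xe7. A sketch of a
range check built on that layout (illustrative, not from this header):

/* Illustrative only: classify a BTH opcode as TID RDMA. */
static inline bool example_is_tid_rdma(u8 opcode)
{
	return opcode >= TID_OP(WRITE_REQ) && opcode <= TID_OP(ACK);
}
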
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 9e997c3..28570ac 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -52,6 +52,7 @@
 	UVERBS_ATTR_TYPE_IDR,
 	UVERBS_ATTR_TYPE_FD,
 	UVERBS_ATTR_TYPE_ENUM_IN,
+	UVERBS_ATTR_TYPE_IDRS_ARRAY,
 };
 
 enum uverbs_obj_access {
@@ -78,6 +79,8 @@
 	 */
 	u8 alloc_and_copy:1;
 	u8 mandatory:1;
+	/* True if this is from UVERBS_ATTR_UHW */
+	u8 is_udata:1;
 
 	union {
 		struct {
@@ -101,7 +104,7 @@
 		} enum_def;
 	} u;
 
-	/* This weird split of the enum lets us remove some padding */
+	/* This weird split lets us remove some padding */
 	union {
 		struct {
 			/*
@@ -111,6 +114,17 @@
 			 */
 			const struct uverbs_attr_spec *ids;
 		} enum_def;
+
+		struct {
+			/*
+			 * higher bits mean the namespace and lower bits mean
+			 * the type id within the namespace.
+			 */
+			u16				obj_type;
+			u16				min_len;
+			u16				max_len;
+			u8				access;
+		} objs_arr;
 	} u2;
 };
 
@@ -128,6 +142,13 @@
  *
  * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
  * the object slot, and OBJ_ID,METH_ID,0 and returns the method slot.
+ *
+ * This also encodes the tables for the write() and extended write() commands
+ * using the coding
+ *   OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command #
+ *   OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex #
+ * i.e. the WRITE path is treated as a special method type in the ioctl
+ * framework.
  */
 enum uapi_radix_data {
 	UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
@@ -135,12 +156,16 @@
 	UVERBS_API_ATTR_KEY_BITS = 6,
 	UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
 	UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
+	UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS,
 
 	UVERBS_API_METHOD_KEY_BITS = 5,
 	UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
-	UVERBS_API_METHOD_KEY_NUM_CORE = 24,
-	UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) -
-					   UVERBS_API_METHOD_KEY_NUM_CORE,
+	UVERBS_API_METHOD_KEY_NUM_CORE = 22,
+	UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT,
+	UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT,
+	UVERBS_API_METHOD_KEY_NUM_DRIVER =
+		(UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) -
+		UVERBS_API_METHOD_KEY_NUM_CORE,
 	UVERBS_API_METHOD_KEY_MASK = GENMASK(
 		UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
 		UVERBS_API_METHOD_KEY_SHIFT),
@@ -193,7 +218,22 @@
 	return id << UVERBS_API_METHOD_KEY_SHIFT;
 }
 
-static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
+static inline __attribute_const__ u32 uapi_key_write_method(u32 id)
+{
+	if (id >= UVERBS_API_WRITE_KEY_NUM)
+		return UVERBS_API_KEY_ERR;
+	return UVERBS_API_METHOD_IS_WRITE | id;
+}
+
+static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id)
+{
+	if (id >= UVERBS_API_WRITE_KEY_NUM)
+		return UVERBS_API_KEY_ERR;
+	return UVERBS_API_METHOD_IS_WRITE_EX | id;
+}
+
+static inline __attribute_const__ u32
+uapi_key_attr_to_ioctl_method(u32 attr_key)
 {
 	return attr_key &
 	       (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
@@ -201,10 +241,23 @@
 
 static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
 {
-	return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+	unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+	return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
 	       (key & UVERBS_API_ATTR_KEY_MASK) == 0;
 }
 
+static inline __attribute_const__ bool uapi_key_is_write_method(u32 key)
+{
+	return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE;
+}
+
+static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key)
+{
+	return (key & UVERBS_API_METHOD_KEY_MASK) ==
+	       UVERBS_API_METHOD_IS_WRITE_EX;
+}
+
 static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
 {
 	/* 0 is the method slot itself */
@@ -234,9 +287,12 @@
 	return id;
 }
 
+/* Only true for ioctl methods */
 static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
 {
-	return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+	unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+	return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
 	       (key & UVERBS_API_ATTR_KEY_MASK) != 0;
 }
 
@@ -251,6 +307,11 @@
 	return attr_key - 1;
 }
 
+static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey)
+{
+	return attr_bkey + 1;
+}
+
 /*
  * =======================================
  *	Verbs definitions
@@ -268,8 +329,7 @@
 	u32				     flags;
 	size_t				     num_attrs;
 	const struct uverbs_attr_def * const (*attrs)[];
-	int (*handler)(struct ib_uverbs_file *ufile,
-		       struct uverbs_attr_bundle *ctx);
+	int (*handler)(struct uverbs_attr_bundle *attrs);
 };
 
 struct uverbs_object_def {
@@ -279,11 +339,132 @@
 	const struct uverbs_method_def * const (*methods)[];
 };
 
-struct uverbs_object_tree_def {
-	size_t					 num_objects;
-	const struct uverbs_object_def * const (*objects)[];
+enum uapi_definition_kind {
+	UAPI_DEF_END = 0,
+	UAPI_DEF_OBJECT_START,
+	UAPI_DEF_WRITE,
+	UAPI_DEF_CHAIN_OBJ_TREE,
+	UAPI_DEF_CHAIN,
+	UAPI_DEF_IS_SUPPORTED_FUNC,
+	UAPI_DEF_IS_SUPPORTED_DEV_FN,
 };
 
+enum uapi_definition_scope {
+	UAPI_SCOPE_OBJECT = 1,
+	UAPI_SCOPE_METHOD = 2,
+};
+
+struct uapi_definition {
+	u8 kind;
+	u8 scope;
+	union {
+		struct {
+			u16 object_id;
+		} object_start;
+		struct {
+			u16 command_num;
+			u8 is_ex:1;
+			u8 has_udata:1;
+			u8 has_resp:1;
+			u8 req_size;
+			u8 resp_size;
+		} write;
+	};
+
+	union {
+		bool (*func_is_supported)(struct ib_device *device);
+		int (*func_write)(struct uverbs_attr_bundle *attrs);
+		const struct uapi_definition *chain;
+		const struct uverbs_object_def *chain_obj_tree;
+		size_t needs_fn_offset;
+	};
+};
+
+/* Define things connected to object_id */
+#define DECLARE_UVERBS_OBJECT(_object_id, ...)                                 \
+	{                                                                      \
+		.kind = UAPI_DEF_OBJECT_START,                                 \
+		.object_start = { .object_id = _object_id },                   \
+	},                                                                     \
+		##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...)              \
+	{                                                                      \
+		.kind = UAPI_DEF_WRITE,                                        \
+		.scope = UAPI_SCOPE_OBJECT,                                    \
+		.write = { .is_ex = 0, .command_num = _command_num },          \
+		.func_write = _func,                                           \
+		_cmd_desc,                                                     \
+	},                                                                     \
+		##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...)           \
+	{                                                                      \
+		.kind = UAPI_DEF_WRITE,                                        \
+		.scope = UAPI_SCOPE_OBJECT,                                    \
+		.write = { .is_ex = 1, .command_num = _command_num },          \
+		.func_write = _func,                                           \
+		_cmd_desc,                                                     \
+	},                                                                     \
+		##__VA_ARGS__
+
+/*
+ * Object is only supported if the function pointer named ibdev_fn in struct
+ * ib_device_ops is not NULL.
+ */
+#define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn)                                        \
+	{                                                                      \
+		.kind = UAPI_DEF_IS_SUPPORTED_DEV_FN,                          \
+		.scope = UAPI_SCOPE_OBJECT,                                    \
+		.needs_fn_offset =                                             \
+			offsetof(struct ib_device_ops, ibdev_fn) +             \
+			BUILD_BUG_ON_ZERO(                                     \
+			    sizeof(((struct ib_device_ops *)0)->ibdev_fn) !=   \
+			    sizeof(void *)),				       \
+	}
+
+/*
+ * Method is only supported if the function pointer named ibdev_fn in struct
+ * ib_device_ops is not NULL.
+ */
+#define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn)                                     \
+	{                                                                      \
+		.kind = UAPI_DEF_IS_SUPPORTED_DEV_FN,                          \
+		.scope = UAPI_SCOPE_METHOD,                                    \
+		.needs_fn_offset =                                             \
+			offsetof(struct ib_device_ops, ibdev_fn) +             \
+			BUILD_BUG_ON_ZERO(                                     \
+			    sizeof(((struct ib_device_ops *)0)->ibdev_fn) !=   \
+			    sizeof(void *)),                                   \
+	}
+
+/* Call a function to determine if the entire object is supported or not */
+#define UAPI_DEF_IS_OBJ_SUPPORTED(_func)                                       \
+	{                                                                      \
+		.kind = UAPI_DEF_IS_SUPPORTED_FUNC,                            \
+		.scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func,        \
+	}
+
+/* Include another struct uapi_definition in this one */
+#define UAPI_DEF_CHAIN(_def_var)                                               \
+	{                                                                      \
+		.kind = UAPI_DEF_CHAIN, .chain = _def_var,                     \
+	}
+
+/* Temporary until the tree base description is replaced */
+#define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr, ...)                \
+	{                                                                      \
+		.kind = UAPI_DEF_CHAIN_OBJ_TREE,                               \
+		.object_start = { .object_id = _object_enum },                 \
+		.chain_obj_tree = _object_ptr,                                 \
+	},								       \
+		##__VA_ARGS__
+#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...)                       \
+	UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum),    \
+				##__VA_ARGS__)
+
 /*
  * =======================================
  *	Attribute Specifications
@@ -323,6 +504,33 @@
 #define UA_MANDATORY .mandatory = 1
 #define UA_OPTIONAL .mandatory = 0
 
+/*
+ * _min_len must be bigger than 0 and _max_len must be smaller than 4095.  Only
+ * READ/WRITE accesses are supported.
+ */
+#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \
+			     ...)                                              \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = (_attr_id) +                                             \
+		      BUILD_BUG_ON_ZERO((_min_len) == 0 ||                     \
+					(_max_len) >                           \
+						PAGE_SIZE / sizeof(void *) ||  \
+					(_min_len) > (_max_len) ||             \
+					(_access) == UVERBS_ACCESS_NEW ||      \
+					(_access) == UVERBS_ACCESS_DESTROY),   \
+		.attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY,                 \
+			  .u2.objs_arr.obj_type = _idr_type,                   \
+			  .u2.objs_arr.access = _access,                       \
+			  .u2.objs_arr.min_len = _min_len,                     \
+			  .u2.objs_arr.max_len = _max_len,                     \
+			  __VA_ARGS__ } })
+
+/*
+ * Only for use with UVERBS_ATTR_IDR, this allows any uobject type to be
+ * accepted; the user must validate the type of the uobject instead.
+ */
+#define UVERBS_IDR_ANY_OBJECT 0xFFFF
+
 #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...)                     \
 	(&(const struct uverbs_attr_def){                                      \
 		.id = _attr_id,                                                \
@@ -365,6 +573,15 @@
 			  __VA_ARGS__ },                                       \
 	})
 
+/* An input value that is a member of the enum _enum_type. */
+#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...)                        \
+	UVERBS_ATTR_PTR_IN(                                                    \
+		_attr_id,                                                      \
+		UVERBS_ATTR_SIZE(                                              \
+			sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)),  \
+			sizeof(u64)),                                          \
+		__VA_ARGS__)
+
 /*
  * An input value that is a bitwise combination of values of _enum_type.
  * This permits the flag value to be passed as either a u32 or u64, it must
@@ -386,25 +603,12 @@
 #define UVERBS_ATTR_UHW()                                                      \
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN,                                 \
 			   UVERBS_ATTR_MIN_SIZE(0),			       \
-			   UA_OPTIONAL),				       \
+			   UA_OPTIONAL,                                        \
+			   .is_udata = 1),				       \
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT,                               \
 			    UVERBS_ATTR_MIN_SIZE(0),			       \
-			    UA_OPTIONAL)
-
-/*
- * =======================================
- *	Declaration helpers
- * =======================================
- */
-
-#define DECLARE_UVERBS_OBJECT_TREE(_name, ...)                                 \
-	static const struct uverbs_object_def *const _name##_ptr[] = {         \
-		__VA_ARGS__,                                                   \
-	};                                                                     \
-	static const struct uverbs_object_tree_def _name = {                   \
-		.num_objects = ARRAY_SIZE(_name##_ptr),                        \
-		.objects = &_name##_ptr,                                       \
-	}
+			    UA_OPTIONAL,                                       \
+			    .is_udata = 1)
 
 /* =================================================
  *              Parsing infrastructure
@@ -431,15 +635,24 @@
 	const struct uverbs_api_attr	*attr_elm;
 };
 
+struct uverbs_objs_arr_attr {
+	struct ib_uobject **uobjects;
+	u16 len;
+};
+
 struct uverbs_attr {
 	union {
 		struct uverbs_ptr_attr	ptr_attr;
 		struct uverbs_obj_attr	obj_attr;
+		struct uverbs_objs_arr_attr objs_arr_attr;
 	};
 };
 
 struct uverbs_attr_bundle {
+	struct ib_udata driver_udata;
+	struct ib_udata ucore;
 	struct ib_uverbs_file *ufile;
+	struct ib_ucontext *context;
 	DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
 	struct uverbs_attr attrs[];
 };
@@ -451,6 +664,23 @@
 			attrs_bundle->attr_present);
 }
 
+/**
+ * rdma_udata_to_drv_context - Helper macro to get the driver's context out of
+ *                             ib_udata which is embedded in uverbs_attr_bundle.
+ *
+ * If udata is not NULL this cannot fail. Otherwise a NULL udata will result
+ * in a NULL ucontext pointer, as a safety precaution. Callers should be using
+ * 'udata' to determine if the driver call is in user or kernel mode, not
+ * 'ucontext'.
+ *
+ */
+#define rdma_udata_to_drv_context(udata, drv_dev_struct, member)               \
+	(udata ? container_of(container_of(udata, struct uverbs_attr_bundle,   \
+					   driver_udata)                       \
+				      ->context,                               \
+			      drv_dev_struct, member) :                        \
+		 (drv_dev_struct *)NULL)
+
 #define IS_UVERBS_COPY_ERR(_ret)		((_ret) && (_ret) != -ENOENT)
 
 static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
@@ -507,6 +737,53 @@
 	return attr->ptr_attr.len;
 }
 
+/*
+ * uverbs_attr_ptr_get_array_size() - Get the number of elements in the array
+ * pointed to by a ptr attribute.
+ * @attrs: The attribute bundle
+ * @idx: The ID of the attribute
+ * @elem_size: The size of the element in the array
+ */
+static inline int
+uverbs_attr_ptr_get_array_size(struct uverbs_attr_bundle *attrs, u16 idx,
+			       size_t elem_size)
+{
+	int size = uverbs_attr_get_len(attrs, idx);
+
+	if (size < 0)
+		return size;
+
+	if (size % elem_size)
+		return -EINVAL;
+
+	return size / elem_size;
+}
+
+/**
+ * uverbs_attr_get_uobjs_arr() - Provides the array's properties for a
+ * UVERBS_ATTR_TYPE_IDRS_ARRAY attribute.
+ * @arr: Returned pointer to array of pointers for uobjects or NULL if
+ *       the attribute isn't provided.
+ *
+ * Return: The array length or 0 if no attribute was provided.
+ */
+static inline int uverbs_attr_get_uobjs_arr(
+	const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx,
+	struct ib_uobject ***arr)
+{
+	const struct uverbs_attr *attr =
+			uverbs_attr_get(attrs_bundle, attr_idx);
+
+	if (IS_ERR(attr)) {
+		*arr = NULL;
+		return 0;
+	}
+
+	*arr = attr->objs_arr_attr.uobjects;
+
+	return attr->objs_arr_attr.len;
+}
+
 static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
 {
 	return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
@@ -582,6 +859,12 @@
 #define uverbs_copy_from_or_zero(to, attrs_bundle, idx)			      \
 	_uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
 
+static inline struct ib_ucontext *
+ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs)
+{
+	return ib_uverbs_get_ucontext_file(attrs->ufile);
+}
+
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
 		       size_t idx, u64 allowed_bits);
@@ -603,6 +886,11 @@
 {
 	return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
 }
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		      size_t idx, s64 lower_bound, u64 upper_bound,
+		      s64 *def_val);
+int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+				  size_t idx, const void *from, size_t size);
 #else
 static inline int
 uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -631,6 +919,40 @@
 {
 	return ERR_PTR(-EINVAL);
 }
+static inline int
+_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		  size_t idx, s64 lower_bound, u64 upper_bound,
+		  s64 *def_val)
+{
+	return -EINVAL;
+}
+static inline int
+uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+			      size_t idx, const void *from, size_t size)
+{
+	return -EINVAL;
+}
 #endif
 
+#define uverbs_get_const(_to, _attrs_bundle, _idx)                             \
+	({                                                                     \
+		s64 _val;                                                      \
+		int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx,       \
+					     type_min(typeof(*_to)),           \
+					     type_max(typeof(*_to)), NULL);    \
+		(*_to) = _val;                                                 \
+		_ret;                                                          \
+	})
+
+#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default)           \
+	({                                                                     \
+		s64 _val;                                                      \
+		s64 _def_val = _default;                                       \
+		int _ret =                                                     \
+			_uverbs_get_const(&_val, _attrs_bundle, _idx,          \
+					  type_min(typeof(*_to)),              \
+					  type_max(typeof(*_to)), &_def_val);  \
+		(*_to) = _val;                                                 \
+		_ret;                                                          \
+	})
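+
+/*
+ * Usage sketch (ATTR_FOO_MODE is a hypothetical attribute ID): the value is
+ * range-checked against the destination variable's type before being
+ * stored.
+ *
+ *	u8 mode;
+ *	int ret = uverbs_get_const(&mode, attrs, ATTR_FOO_MODE);
+ *
+ * uverbs_get_const_default() behaves the same but substitutes the given
+ * default when the attribute is absent instead of failing.
+ */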
 #endif
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index b3b2173..3447bfe 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -43,7 +43,7 @@
 #define _UVERBS_NAME(x, y)	_UVERBS_PASTE(x, y)
 #define UVERBS_METHOD(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
 #define UVERBS_HANDLER(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
-#define UVERBS_OBJECT(id)	_UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id)
+#define UVERBS_OBJECT(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id)
 
 /* These are static so they do not need to be qualified */
 #define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
@@ -102,18 +102,11 @@
 #define ADD_UVERBS_METHODS(_name, _object_id, ...)                             \
 	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
 		_object_id)[] = { __VA_ARGS__ };                               \
-	static const struct uverbs_object_def _name##_struct = {               \
+	static const struct uverbs_object_def _name = {                        \
 		.id = _object_id,                                              \
 		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
 		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
-	};                                                                     \
-	static const struct uverbs_object_def *const _name##_ptrs[] = {        \
-		&_name##_struct,                                               \
-	};                                                                     \
-	static const struct uverbs_object_tree_def _name = {                   \
-		.num_objects = 1,                                              \
-		.objects = &_name##_ptrs,                                      \
-	}
+	};
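+
+/*
+ * Example (hypothetical names): declare a driver object exposing a single
+ * method:
+ *
+ *	ADD_UVERBS_METHODS(drv_obj_foo, UVERBS_OBJECT_FOO,
+ *			   &UVERBS_METHOD(UVERBS_METHOD_FOO_ACTION));
+ */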
 
 /* Used by drivers to declare a complete parsing tree for a single method that
  * differs only in having additional driver specific attributes.
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 3b00231..05eabfd 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -37,15 +37,6 @@
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/ib_user_ioctl_verbs.h>
 
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
-#else
-static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
-	return NULL;
-}
-#endif
-
 /* Returns _id, or causes a compile error if _id is not a u32.
  *
  * The uobj APIs should only be used with the write based uAPI to access
@@ -54,17 +45,18 @@
  */
 #define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
 
-#define uobj_get_type(_ufile, _object)                                         \
-	uapi_get_object((_ufile)->device->uapi, _object)
+#define uobj_get_type(_attrs, _object)                                         \
+	uapi_get_object((_attrs)->ufile->device->uapi, _object)
 
-#define uobj_get_read(_type, _id, _ufile)                                      \
-	rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
-				_uobj_check_id(_id), UVERBS_LOOKUP_READ)
+#define uobj_get_read(_type, _id, _attrs)                                      \
+	rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+				_uobj_check_id(_id), UVERBS_LOOKUP_READ,       \
+				_attrs)
 
-#define ufd_get_read(_type, _fdnum, _ufile)                                    \
-	rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
+#define ufd_get_read(_type, _fdnum, _attrs)                                    \
+	rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
 				(_fdnum)*typecheck(s32, _fdnum),               \
-				UVERBS_LOOKUP_READ)
+				UVERBS_LOOKUP_READ, _attrs)
 
 static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
 {
@@ -72,26 +64,27 @@
 		return NULL;
 	return uobj->object;
 }
-#define uobj_get_obj_read(_object, _type, _id, _ufile)                         \
+#define uobj_get_obj_read(_object, _type, _id, _attrs)                         \
 	((struct ib_##_object *)_uobj_get_obj_read(                            \
-		uobj_get_read(_type, _id, _ufile)))
+		uobj_get_read(_type, _id, _attrs)))
 
-#define uobj_get_write(_type, _id, _ufile)                                     \
-	rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
-				_uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
+#define uobj_get_write(_type, _id, _attrs)                                     \
+	rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+				_uobj_check_id(_id), UVERBS_LOOKUP_WRITE,      \
+				_attrs)
 
 int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
-			   struct ib_uverbs_file *ufile, int success_res);
-#define uobj_perform_destroy(_type, _id, _ufile, _success_res)                 \
-	__uobj_perform_destroy(uobj_get_type(_ufile, _type),                   \
-			       _uobj_check_id(_id), _ufile, _success_res)
+			   struct uverbs_attr_bundle *attrs);
+#define uobj_perform_destroy(_type, _id, _attrs)                               \
+	__uobj_perform_destroy(uobj_get_type(_attrs, _type),                   \
+			       _uobj_check_id(_id), _attrs)
 
 struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
-				      u32 id, struct ib_uverbs_file *ufile);
+				      u32 id, struct uverbs_attr_bundle *attrs);
 
-#define uobj_get_destroy(_type, _id, _ufile)                                   \
-	__uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id),  \
-			   _ufile)
+#define uobj_get_destroy(_type, _id, _attrs)                                   \
+	__uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id),  \
+			   _attrs)
 
 static inline void uobj_put_destroy(struct ib_uobject *uobj)
 {
@@ -111,34 +104,99 @@
 	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 }
 
-static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
-						 int success_res)
+static inline int __must_check
+uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
 {
-	int ret = rdma_alloc_commit_uobject(uobj);
+	int ret = rdma_alloc_commit_uobject(uobj, attrs);
 
 	if (ret)
 		return ret;
-	return success_res;
+	return 0;
 }
 
-static inline void uobj_alloc_abort(struct ib_uobject *uobj)
+static inline void uobj_alloc_abort(struct ib_uobject *uobj,
+				    struct uverbs_attr_bundle *attrs)
 {
-	rdma_alloc_abort_uobject(uobj);
+	rdma_alloc_abort_uobject(uobj, attrs);
 }
 
 static inline struct ib_uobject *
-__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
-	     struct ib_device **ib_dev)
+__uobj_alloc(const struct uverbs_api_object *obj,
+	     struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev)
 {
-	struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile);
+	struct ib_uobject *uobj =
+		rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
 
 	if (!IS_ERR(uobj))
-		*ib_dev = uobj->context->device;
+		*ib_dev = attrs->context->device;
 	return uobj;
 }
 
-#define uobj_alloc(_type, _ufile, _ib_dev)                                     \
-	__uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
+#define uobj_alloc(_type, _attrs, _ib_dev)                                     \
+	__uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev)
+
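+/*
+ * Initialize a newly created flow action and bind it to its uobject.
+ */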
+static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
+						  struct ib_uobject *uobj,
+						  struct ib_device *ib_dev,
+						  enum ib_flow_action_type type)
+{
+	atomic_set(&action->usecnt, 0);
+	action->device = ib_dev;
+	action->type = type;
+	action->uobject = uobj;
+	uobj->object = action;
+}
+
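+/*
+ * Tracks the counters and flow-action collections referenced by a flow's
+ * specs so they can be released when the flow is destroyed.
+ */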
+struct ib_uflow_resources {
+	size_t			max;
+	size_t			num;
+	size_t			collection_num;
+	size_t			counters_num;
+	struct ib_counters	**counters;
+	struct ib_flow_action	**collection;
+};
+
+struct ib_uflow_object {
+	struct ib_uobject		uobject;
+	struct ib_uflow_resources	*resources;
+};
+
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs);
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+			enum ib_flow_spec_type type,
+			void *ibobj);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+
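+/*
+ * Bind a created flow to its uobject, take a reference on the attached QP
+ * (if any), and hand over ownership of the tracked resources.
+ */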
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+			       struct ib_qp *qp, struct ib_device *device,
+			       struct ib_uflow_resources *uflow_res)
+{
+	struct ib_uflow_object *uflow;
+
+	uobj->object = ibflow;
+	ibflow->uobject = uobj;
+
+	if (qp) {
+		atomic_inc(&qp->usecnt);
+		ibflow->qp = qp;
+	}
+
+	ibflow->device = device;
+	uflow = container_of(uobj, typeof(*uflow), uobject);
+	uflow->resources = uflow_res;
+}
+
+struct uverbs_api_object {
+	const struct uverbs_obj_type *type_attrs;
+	const struct uverbs_obj_type_class *type_class;
+	u8 disabled:1;
+	u32 id;
+};
+
+static inline u32 uobj_get_object_id(struct ib_uobject *uobj)
+{
+	return uobj->uapi_object->id;
+}
 
 #endif
 
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index acb1bfa..d57a5ba 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -95,7 +95,8 @@
 	void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
 	/* This does not consume the kref on uobj */
 	int __must_check (*destroy_hw)(struct ib_uobject *uobj,
-				       enum rdma_remove_reason why);
+				       enum rdma_remove_reason why,
+				       struct uverbs_attr_bundle *attrs);
 	void (*remove_handle)(struct ib_uobject *uobj);
 	u8    needs_kfree_rcu;
 };
@@ -126,18 +127,23 @@
 	 * completely unchanged.
 	 */
 	int __must_check (*destroy_object)(struct ib_uobject *uobj,
-					   enum rdma_remove_reason why);
+					   enum rdma_remove_reason why,
+					   struct uverbs_attr_bundle *attrs);
 };
 
 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
 					   struct ib_uverbs_file *ufile, s64 id,
-					   enum rdma_lookup_mode mode);
+					   enum rdma_lookup_mode mode,
+					   struct uverbs_attr_bundle *attrs);
 void rdma_lookup_put_uobject(struct ib_uobject *uobj,
 			     enum rdma_lookup_mode mode);
 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
-					    struct ib_uverbs_file *ufile);
-void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
-int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj);
+					    struct ib_uverbs_file *ufile,
+					    struct uverbs_attr_bundle *attrs);
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
+			      struct uverbs_attr_bundle *attrs);
+int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+					   struct uverbs_attr_bundle *attrs);
 
 struct uverbs_obj_fd_type {
 	/*
@@ -157,6 +163,7 @@
 
 extern const struct uverbs_obj_type_class uverbs_idr_class;
 extern const struct uverbs_obj_type_class uverbs_fd_class;
+void uverbs_close_fd(struct file *f);
 
 #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) -	\
 				   sizeof(char))