Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index 4f385ec..fe2fc9e 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -36,6 +36,8 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/cred.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
struct ib_addr {
union {
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 77c7908..2734c89 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -46,7 +46,6 @@
#include <net/ip.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
-#include <net/ipv6.h>
#include <net/net_namespace.h>
/**
@@ -95,20 +94,18 @@
* @timeout_ms: Amount of time to wait for the address resolution to complete.
* @callback: Call invoked once address resolution has completed, timed out,
* or been canceled. A status of 0 indicates success.
+ * @resolve_by_gid_attr: Resolve the IP based on the GID attribute from
+ * rdma_dev_addr.
* @context: User-specified context associated with the call.
*/
int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
- struct rdma_dev_addr *addr, int timeout_ms,
+ struct rdma_dev_addr *addr, unsigned long timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
- void *context);
+ bool resolve_by_gid_attr, void *context);
void rdma_addr_cancel(struct rdma_dev_addr *addr);
-void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
- const struct net_device *dev,
- const unsigned char *dst_dev_addr);
-
int rdma_addr_size(const struct sockaddr *addr);
int rdma_addr_size_in6(struct sockaddr_in6 *addr);
int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
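A minimal caller sketch for the updated rdma_resolve_ip() signature, shown here for reference only and not part of the patch: the timeout is now an unsigned long in milliseconds, and the new resolve_by_gid_attr flag asks the core to resolve based on the GID attribute already stored in the rdma_dev_addr. The resolve_done() callback and start_resolve() wrapper names are illustrative.

#include <rdma/ib_addr.h>

/* Illustrative completion callback; status == 0 means resolution succeeded. */
static void resolve_done(int status, struct sockaddr *src_addr,
			 struct rdma_dev_addr *addr, void *context)
{
}

/* Illustrative wrapper showing the new argument types and order. */
static int start_resolve(struct sockaddr *src, const struct sockaddr *dst,
			 struct rdma_dev_addr *dev_addr, void *ctx)
{
	return rdma_resolve_ip(src, dst, dev_addr, 2000 /* ms */, resolve_done,
			       false /* resolve_by_gid_attr */, ctx);
}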
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 62e990b..870b5e6 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -54,6 +54,10 @@
void *),
void *context);
+int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
+ u16 *vlan_id, u8 *smac);
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
+
/**
* ib_get_cached_pkey - Returns a cached PKey table entry
* @device: The device to query.
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index c10f4b5..49f4f75 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -583,7 +583,7 @@
struct sa_path_rec *path;
const struct ib_gid_attr *sgid_attr;
__be64 service_id;
- int timeout_ms;
+ unsigned long timeout_ms;
const void *private_data;
u8 private_data_len;
u8 max_cm_retries;
diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h
index f62b842..f8982e4 100644
--- a/include/rdma/ib_fmr_pool.h
+++ b/include/rdma/ib_fmr_pool.h
@@ -88,6 +88,6 @@
int list_len,
u64 io_virtual_address);
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
+void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
#endif /* IB_FMR_POOL_H */
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 6e35416..9a90bd0 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -100,6 +100,8 @@
__be64 compare_data; /* potentially unaligned */
} __packed;
+#include <rdma/tid_rdma_defs.h>
+
union ib_ehdrs {
struct {
__be32 deth[2];
@@ -117,6 +119,16 @@
__be32 aeth;
__be32 ieth;
struct ib_atomic_eth atomic_eth;
+ /* TID RDMA headers */
+ union {
+ struct tid_rdma_read_req r_req;
+ struct tid_rdma_read_resp r_rsp;
+ struct tid_rdma_write_req w_req;
+ struct tid_rdma_write_resp w_rsp;
+ struct tid_rdma_write_data w_data;
+ struct tid_rdma_resync resync;
+ struct tid_rdma_ack ack;
+ } tid_rdma;
} __packed;
struct ib_other_headers {
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index f6ba366..eea946f 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -198,7 +198,7 @@
__be16 attr_offset;
__be16 reserved;
ib_sa_comp_mask comp_mask;
-} __attribute__ ((packed));
+} __packed;
struct ib_mad {
struct ib_mad_hdr mad_hdr;
@@ -227,7 +227,7 @@
struct ib_rmpp_hdr rmpp_hdr;
struct ib_sa_hdr sa_hdr;
u8 data[IB_MGMT_SA_DATA];
-} __attribute__ ((packed));
+} __packed;
struct ib_vendor_mad {
struct ib_mad_hdr mad_hdr;
@@ -277,6 +277,7 @@
IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+ IB_PORT_CAP_MASK2_SUP = 1 << 15,
IB_PORT_CM_SUP = 1 << 16,
IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
IB_PORT_REINIT_SUP = 1 << 18,
@@ -295,6 +296,15 @@
IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
};
+enum ib_port_capability_mask2_bits {
+ IB_PORT_SET_NODE_DESC_SUP = 1 << 0,
+ IB_PORT_EX_PORT_INFO_EX_SUP = 1 << 1,
+ IB_PORT_VIRT_SUP = 1 << 2,
+ IB_PORT_SWITCH_PORT_STATE_TABLE_SUP = 1 << 3,
+ IB_PORT_LINK_WIDTH_2X_SUP = 1 << 4,
+ IB_PORT_LINK_SPEED_HDR_SUP = 1 << 5,
+};
+
#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
struct opa_class_port_info {
@@ -606,12 +616,11 @@
void *context;
u32 hi_tid;
u32 flags;
+ void *security;
+ struct list_head mad_agent_sec_list;
u8 port_num;
u8 rmpp_version;
- void *security;
bool smp_allowed;
- bool lsm_nb_reg;
- struct notifier_block lsm_nb;
};
/**
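A hedged sketch of how the new CapabilityMask2 bits are meant to be consumed; port_supports_2x_width() is an illustrative helper, not part of this patch. IB_PORT_CAP_MASK2_SUP in port_cap_flags signals that the port_cap_flags2 field (added to struct ib_port_attr later in this patch) is valid, and the individual ib_port_capability_mask2_bits are then tested there.

/* Illustrative only: check for 2X link width support on a port. */
static bool port_supports_2x_width(struct ib_device *dev, u8 port_num)
{
	struct ib_port_attr attr;

	if (ib_query_port(dev, port_num, &attr))
		return false;

	return (attr.port_cap_flags & IB_PORT_CAP_MASK2_SUP) &&
	       (attr.port_cap_flags2 & IB_PORT_LINK_WIDTH_2X_SUP);
}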
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index b6ddf2a..1952097 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -449,28 +449,23 @@
void ib_sa_cancel_query(int id, struct ib_sa_query *query);
-int ib_sa_path_rec_get(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct sa_path_rec *resp,
+int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
+ u8 port_num, struct sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+ gfp_t gfp_mask,
+ void (*callback)(int status, struct sa_path_rec *resp,
void *context),
- void *context,
- struct ib_sa_query **query);
+ void *context, struct ib_sa_query **query);
int ib_sa_service_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- u8 method,
- struct ib_sa_service_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_service_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query);
+ struct ib_device *device, u8 port_num, u8 method,
+ struct ib_sa_service_rec *rec,
+ ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+ gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_service_rec *resp,
+ void *context),
+ void *context, struct ib_sa_query **sa_query);
struct ib_sa_multicast {
struct ib_sa_mcmember_rec rec;
@@ -573,12 +568,11 @@
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp,
void *context),
- void *context,
- struct ib_sa_query **sa_query);
+ void *context, struct ib_sa_query **sa_query);
bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
struct ib_device *device,
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
index b439e98..7be0028 100644
--- a/include/rdma/ib_smi.h
+++ b/include/rdma/ib_smi.h
@@ -61,7 +61,7 @@
u8 data[IB_SMP_DATA_SIZE];
u8 initial_path[IB_SMP_MAX_PATH_HOPS];
u8 return_path[IB_SMP_MAX_PATH_HOPS];
-} __attribute__ ((packed));
+} __packed;
#define IB_SMP_DIRECTION cpu_to_be16(0x8000)
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a1fd638..a91b2af 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -36,65 +36,57 @@
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
+#include <rdma/ib_verbs.h>
struct ib_ucontext;
struct ib_umem_odp;
struct ib_umem {
- struct ib_ucontext *context;
+ struct ib_device *ibdev;
+ struct mm_struct *owning_mm;
size_t length;
unsigned long address;
- int page_shift;
- int writable;
- int hugetlb;
+ u32 writable : 1;
+ u32 is_odp : 1;
struct work_struct work;
- struct mm_struct *mm;
- unsigned long diff;
- struct ib_umem_odp *odp_data;
struct sg_table sg_head;
int nmap;
- int npages;
+ unsigned int sg_nents;
};
/* Returns the offset of the umem start relative to the first page. */
static inline int ib_umem_offset(struct ib_umem *umem)
{
- return umem->address & (BIT(umem->page_shift) - 1);
-}
-
-/* Returns the first page of an ODP umem. */
-static inline unsigned long ib_umem_start(struct ib_umem *umem)
-{
- return umem->address - ib_umem_offset(umem);
-}
-
-/* Returns the address of the page after the last one of an ODP umem. */
-static inline unsigned long ib_umem_end(struct ib_umem *umem)
-{
- return ALIGN(umem->address + umem->length, BIT(umem->page_shift));
+ return umem->address & ~PAGE_MASK;
}
static inline size_t ib_umem_num_pages(struct ib_umem *umem)
{
- return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift;
+ return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
+ ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
+ PAGE_SHIFT;
}
#ifdef CONFIG_INFINIBAND_USER_MEM
-struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
size_t size, int access, int dmasync);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length);
+unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ unsigned long virt);
#else /* CONFIG_INFINIBAND_USER_MEM */
#include <linux/err.h>
-static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
+static inline struct ib_umem *ib_umem_get(struct ib_udata *udata,
unsigned long addr, size_t size,
- int access, int dmasync) {
+ int access, int dmasync)
+{
return ERR_PTR(-EINVAL);
}
static inline void ib_umem_release(struct ib_umem *umem) { }
@@ -103,6 +95,12 @@
size_t length) {
return -EINVAL;
}
+static inline int ib_umem_find_best_pgsz(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ unsigned long virt) {
+ return -EINVAL;
+}
+
#endif /* CONFIG_INFINIBAND_USER_MEM */
#endif /* IB_UMEM_H */
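A worked sketch of the new PAGE_SIZE-based arithmetic, assuming PAGE_SIZE == 4096; the umem_page_math_example() helper is illustrative and not part of this patch. A umem starting at address 0x1234 with length 0x3000 has an offset of 0x234 into its first page and spans four pages.

/* Illustrative only: mirrors the new ib_umem_offset() and ib_umem_num_pages(). */
static void umem_page_math_example(void)
{
	unsigned long address = 0x1234, length = 0x3000;
	unsigned long offset = address & ~PAGE_MASK;		/* 0x234 */
	size_t npages = (ALIGN(address + length, PAGE_SIZE) -
			 ALIGN_DOWN(address, PAGE_SIZE)) >> PAGE_SHIFT;

	/* offset == 0x234, npages == 4 (page frames 0x1000 through 0x4000) */
	(void)offset;
	(void)npages;
}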
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 381cdf5..253df1a 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -37,12 +37,10 @@
#include <rdma/ib_verbs.h>
#include <linux/interval_tree.h>
-struct umem_odp_node {
- u64 __subtree_last;
- struct rb_node rb;
-};
-
struct ib_umem_odp {
+ struct ib_umem umem;
+ struct ib_ucontext_per_mm *per_mm;
+
/*
* An array of the pages included in the on-demand paging umem.
* Indices of pages that are currently not mapped into the device will
@@ -64,33 +62,49 @@
struct mutex umem_mutex;
void *private; /* for the HW driver to use. */
- /* When false, use the notifier counter in the ucontext struct. */
- bool mn_counters_active;
int notifiers_seq;
int notifiers_count;
-
- /* A linked list of umems that don't have private mmu notifier
- * counters yet. */
- struct list_head no_private_counters;
- struct ib_umem *umem;
+ int npages;
/* Tree tracking */
- struct umem_odp_node interval_tree;
+ struct interval_tree_node interval_tree;
+
+ /*
+ * An implicit odp umem cannot be DMA mapped, has 0 length, and serves
+ * only as an anchor for the driver to hold onto the per_mm. FIXME:
+ * This should be removed and drivers should work with the per_mm
+ * directly.
+ */
+ bool is_implicit_odp;
struct completion notifier_completion;
int dying;
+ unsigned int page_shift;
struct work_struct work;
};
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
+{
+ return container_of(umem, struct ib_umem_odp, umem);
+}
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
- int access);
-struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size);
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
+{
+ return umem_odp->interval_tree.start;
+}
-void ib_umem_odp_release(struct ib_umem *umem);
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp)
+{
+ return umem_odp->interval_tree.last + 1;
+}
+
+static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
+{
+ return (ib_umem_end(umem_odp) - ib_umem_start(umem_odp)) >>
+ umem_odp->page_shift;
+}
/*
* The lower 2 bits of the DMA address signal the R/W permissions for
@@ -105,13 +119,33 @@
#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
- u64 access_mask, unsigned long current_seq);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+struct ib_ucontext_per_mm {
+ struct mmu_notifier mn;
+ struct pid *tgid;
+
+ struct rb_root_cached umem_tree;
+ /* Protects umem_tree */
+ struct rw_semaphore umem_rwsem;
+};
+
+struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
+ size_t size, int access);
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
+ int access);
+struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem,
+ unsigned long addr, size_t size);
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
+
+int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
+ u64 bcnt, u64 access_mask,
+ unsigned long current_seq);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bound);
-typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end,
void *cookie);
/*
* Call the callback on each ib_umem in the range. Returns the logical or of
@@ -126,49 +160,45 @@
* Find first region intersecting with address range.
* Return NULL if not found
*/
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
- u64 addr, u64 length);
+static inline struct ib_umem_odp *
+rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length)
+{
+ struct interval_tree_node *node;
-static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+ node = interval_tree_iter_first(root, addr, addr + length - 1);
+ if (!node)
+ return NULL;
+ return container_of(node, struct ib_umem_odp, interval_tree);
+
+}
+
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
unsigned long mmu_seq)
{
/*
* This code is strongly based on the KVM code from
* mmu_notifier_retry. Should be called with
- * the relevant locks taken (item->odp_data->umem_mutex
+ * the relevant locks taken (umem_odp->umem_mutex
* and the ucontext umem_mutex semaphore locked for read).
*/
- /* Do not allow page faults while the new ib_umem hasn't seen a state
- * with zero notifiers yet, and doesn't have its own valid set of
- * private counters. */
- if (!item->odp_data->mn_counters_active)
+ if (unlikely(umem_odp->notifiers_count))
return 1;
-
- if (unlikely(item->odp_data->notifiers_count))
- return 1;
- if (item->odp_data->notifiers_seq != mmu_seq)
+ if (umem_odp->notifiers_seq != mmu_seq)
return 1;
return 0;
}
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
-static inline int ib_umem_odp_get(struct ib_ucontext *context,
- struct ib_umem *umem,
- int access)
-{
- return -EINVAL;
-}
-
-static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size)
+static inline struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata,
+ unsigned long addr,
+ size_t size, int access)
{
return ERR_PTR(-EINVAL);
}
-static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
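A driver-side sketch of the reworked ODP helpers; odp_range_example() is an illustrative name, not part of this patch. struct ib_umem_odp now embeds the ib_umem, so a driver converts with to_ib_umem_odp() and reads the page-aligned boundaries from the embedded interval_tree_node.

/* Illustrative only: query the range of an ODP-backed umem. */
static void odp_range_example(struct ib_umem *umem)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
	unsigned long start = ib_umem_start(umem_odp);	/* first page address */
	unsigned long end = ib_umem_end(umem_odp);	/* one past the last page */
	size_t npages = ib_umem_odp_num_pages(umem_odp);

	/* npages == (end - start) >> umem_odp->page_shift */
	(void)start;
	(void)end;
	(void)npages;
}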
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ec299fc..e7e733a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -41,14 +41,11 @@
#include <linux/types.h>
#include <linux/device.h>
-#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rwsem.h>
-#include <linux/scatterlist.h>
#include <linux/workqueue.h>
-#include <linux/socket.h>
#include <linux/irq_poll.h>
#include <uapi/linux/if_ether.h>
#include <net/ipv6.h>
@@ -56,21 +53,98 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
-
+#include <linux/refcount.h>
#include <linux/if_link.h>
#include <linux/atomic.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/cgroup_rdma.h>
+#include <linux/irqflags.h>
+#include <linux/preempt.h>
+#include <linux/dim.h>
#include <uapi/rdma/ib_user_verbs.h>
+#include <rdma/rdma_counter.h>
#include <rdma/restrack.h>
+#include <rdma/signature.h>
#include <uapi/rdma/rdma_user_ioctl.h>
#include <uapi/rdma/ib_user_ioctl_verbs.h>
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
+struct ib_umem_odp;
+
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
+extern struct workqueue_struct *ib_comp_unbound_wq;
+
+__printf(3, 4) __cold
+void ibdev_printk(const char *level, const struct ib_device *ibdev,
+ const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_alert(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_crit(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_err(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_warn(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define ibdev_dbg(__dev, format, args...) \
+ dynamic_ibdev_dbg(__dev, format, ##args)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ if (__ratelimit(&_rs)) \
+ ibdev_level(ibdev, fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \
+ __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \
+ ##__VA_ARGS__); \
+} while (0)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
union ib_gid {
u8 raw[16];
@@ -92,7 +166,7 @@
#define ROCE_V2_UDP_DPORT 4791
struct ib_gid_attr {
- struct net_device *ndev;
+ struct net_device __rcu *ndev;
struct ib_device *device;
union ib_gid gid;
enum ib_gid_type gid_type;
@@ -100,16 +174,6 @@
u8 port_num;
};
-enum rdma_node_type {
- /* IB values map to NodeInfo:NodeType. */
- RDMA_NODE_IB_CA = 1,
- RDMA_NODE_IB_SWITCH,
- RDMA_NODE_IB_ROUTER,
- RDMA_NODE_RNIC,
- RDMA_NODE_USNIC,
- RDMA_NODE_USNIC_UDP,
-};
-
enum {
/* set the local administered indication */
IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2,
@@ -119,7 +183,8 @@
RDMA_TRANSPORT_IB,
RDMA_TRANSPORT_IWARP,
RDMA_TRANSPORT_USNIC,
- RDMA_TRANSPORT_USNIC_UDP
+ RDMA_TRANSPORT_USNIC_UDP,
+ RDMA_TRANSPORT_UNSPECIFIED,
};
enum rdma_protocol_type {
@@ -130,7 +195,7 @@
};
__attribute_const__ enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type);
+rdma_node_get_transport(unsigned int node_type);
enum rdma_network_type {
RDMA_NETWORK_IB,
@@ -229,7 +294,7 @@
*/
IB_DEVICE_CROSS_CHANNEL = (1 << 27),
IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
- IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
+ IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30),
IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
@@ -238,17 +303,7 @@
IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35),
/* The device supports padding incoming writes to cacheline. */
IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36),
-};
-
-enum ib_signature_prot_cap {
- IB_PROT_T10DIF_TYPE_1 = 1,
- IB_PROT_T10DIF_TYPE_2 = 1 << 1,
- IB_PROT_T10DIF_TYPE_3 = 1 << 2,
-};
-
-enum ib_signature_guard_cap {
- IB_GUARD_T10DIF_CRC = 1,
- IB_GUARD_T10DIF_CSUM = 1 << 1,
+ IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37),
};
enum ib_atomic_cap {
@@ -268,6 +323,7 @@
IB_ODP_SUPPORT_WRITE = 1 << 2,
IB_ODP_SUPPORT_READ = 1 << 3,
IB_ODP_SUPPORT_ATOMIC = 1 << 4,
+ IB_ODP_SUPPORT_SRQ_RECV = 1 << 5,
};
struct ib_odp_caps {
@@ -276,6 +332,7 @@
uint32_t rc_odp_caps;
uint32_t uc_odp_caps;
uint32_t ud_odp_caps;
+ uint32_t xrc_odp_caps;
} per_transport_caps;
};
@@ -290,8 +347,8 @@
};
enum ib_tm_cap_flags {
- /* Support tag matching on RC transport */
- IB_TM_CAP_RC = 1 << 0,
+ /* Support tag matching with rendezvous offload for RC transport */
+ IB_TM_CAP_RNDV_RC = 1 << 0,
};
struct ib_tm_caps {
@@ -309,7 +366,7 @@
struct ib_cq_init_attr {
unsigned int cqe;
- int comp_vector;
+ u32 comp_vector;
u32 flags;
};
@@ -374,6 +431,7 @@
int max_srq_wr;
int max_srq_sge;
unsigned int max_fast_reg_page_list_len;
+ unsigned int max_pi_fast_reg_page_list_len;
u16 max_pkeys;
u8 local_ca_ack_delay;
int sig_prot_cap;
@@ -432,8 +490,19 @@
IB_PORT_ACTIVE_DEFER = 5
};
+enum ib_port_phys_state {
+ IB_PORT_PHYS_STATE_SLEEP = 1,
+ IB_PORT_PHYS_STATE_POLLING = 2,
+ IB_PORT_PHYS_STATE_DISABLED = 3,
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
+ IB_PORT_PHYS_STATE_LINK_UP = 5,
+ IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
+ IB_PORT_PHYS_STATE_PHY_TEST = 7,
+};
+
enum ib_port_width {
IB_WIDTH_1X = 1,
+ IB_WIDTH_2X = 16,
IB_WIDTH_4X = 2,
IB_WIDTH_8X = 4,
IB_WIDTH_12X = 8
@@ -443,6 +512,7 @@
{
switch (width) {
case IB_WIDTH_1X: return 1;
+ case IB_WIDTH_2X: return 2;
case IB_WIDTH_4X: return 4;
case IB_WIDTH_8X: return 8;
case IB_WIDTH_12X: return 12;
@@ -592,6 +662,7 @@
u8 active_width;
u8 active_speed;
u8 phys_state;
+ u16 port_cap_flags2;
};
enum ib_device_modify_flags {
@@ -729,7 +800,11 @@
IB_RATE_25_GBPS = 15,
IB_RATE_100_GBPS = 16,
IB_RATE_200_GBPS = 17,
- IB_RATE_300_GBPS = 18
+ IB_RATE_300_GBPS = 18,
+ IB_RATE_28_GBPS = 19,
+ IB_RATE_50_GBPS = 20,
+ IB_RATE_400_GBPS = 21,
+ IB_RATE_600_GBPS = 22,
};
/**
@@ -752,118 +827,26 @@
* enum ib_mr_type - memory region type
* @IB_MR_TYPE_MEM_REG: memory region that is used for
* normal registration
- * @IB_MR_TYPE_SIGNATURE: memory region that is used for
- * signature operations (data-integrity
- * capable regions)
* @IB_MR_TYPE_SG_GAPS: memory region that is capable to
* register any arbitrary sg lists (without
* the normal mr constraints - see
* ib_map_mr_sg)
+ * @IB_MR_TYPE_DM: memory region that is used for device
+ * memory registration
+ * @IB_MR_TYPE_USER: memory region that is used for the user-space
+ * application
+ * @IB_MR_TYPE_DMA: memory region that is used for DMA operations
+ * without address translations (VA=PA)
+ * @IB_MR_TYPE_INTEGRITY: memory region that is used for
+ * data integrity operations
*/
enum ib_mr_type {
IB_MR_TYPE_MEM_REG,
- IB_MR_TYPE_SIGNATURE,
IB_MR_TYPE_SG_GAPS,
-};
-
-/**
- * Signature types
- * IB_SIG_TYPE_NONE: Unprotected.
- * IB_SIG_TYPE_T10_DIF: Type T10-DIF
- */
-enum ib_signature_type {
- IB_SIG_TYPE_NONE,
- IB_SIG_TYPE_T10_DIF,
-};
-
-/**
- * Signature T10-DIF block-guard types
- * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
- * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
- */
-enum ib_t10_dif_bg_type {
- IB_T10DIF_CRC,
- IB_T10DIF_CSUM
-};
-
-/**
- * struct ib_t10_dif_domain - Parameters specific for T10-DIF
- * domain.
- * @bg_type: T10-DIF block guard type (CRC|CSUM)
- * @pi_interval: protection information interval.
- * @bg: seed of guard computation.
- * @app_tag: application tag of guard block
- * @ref_tag: initial guard block reference tag.
- * @ref_remap: Indicate wethear the reftag increments each block
- * @app_escape: Indicate to skip block check if apptag=0xffff
- * @ref_escape: Indicate to skip block check if reftag=0xffffffff
- * @apptag_check_mask: check bitmask of application tag.
- */
-struct ib_t10_dif_domain {
- enum ib_t10_dif_bg_type bg_type;
- u16 pi_interval;
- u16 bg;
- u16 app_tag;
- u32 ref_tag;
- bool ref_remap;
- bool app_escape;
- bool ref_escape;
- u16 apptag_check_mask;
-};
-
-/**
- * struct ib_sig_domain - Parameters for signature domain
- * @sig_type: specific signauture type
- * @sig: union of all signature domain attributes that may
- * be used to set domain layout.
- */
-struct ib_sig_domain {
- enum ib_signature_type sig_type;
- union {
- struct ib_t10_dif_domain dif;
- } sig;
-};
-
-/**
- * struct ib_sig_attrs - Parameters for signature handover operation
- * @check_mask: bitmask for signature byte check (8 bytes)
- * @mem: memory domain layout desciptor.
- * @wire: wire domain layout desciptor.
- */
-struct ib_sig_attrs {
- u8 check_mask;
- struct ib_sig_domain mem;
- struct ib_sig_domain wire;
-};
-
-enum ib_sig_err_type {
- IB_SIG_BAD_GUARD,
- IB_SIG_BAD_REFTAG,
- IB_SIG_BAD_APPTAG,
-};
-
-/**
- * Signature check masks (8 bytes in total) according to the T10-PI standard:
- * -------- -------- ------------
- * | GUARD | APPTAG | REFTAG |
- * | 2B | 2B | 4B |
- * -------- -------- ------------
- */
-enum {
- IB_SIG_CHECK_GUARD = 0xc0,
- IB_SIG_CHECK_APPTAG = 0x30,
- IB_SIG_CHECK_REFTAG = 0x0f,
-};
-
-/**
- * struct ib_sig_err - signature error descriptor
- */
-struct ib_sig_err {
- enum ib_sig_err_type err_type;
- u32 expected;
- u32 actual;
- u64 sig_err_offset;
- u32 key;
+ IB_MR_TYPE_DM,
+ IB_MR_TYPE_USER,
+ IB_MR_TYPE_DMA,
+ IB_MR_TYPE_INTEGRITY,
};
enum ib_mr_status_check {
@@ -1120,7 +1103,7 @@
IB_QP_CREATE_MANAGED_SEND = 1 << 3,
IB_QP_CREATE_MANAGED_RECV = 1 << 4,
IB_QP_CREATE_NETIF_QP = 1 << 5,
- IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
+ IB_QP_CREATE_INTEGRITY_EN = 1 << 6,
/* FREE = 1 << 7, */
IB_QP_CREATE_SCATTER_FCS = 1 << 8,
IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
@@ -1137,7 +1120,9 @@
*/
struct ib_qp_init_attr {
+ /* Consumer's event_handler callback must not block */
void (*event_handler)(struct ib_event *, void *);
+
void *qp_context;
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
@@ -1146,7 +1131,7 @@
struct ib_qp_cap cap;
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
- enum ib_qp_create_flags create_flags;
+ u32 create_flags;
/*
* Only needed for special QP types, or when using the RW API.
@@ -1297,7 +1282,7 @@
/* These are kernel only and can not be issued by userspace */
IB_WR_REG_MR = 0x20,
- IB_WR_REG_SIG_MR,
+ IB_WR_REG_MR_INTEGRITY,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
@@ -1407,20 +1392,6 @@
return container_of(wr, struct ib_reg_wr, wr);
}
-struct ib_sig_handover_wr {
- struct ib_send_wr wr;
- struct ib_sig_attrs *sig_attrs;
- struct ib_mr *sig_mr;
- int access_flags;
- struct ib_sge *prot;
-};
-
-static inline const struct ib_sig_handover_wr *
-sig_handover_wr(const struct ib_send_wr *wr)
-{
- return container_of(wr, struct ib_sig_handover_wr, wr);
-}
-
struct ib_recv_wr {
struct ib_recv_wr *next;
union {
@@ -1491,29 +1462,15 @@
* it is set when we are closing the file descriptor and indicates
* that mm_sem may be locked.
*/
- int closing;
+ bool closing;
bool cleanup_retryable;
- struct pid *tgid;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct rb_root_cached umem_tree;
- /*
- * Protects .umem_rbroot and tree, as well as odp_mrs_count and
- * mmu notifiers registration.
- */
- struct rw_semaphore umem_rwsem;
- void (*invalidate_range)(struct ib_umem *umem,
- unsigned long start, unsigned long end);
-
- struct mmu_notifier mn;
- atomic_t notifier_count;
- /* A list of umems that don't have private mmu notifier counters yet. */
- struct list_head no_private_counters;
- int odp_mrs_count;
-#endif
-
struct ib_rdmacg_object cg_obj;
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
struct ib_uobject {
@@ -1576,9 +1533,10 @@
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
enum ib_poll_context {
- IB_POLL_DIRECT, /* caller context, no hw completions */
- IB_POLL_SOFTIRQ, /* poll from softirq context */
- IB_POLL_WORKQUEUE, /* poll from workqueue */
+ IB_POLL_DIRECT, /* caller context, no hw completions */
+ IB_POLL_SOFTIRQ, /* poll from softirq context */
+ IB_POLL_WORKQUEUE, /* poll from workqueue */
+ IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
};
struct ib_cq {
@@ -1595,6 +1553,8 @@
struct irq_poll iop;
struct work_struct work;
};
+ struct workqueue_struct *comp_wq;
+ struct dim *dim;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -1779,10 +1739,14 @@
struct ib_qp_security *qp_sec;
u8 port;
+ bool integrity_en;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
struct rdma_restrack_entry res;
+
+ /* The counter the qp is bind to */
+ struct rdma_counter *counter;
};
struct ib_dm {
@@ -1801,6 +1765,7 @@
u64 iova;
u64 length;
unsigned int page_size;
+ enum ib_mr_type type;
bool need_inval;
union {
struct ib_uobject *uobject; /* user */
@@ -1808,7 +1773,7 @@
};
struct ib_dm *dm;
-
+ struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -2182,11 +2147,8 @@
struct ib_cache {
rwlock_t lock;
struct ib_event_handler event_handler;
- struct ib_port_cache *ports;
};
-struct iw_cm_verbs;
-
struct ib_port_immutable {
int pkey_tbl_len;
int gid_tbl_len;
@@ -2194,6 +2156,23 @@
u32 max_mad_size;
};
+struct ib_port_data {
+ struct ib_device *ib_dev;
+
+ struct ib_port_immutable immutable;
+
+ spinlock_t pkey_list_lock;
+ struct list_head pkey_list;
+
+ struct ib_port_cache cache;
+
+ spinlock_t netdev_lock;
+ struct net_device __rcu *netdev;
+ struct hlist_node ndev_hash_link;
+ struct rdma_port_counter port_counter;
+ struct rdma_hw_stats *hw_stats;
+};
+
/* rdma netdev type - specifies protocol type */
enum rdma_netdev_t {
RDMA_NETDEV_OPA_VNIC,
@@ -2229,10 +2208,14 @@
union ib_gid *gid, u16 mlid);
};
-struct ib_port_pkey_list {
- /* Lock to hold while modifying the list. */
- spinlock_t list_lock;
- struct list_head pkey_list;
+struct rdma_netdev_alloc_params {
+ size_t sizeof_priv;
+ unsigned int txqs;
+ unsigned int rxqs;
+ void *param;
+
+ int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num,
+ struct net_device *netdev, void *param);
};
struct ib_counters {
@@ -2249,33 +2232,253 @@
};
struct uverbs_attr_bundle;
+struct iw_cm_id;
+struct iw_cm_conn_param;
-struct ib_device {
- /* Do not access @dma_device directly from ULP nor from HW drivers. */
- struct device *dma_device;
+#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \
+ .size_##ib_struct = \
+ (sizeof(struct drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \
+ BUILD_BUG_ON_ZERO( \
+ !__same_type(((struct drv_struct *)NULL)->member, \
+ struct ib_struct)))
- char name[IB_DEVICE_NAME_MAX];
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \
+ ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
- struct list_head event_handler_list;
- spinlock_t event_handler_lock;
+#define rdma_zalloc_drv_obj(ib_dev, ib_type) \
+ rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
- spinlock_t client_data_lock;
- struct list_head core_list;
- /* Access to the client_data_list is protected by the client_data_lock
- * spinlock and the lists_rwsem read-write semaphore */
- struct list_head client_data_list;
+#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
- struct ib_cache cache;
+/**
+ * struct ib_device_ops - InfiniBand device operations
+ * This structure defines all the InfiniBand device operations; providers will
+ * need to define the supported operations, otherwise they will be set to null.
+ */
+struct ib_device_ops {
+ struct module *owner;
+ enum rdma_driver_id driver_id;
+ u32 uverbs_abi_ver;
+ unsigned int uverbs_no_driver_id_binding:1;
+
+ int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr);
+ int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+ void (*drain_rq)(struct ib_qp *qp);
+ void (*drain_sq)(struct ib_qp *qp);
+ int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+ int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
+ int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+ int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
+ int (*post_srq_recv)(struct ib_srq *srq,
+ const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+ int (*process_mad)(struct ib_device *device, int process_mad_flags,
+ u8 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad_hdr *in_mad, size_t in_mad_size,
+ struct ib_mad_hdr *out_mad, size_t *out_mad_size,
+ u16 *out_mad_pkey_index);
+ int (*query_device)(struct ib_device *device,
+ struct ib_device_attr *device_attr,
+ struct ib_udata *udata);
+ int (*modify_device)(struct ib_device *device, int device_modify_mask,
+ struct ib_device_modify *device_modify);
+ void (*get_dev_fw_str)(struct ib_device *device, char *str);
+ const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
+ int comp_vector);
+ int (*query_port)(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *port_attr);
+ int (*modify_port)(struct ib_device *device, u8 port_num,
+ int port_modify_mask,
+ struct ib_port_modify *port_modify);
/**
- * port_immutable is indexed by port number
+ * The following mandatory functions are used only at device
+ * registration. Keep functions such as these at the end of this
+ * structure to avoid cache line misses when accessing struct ib_device
+ * in fast paths.
*/
- struct ib_port_immutable *port_immutable;
+ int (*get_port_immutable)(struct ib_device *device, u8 port_num,
+ struct ib_port_immutable *immutable);
+ enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
+ u8 port_num);
+ /**
+ * When calling get_netdev, the HW vendor's driver should return the
+ * net device of device @device at port @port_num or NULL if such
+ * a net device doesn't exist. The vendor driver should call dev_hold
+ * on this net device. The HW vendor's device driver must guarantee
+ * that this function returns NULL before the net device has finished
+ * NETDEV_UNREGISTER state.
+ */
+ struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
+ /**
+ * rdma netdev operation
+ *
+ * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
+ * must return -EOPNOTSUPP if it doesn't support the specified type.
+ */
+ struct net_device *(*alloc_rdma_netdev)(
+ struct ib_device *device, u8 port_num, enum rdma_netdev_t type,
+ const char *name, unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
- int num_comp_vectors;
-
- struct ib_port_pkey_list *port_pkey_list;
-
- struct iw_cm_verbs *iwcm;
+ int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params);
+ /**
+ * query_gid should return the GID value for @device, when @port_num
+ * link layer is either IB or iWarp. It is a no-op if @port_num port
+ * is RoCE link layer.
+ */
+ int (*query_gid)(struct ib_device *device, u8 port_num, int index,
+ union ib_gid *gid);
+ /**
+ * When calling add_gid, the HW vendor's driver should add the gid
+ * of device of port at gid index available at @attr. Meta-info of
+ * that gid (for example, the network device related to this gid) is
+ * available at @attr. @context allows the HW vendor driver to store
+ * extra information together with a GID entry. The HW vendor driver may
+ * allocate memory to contain this information and store it in @context
+ * when a new GID entry is written to. Params are consistent until the
+ * next call of add_gid or delete_gid. The function should return 0 on
+ * success or error otherwise. The function could be called
+ * concurrently for different ports. This function is only called when
+ * roce_gid_table is used.
+ */
+ int (*add_gid)(const struct ib_gid_attr *attr, void **context);
+ /**
+ * When calling del_gid, the HW vendor's driver should delete the
+ * gid of device @device at gid index gid_index of port port_num
+ * available in @attr.
+ * Upon the deletion of a GID entry, the HW vendor must free any
+ * allocated memory. The caller will clear @context afterwards.
+ * This function is only called when roce_gid_table is used.
+ */
+ int (*del_gid)(const struct ib_gid_attr *attr, void **context);
+ int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
+ u16 *pkey);
+ int (*alloc_ucontext)(struct ib_ucontext *context,
+ struct ib_udata *udata);
+ void (*dealloc_ucontext)(struct ib_ucontext *context);
+ int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+ void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
+ int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
+ u32 flags, struct ib_udata *udata);
+ int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+ int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+ void (*destroy_ah)(struct ib_ah *ah, u32 flags);
+ int (*create_srq)(struct ib_srq *srq,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+ int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+ int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+ void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
+ struct ib_qp *(*create_qp)(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+ int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata);
+ int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+ int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
+ int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+ int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+ void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+ int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+ struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
+ struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+ int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_pd *pd, struct ib_udata *udata);
+ int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
+ struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata);
+ struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg);
+ int (*advise_mr)(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice, u32 flags,
+ struct ib_sge *sg_list, u32 num_sge,
+ struct uverbs_attr_bundle *attrs);
+ int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+ int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status);
+ struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type,
+ struct ib_udata *udata);
+ int (*dealloc_mw)(struct ib_mw *mw);
+ struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+ int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len,
+ u64 iova);
+ int (*unmap_fmr)(struct list_head *fmr_list);
+ int (*dealloc_fmr)(struct ib_fmr *fmr);
+ void (*invalidate_range)(struct ib_umem_odp *umem_odp,
+ unsigned long start, unsigned long end);
+ int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+ int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+ struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
+ struct ib_udata *udata);
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+ struct ib_flow *(*create_flow)(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain, struct ib_udata *udata);
+ int (*destroy_flow)(struct ib_flow *flow_id);
+ struct ib_flow_action *(*create_flow_action_esp)(
+ struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_flow_action)(struct ib_flow_action *action);
+ int (*modify_flow_action_esp)(
+ struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+ int state);
+ int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_info *ivf);
+ int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_stats *stats);
+ int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+ int type);
+ struct ib_wq *(*create_wq)(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+ void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+ int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+ struct ib_rwq_ind_table *(*create_rwq_ind_table)(
+ struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+ int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+ struct ib_dm *(*alloc_dm)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
+ struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ struct ib_counters *(*create_counters)(
+ struct ib_device *device, struct uverbs_attr_bundle *attrs);
+ int (*destroy_counters)(struct ib_counters *counters);
+ int (*read_counters)(struct ib_counters *counters,
+ struct ib_counters_read_attr *counters_read_attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset);
/**
* alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
@@ -2283,8 +2486,8 @@
* core when the device is removed. A lifespan of -1 in the return
* struct tells the core to set a default lifespan.
*/
- struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
- u8 port_num);
+ struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
+ u8 port_num);
/**
* get_hw_stats - Fill in the counter value(s) in the stats struct.
* @index - The index in the value array we wish to have updated, or
@@ -2297,261 +2500,121 @@
* Drivers are allowed to update all counters in leiu of just the
* one given in index at their option
*/
- int (*get_hw_stats)(struct ib_device *device,
- struct rdma_hw_stats *stats,
- u8 port, int index);
- int (*query_device)(struct ib_device *device,
- struct ib_device_attr *device_attr,
- struct ib_udata *udata);
- int (*query_port)(struct ib_device *device,
- u8 port_num,
- struct ib_port_attr *port_attr);
- enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
- u8 port_num);
- /* When calling get_netdev, the HW vendor's driver should return the
- * net device of device @device at port @port_num or NULL if such
- * a net device doesn't exist. The vendor driver should call dev_hold
- * on this net device. The HW vendor's device driver must guarantee
- * that this function returns NULL before the net device has finished
- * NETDEV_UNREGISTER state.
+ int (*get_hw_stats)(struct ib_device *device,
+ struct rdma_hw_stats *stats, u8 port, int index);
+ /*
+ * This function is called once for each port when an ib device is
+ * registered.
*/
- struct net_device *(*get_netdev)(struct ib_device *device,
- u8 port_num);
- /* query_gid should be return GID value for @device, when @port_num
- * link layer is either IB or iWarp. It is no-op if @port_num port
- * is RoCE link layer.
- */
- int (*query_gid)(struct ib_device *device,
- u8 port_num, int index,
- union ib_gid *gid);
- /* When calling add_gid, the HW vendor's driver should add the gid
- * of device of port at gid index available at @attr. Meta-info of
- * that gid (for example, the network device related to this gid) is
- * available at @attr. @context allows the HW vendor driver to store
- * extra information together with a GID entry. The HW vendor driver may
- * allocate memory to contain this information and store it in @context
- * when a new GID entry is written to. Params are consistent until the
- * next call of add_gid or delete_gid. The function should return 0 on
- * success or error otherwise. The function could be called
- * concurrently for different ports. This function is only called when
- * roce_gid_table is used.
- */
- int (*add_gid)(const struct ib_gid_attr *attr,
- void **context);
- /* When calling del_gid, the HW vendor's driver should delete the
- * gid of device @device at gid index gid_index of port port_num
- * available in @attr.
- * Upon the deletion of a GID entry, the HW vendor must free any
- * allocated memory. The caller will clear @context afterwards.
- * This function is only called when roce_gid_table is used.
- */
- int (*del_gid)(const struct ib_gid_attr *attr,
- void **context);
- int (*query_pkey)(struct ib_device *device,
- u8 port_num, u16 index, u16 *pkey);
- int (*modify_device)(struct ib_device *device,
- int device_modify_mask,
- struct ib_device_modify *device_modify);
- int (*modify_port)(struct ib_device *device,
- u8 port_num, int port_modify_mask,
- struct ib_port_modify *port_modify);
- struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
- struct ib_udata *udata);
- int (*dealloc_ucontext)(struct ib_ucontext *context);
- int (*mmap)(struct ib_ucontext *context,
- struct vm_area_struct *vma);
- struct ib_pd * (*alloc_pd)(struct ib_device *device,
- struct ib_ucontext *context,
- struct ib_udata *udata);
- int (*dealloc_pd)(struct ib_pd *pd);
- struct ib_ah * (*create_ah)(struct ib_pd *pd,
- struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
- int (*modify_ah)(struct ib_ah *ah,
- struct rdma_ah_attr *ah_attr);
- int (*query_ah)(struct ib_ah *ah,
- struct rdma_ah_attr *ah_attr);
- int (*destroy_ah)(struct ib_ah *ah);
- struct ib_srq * (*create_srq)(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata);
- int (*modify_srq)(struct ib_srq *srq,
- struct ib_srq_attr *srq_attr,
- enum ib_srq_attr_mask srq_attr_mask,
- struct ib_udata *udata);
- int (*query_srq)(struct ib_srq *srq,
- struct ib_srq_attr *srq_attr);
- int (*destroy_srq)(struct ib_srq *srq);
- int (*post_srq_recv)(struct ib_srq *srq,
- const struct ib_recv_wr *recv_wr,
- const struct ib_recv_wr **bad_recv_wr);
- struct ib_qp * (*create_qp)(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata);
- int (*modify_qp)(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_udata *udata);
- int (*query_qp)(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr);
- int (*destroy_qp)(struct ib_qp *qp);
- int (*post_send)(struct ib_qp *qp,
- const struct ib_send_wr *send_wr,
- const struct ib_send_wr **bad_send_wr);
- int (*post_recv)(struct ib_qp *qp,
- const struct ib_recv_wr *recv_wr,
- const struct ib_recv_wr **bad_recv_wr);
- struct ib_cq * (*create_cq)(struct ib_device *device,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
- int (*modify_cq)(struct ib_cq *cq, u16 cq_count,
- u16 cq_period);
- int (*destroy_cq)(struct ib_cq *cq);
- int (*resize_cq)(struct ib_cq *cq, int cqe,
- struct ib_udata *udata);
- int (*poll_cq)(struct ib_cq *cq, int num_entries,
- struct ib_wc *wc);
- int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
- int (*req_notify_cq)(struct ib_cq *cq,
- enum ib_cq_notify_flags flags);
- int (*req_ncomp_notif)(struct ib_cq *cq,
- int wc_cnt);
- struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
- int mr_access_flags);
- struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
- u64 start, u64 length,
- u64 virt_addr,
- int mr_access_flags,
- struct ib_udata *udata);
- int (*rereg_user_mr)(struct ib_mr *mr,
- int flags,
- u64 start, u64 length,
- u64 virt_addr,
- int mr_access_flags,
- struct ib_pd *pd,
- struct ib_udata *udata);
- int (*dereg_mr)(struct ib_mr *mr);
- struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg);
- int (*map_mr_sg)(struct ib_mr *mr,
- struct scatterlist *sg,
- int sg_nents,
- unsigned int *sg_offset);
- struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
- enum ib_mw_type type,
- struct ib_udata *udata);
- int (*dealloc_mw)(struct ib_mw *mw);
- struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
- int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
- int (*map_phys_fmr)(struct ib_fmr *fmr,
- u64 *page_list, int list_len,
- u64 iova);
- int (*unmap_fmr)(struct list_head *fmr_list);
- int (*dealloc_fmr)(struct ib_fmr *fmr);
- int (*attach_mcast)(struct ib_qp *qp,
- union ib_gid *gid,
- u16 lid);
- int (*detach_mcast)(struct ib_qp *qp,
- union ib_gid *gid,
- u16 lid);
- int (*process_mad)(struct ib_device *device,
- int process_mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad,
- size_t in_mad_size,
- struct ib_mad_hdr *out_mad,
- size_t *out_mad_size,
- u16 *out_mad_pkey_index);
- struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
- struct ib_ucontext *ucontext,
- struct ib_udata *udata);
- int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
- struct ib_flow * (*create_flow)(struct ib_qp *qp,
- struct ib_flow_attr
- *flow_attr,
- int domain,
- struct ib_udata *udata);
- int (*destroy_flow)(struct ib_flow *flow_id);
- int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
- struct ib_mr_status *mr_status);
- void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
- void (*drain_rq)(struct ib_qp *qp);
- void (*drain_sq)(struct ib_qp *qp);
- int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
- int state);
- int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
- struct ifla_vf_info *ivf);
- int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
- struct ifla_vf_stats *stats);
- int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
- int type);
- struct ib_wq * (*create_wq)(struct ib_pd *pd,
- struct ib_wq_init_attr *init_attr,
- struct ib_udata *udata);
- int (*destroy_wq)(struct ib_wq *wq);
- int (*modify_wq)(struct ib_wq *wq,
- struct ib_wq_attr *attr,
- u32 wq_attr_mask,
- struct ib_udata *udata);
- struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
- int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
- struct ib_flow_action * (*create_flow_action_esp)(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
- int (*destroy_flow_action)(struct ib_flow_action *action);
- int (*modify_flow_action_esp)(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
- struct ib_dm * (*alloc_dm)(struct ib_device *device,
- struct ib_ucontext *context,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs);
- int (*dealloc_dm)(struct ib_dm *dm);
- struct ib_mr * (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
- struct ib_dm_mr_attr *attr,
- struct uverbs_attr_bundle *attrs);
- struct ib_counters * (*create_counters)(struct ib_device *device,
- struct uverbs_attr_bundle *attrs);
- int (*destroy_counters)(struct ib_counters *counters);
- int (*read_counters)(struct ib_counters *counters,
- struct ib_counters_read_attr *counters_read_attr,
- struct uverbs_attr_bundle *attrs);
-
+ int (*init_port)(struct ib_device *device, u8 port_num,
+ struct kobject *port_sysfs);
/**
- * rdma netdev operation
- *
- * Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it
- * doesn't support the specified rdma netdev type.
+ * Allows rdma drivers to add their own restrack attributes.
*/
- struct net_device *(*alloc_rdma_netdev)(
- struct ib_device *device,
- u8 port_num,
- enum rdma_netdev_t type,
- const char *name,
- unsigned char name_assign_type,
- void (*setup)(struct net_device *));
+ int (*fill_res_entry)(struct sk_buff *msg,
+ struct rdma_restrack_entry *entry);
- struct module *owner;
- struct device dev;
- struct kobject *ports_parent;
- struct list_head port_list;
+ /* Device lifecycle callbacks */
+ /*
+ * Called after the device becomes registered, before clients are
+ * attached
+ */
+ int (*enable_driver)(struct ib_device *dev);
+ /*
+ * This is called as part of ib_dealloc_device().
+ */
+ void (*dealloc_driver)(struct ib_device *dev);
- enum {
- IB_DEV_UNINITIALIZED,
- IB_DEV_REGISTERED,
- IB_DEV_UNREGISTERED
- } reg_state;
+ /* iWarp CM callbacks */
+ void (*iw_add_ref)(struct ib_qp *qp);
+ void (*iw_rem_ref)(struct ib_qp *qp);
+ struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn);
+ int (*iw_connect)(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *conn_param);
+ int (*iw_accept)(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *conn_param);
+ int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata,
+ u8 pdata_len);
+ int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
+ int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
+ /**
+ * counter_bind_qp - Bind a QP to a counter.
+ * @counter - The counter to be bound. If counter->id is zero then
+ * the driver needs to allocate a new counter and set counter->id
+ */
+ int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
+ /**
+ * counter_unbind_qp - Unbind the qp from the dynamically-allocated
+ * counter and bind it onto the default one
+ */
+ int (*counter_unbind_qp)(struct ib_qp *qp);
+ /**
+ * counter_dealloc - De-allocate the hw counter
+ */
+ int (*counter_dealloc)(struct rdma_counter *counter);
+ /**
+ * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
+ * the driver initialized data.
+ */
+ struct rdma_hw_stats *(*counter_alloc_stats)(
+ struct rdma_counter *counter);
+ /**
+ * counter_update_stats - Query the stats value of this counter
+ */
+ int (*counter_update_stats)(struct rdma_counter *counter);
- int uverbs_abi_ver;
+ DECLARE_RDMA_OBJ_SIZE(ib_ah);
+ DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_pd);
+ DECLARE_RDMA_OBJ_SIZE(ib_srq);
+ DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
+};
+
+struct ib_core_device {
+ /* device must be the first element in structure until,
+ * union of ib_core_device and device exists in ib_device.
+ */
+ struct device dev;
+ possible_net_t rdma_net;
+ struct kobject *ports_kobj;
+ struct list_head port_list;
+ struct ib_device *owner; /* reach back to owner ib_device */
+};
+
+struct rdma_restrack_root;
+struct ib_device {
+ /* Do not access @dma_device directly from ULP nor from HW drivers. */
+ struct device *dma_device;
+ struct ib_device_ops ops;
+ char name[IB_DEVICE_NAME_MAX];
+ struct rcu_head rcu_head;
+
+ struct list_head event_handler_list;
+ spinlock_t event_handler_lock;
+
+ struct rw_semaphore client_data_rwsem;
+ struct xarray client_data;
+ struct mutex unregistration_lock;
+
+ struct ib_cache cache;
+ /**
+ * port_data is indexed by port number
+ */
+ struct ib_port_data *port_data;
+
+ int num_comp_vectors;
+
+ union {
+ struct device dev;
+ struct ib_core_device coredev;
+ };
+
+ /* First group for device attributes,
+ * Second group for driver provided attributes (optional).
+ * It is NULL terminated array.
+ */
+ const struct attribute_group *groups[3];
+
u64 uverbs_cmd_mask;
u64 uverbs_ex_cmd_mask;
@@ -2559,6 +2622,10 @@
__be64 node_guid;
u32 local_dma_lkey;
u16 is_switch:1;
+ /* Indicates kernel verbs support, should not be used in drivers */
+ u16 kverbs_provider:1;
+ /* CQ adaptive moderation (RDMA DIM) */
+ u16 use_cq_dim:1;
u8 node_type;
u8 phys_port_cnt;
struct ib_device_attr attrs;
@@ -2570,30 +2637,39 @@
#endif
u32 index;
+ struct rdma_restrack_root *res;
+
+ const struct uapi_definition *driver_def;
+
/*
- * Implementation details of the RDMA core, don't use in drivers
+ * Positive refcount indicates that the device is currently
+ * registered and cannot be unregistered.
*/
- struct rdma_restrack_root res;
+ refcount_t refcount;
+ struct completion unreg_completion;
+ struct work_struct unregistration_work;
- /**
- * The following mandatory functions are used only at device
- * registration. Keep functions such as these at the end of this
- * structure to avoid cache line misses when accessing struct ib_device
- * in fast paths.
- */
- int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
- void (*get_dev_fw_str)(struct ib_device *, char *str);
- const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
- int comp_vector);
+ const struct rdma_link_ops *link_ops;
- const struct uverbs_object_tree_def *const *driver_specs;
- enum rdma_driver_id driver_id;
+ /* Protects compat_devs xarray modifications */
+ struct mutex compat_devs_mutex;
+ /* Maintains compat devices for each net namespace */
+ struct xarray compat_devs;
+
+ /* Used by iWarp CM */
+ char iw_ifname[IFNAMSIZ];
+ u32 iw_driver_flags;
};
+struct ib_client_nl_info;
struct ib_client {
- char *name;
+ const char *name;
void (*add) (struct ib_device *);
void (*remove)(struct ib_device *, void *client_data);
+ void (*rename)(struct ib_device *dev, void *client_data);
+ int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res);
+ int (*get_global_nl_info)(struct ib_client_nl_info *res);
/* Returns the net_dev belonging to this ib_client and matching the
* given parameters.
@@ -2617,25 +2693,114 @@
const union ib_gid *gid,
const struct sockaddr *addr,
void *client_data);
- struct list_head list;
+
+ refcount_t uses;
+ struct completion uses_zero;
+ u32 client_id;
+
+ /* kverbs are not required by the client */
+ u8 no_kverbs_req:1;
};
-struct ib_device *ib_alloc_device(size_t size);
+/*
+ * IB block DMA iterator
+ *
+ * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
+ * to a HW supported page size.
+ */
+struct ib_block_iter {
+ /* internal states */
+ struct scatterlist *__sg; /* sg holding the current aligned block */
+ dma_addr_t __dma_addr; /* unaligned DMA address of this block */
+ unsigned int __sg_nents; /* number of SG entries */
+ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */
+ unsigned int __pg_bit; /* alignment of current block */
+};
+
+struct ib_device *_ib_alloc_device(size_t size);
+#define ib_alloc_device(drv_struct, member) \
+ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof( \
+ struct drv_struct, member))), \
+ struct drv_struct, member)
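A hedged usage sketch: because of the BUILD_BUG_ON_ZERO(offsetof(...)) check, the ib_device member must sit at offset zero of the driver structure. struct my_dev and the member name ibdev are illustrative:

    struct my_dev {
            struct ib_device ibdev;   /* must be the first member */
            int my_private_state;
    };

    struct my_dev *dev = ib_alloc_device(my_dev, ibdev);

    if (!dev)
            return -ENOMEM;
    /* ... on teardown ... */
    ib_dealloc_device(&dev->ibdev);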
+
void ib_dealloc_device(struct ib_device *device);
void ib_get_device_fw_str(struct ib_device *device, char *str);
-int ib_register_device(struct ib_device *device,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *));
+int ib_register_device(struct ib_device *device, const char *name);
void ib_unregister_device(struct ib_device *device);
+void ib_unregister_driver(enum rdma_driver_id driver_id);
+void ib_unregister_device_and_put(struct ib_device *device);
+void ib_unregister_device_queued(struct ib_device *ib_dev);
int ib_register_client (struct ib_client *client);
void ib_unregister_client(struct ib_client *client);
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
+void __rdma_block_iter_start(struct ib_block_iter *biter,
+ struct scatterlist *sglist,
+ unsigned int nents,
+ unsigned long pgsz);
+bool __rdma_block_iter_next(struct ib_block_iter *biter);
+
+/**
+ * rdma_block_iter_dma_address - get the aligned dma address of the current
+ * block held by the block iterator.
+ * @biter: block iterator holding the memory block
+ */
+static inline dma_addr_t
+rdma_block_iter_dma_address(struct ib_block_iter *biter)
+{
+ return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
+}
+
+/**
+ * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
+ * @sglist: sglist to iterate over
+ * @biter: block iterator holding the memory block
+ * @nents: maximum number of sg entries to iterate over
+ * @pgsz: best HW supported page size to use
+ *
+ * Callers may use rdma_block_iter_dma_address() to get each
+ * block's aligned DMA address.
+ */
+#define rdma_for_each_block(sglist, biter, nents, pgsz) \
+ for (__rdma_block_iter_start(biter, sglist, nents, \
+ pgsz); \
+ __rdma_block_iter_next(biter);)
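As a sketch (names such as sgl, nents and the pas[] output array are assumptions), a driver building a page list from a DMA-mapped SGL might use the iterator like this:

    struct ib_block_iter biter;
    int i = 0;

    /* walk the mapped SGL in HW-page-sized (here 4K) aligned blocks */
    rdma_for_each_block(sgl, &biter, nents, SZ_4K)
            pas[i++] = cpu_to_le64(rdma_block_iter_dma_address(&biter));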
+
+/**
+ * ib_get_client_data - Get IB client context
+ * @device: Device to get context for
+ * @client: Client to get context for
+ *
+ * ib_get_client_data() returns the client context data set with
+ * ib_set_client_data(). This can only be called while the client is
+ * registered to the device; once the ib_client remove() callback returns, this
+ * cannot be called.
+ */
+static inline void *ib_get_client_data(struct ib_device *device,
+ struct ib_client *client)
+{
+ return xa_load(&device->client_data, client->client_id);
+}
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
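For illustration, the usual pattern an IB client follows with the xarray-backed client data (my_client, struct my_ctx and the callbacks are placeholders):

    static struct ib_client my_client;          /* forward declaration */

    static void my_add_one(struct ib_device *ibdev)
    {
            struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

            if (ctx)
                    ib_set_client_data(ibdev, &my_client, ctx);
    }

    static void my_remove_one(struct ib_device *ibdev, void *client_data)
    {
            kfree(client_data);                 /* ctx stored by my_add_one */
    }

    static struct ib_client my_client = {
            .name   = "my_client",
            .add    = my_add_one,
            .remove = my_remove_one,
    };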
+void ib_set_device_ops(struct ib_device *device,
+ const struct ib_device_ops *ops);
+
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot);
+#else
+static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size,
+ pgprot_t prot)
+{
+ return -EINVAL;
+}
+#endif
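A sketch of a driver mmap() handler built on the helper; struct my_ucontext and its db_base field (the doorbell page's physical address) are hypothetical:

    static int my_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
    {
            struct my_ucontext *ctx = container_of(uctx, struct my_ucontext,
                                                   ibucontext);
            unsigned long pfn = ctx->db_base >> PAGE_SHIFT;

            /* map one doorbell page, uncached, into the user VMA */
            return rdma_user_mmap_io(uctx, vma, pfn, PAGE_SIZE,
                                     pgprot_noncached(vma->vm_page_prot));
    }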
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
@@ -2720,7 +2885,6 @@
* @next_state: Next QP state
* @type: QP type
* @mask: Mask of supplied QP attributes
- * @ll : link layer of port
*
* This function is a helper function that a low-level driver's
* modify_qp method can use to validate the consumer's input. It
@@ -2729,8 +2893,7 @@
* and that the attribute mask supplied is allowed for the transition.
*/
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll);
+ enum ib_qp_type type, enum ib_qp_attr_mask mask);
void ib_register_event_handler(struct ib_event_handler *event_handler);
void ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -2770,6 +2933,16 @@
}
/**
+ * rdma_for_each_port - Iterate over all valid port numbers of the IB device
+ * @device - The struct ib_device * to iterate over
+ * @iter - The unsigned int to store the port number
+ */
+#define rdma_for_each_port(device, iter) \
+ for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \
+ unsigned int, iter))); \
+ iter <= rdma_end_port(device); (iter)++)
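Usage is straightforward; the BUILD_BUG_ON_ZERO() forces the iterator variable to be an unsigned int. A hedged example (handle_port() is a placeholder):

    unsigned int port;

    rdma_for_each_port(ibdev, port) {
            if (rdma_protocol_roce(ibdev, port))
                    handle_port(ibdev, port);
    }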
+
+/**
* rdma_end_port - Return the last valid port number for the device
* specified
*
@@ -2792,34 +2965,38 @@
static inline bool rdma_is_grh_required(const struct ib_device *device,
u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags &
- RDMA_CORE_PORT_IB_GRH_REQUIRED;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_PORT_IB_GRH_REQUIRED;
}
static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_IB;
}
static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags &
- (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+ return device->port_data[port_num].immutable.core_cap_flags &
+ (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
}
static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
}
static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_ROCE;
}
static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_IWARP;
}
static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
@@ -2830,12 +3007,14 @@
static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_RAW_PACKET;
}
static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_USNIC;
}
/**
@@ -2852,7 +3031,8 @@
*/
static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_MAD;
}
/**
@@ -2876,8 +3056,8 @@
*/
static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
{
- return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD)
- == RDMA_CORE_CAP_OPA_MAD;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_OPA_MAD;
}
/**
@@ -2902,7 +3082,8 @@
*/
static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_SMI;
}
/**
@@ -2922,7 +3103,8 @@
*/
static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_CM;
}
/**
@@ -2939,7 +3121,8 @@
*/
static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IW_CM;
}
/**
@@ -2959,7 +3142,8 @@
*/
static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_SA;
}
/**
@@ -2999,7 +3183,8 @@
*/
static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_AF_IB;
}
/**
@@ -3020,7 +3205,8 @@
*/
static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_ETH_AH;
}
/**
@@ -3034,7 +3220,7 @@
*/
static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
{
- return (device->port_immutable[port_num].core_cap_flags &
+ return (device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
}
@@ -3052,7 +3238,7 @@
*/
static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].max_mad_size;
+ return device->port_data[port_num].immutable.max_mad_size;
}
/**
@@ -3072,7 +3258,7 @@
u8 port_num)
{
return rdma_protocol_roce(device, port_num) &&
- device->add_gid && device->del_gid;
+ device->ops.add_gid && device->ops.del_gid;
}
/*
@@ -3087,6 +3273,30 @@
return rdma_protocol_iwarp(dev, port_num);
}
+/**
+ * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
+ *
+ * @addr: address
+ * @pgsz_bitmap: bitmap of HW supported page sizes
+ */
+static inline unsigned int rdma_find_pg_bit(unsigned long addr,
+ unsigned long pgsz_bitmap)
+{
+ unsigned long align;
+ unsigned long pgsz;
+
+ align = addr & -addr;
+
+ /* Find page bit such that addr is aligned to the highest supported
+ * HW page size
+ */
+ pgsz = pgsz_bitmap & ~(-align << 1);
+ if (!pgsz)
+ return __ffs(pgsz_bitmap);
+
+ return __fls(pgsz);
+}
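A short sketch, assuming the hardware supports 4K, 2M and 1G pages (the bitmap is illustrative):

    unsigned long pgsz_bitmap = SZ_4K | SZ_2M | SZ_1G;
    unsigned int pg_bit = rdma_find_pg_bit(dma_addr, pgsz_bitmap);
    unsigned long pg_size = 1UL << pg_bit;  /* largest size dma_addr is aligned to */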
+
int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state);
int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
@@ -3128,19 +3338,44 @@
struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
const char *caller);
+
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
-void ib_dealloc_pd(struct ib_pd *pd);
+
+/**
+ * ib_dealloc_pd_user - Deallocate kernel/user PD
+ * @pd: The protection domain
+ * @udata: Valid user data or NULL for kernel objects
+ */
+void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+
+/**
+ * ib_dealloc_pd - Deallocate kernel PD
+ * @pd: The protection domain
+ *
+ * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
+ */
+static inline void ib_dealloc_pd(struct ib_pd *pd)
+{
+ ib_dealloc_pd_user(pd, NULL);
+}
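For a kernel ULP the pairing stays as before; only user-object teardown needs the _user variant. A minimal sketch:

    struct ib_pd *pd = ib_alloc_pd(ibdev, 0);

    if (IS_ERR(pd))
            return PTR_ERR(pd);
    /* ... create QPs/CQs/MRs under this PD ... */
    ib_dealloc_pd(pd);          /* kernel PD, so no udata is required */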
+
+enum rdma_create_ah_flags {
+ /* In a sleepable context */
+ RDMA_CREATE_AH_SLEEPABLE = BIT(0),
+};
/**
* rdma_create_ah - Creates an address handle for the given address vector.
* @pd: The protection domain associated with the address handle.
* @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
*
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
-struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr);
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+ u32 flags);
/**
* rdma_create_user_ah - Creates an address handle for the given address vector.
@@ -3230,11 +3465,30 @@
*/
int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+enum rdma_destroy_ah_flags {
+ /* In a sleepable context */
+ RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
+};
+
/**
- * rdma_destroy_ah - Destroys an address handle.
+ * rdma_destroy_ah_user - Destroys an address handle.
* @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ * @udata: Valid user data or NULL for kernel objects
*/
-int rdma_destroy_ah(struct ib_ah *ah);
+int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
+
+/**
+ * rdma_destroy_ah - Destroys a kernel address handle.
+ * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ *
+ * NOTE: for user ah use rdma_destroy_ah_user with valid udata!
+ */
+static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return rdma_destroy_ah_user(ah, flags, NULL);
+}
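A hedged example of the new flag arguments from a context that may sleep (pd and ah_attr are assumed to be set up already):

    struct ib_ah *ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);

    if (IS_ERR(ah))
            return PTR_ERR(ah);
    /* ... reference ah in UD post sends ... */
    rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);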
/**
* ib_create_srq - Creates a SRQ associated with the specified protection
@@ -3278,10 +3532,22 @@
struct ib_srq_attr *srq_attr);
/**
- * ib_destroy_srq - Destroys the specified SRQ.
+ * ib_destroy_srq_user - Destroys the specified SRQ.
* @srq: The SRQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
*/
-int ib_destroy_srq(struct ib_srq *srq);
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_srq - Destroys the specified kernel SRQ.
+ * @srq: The SRQ to destroy.
+ *
+ * NOTE: for user srq use ib_destroy_srq_user with valid udata!
+ */
+static inline int ib_destroy_srq(struct ib_srq *srq)
+{
+ return ib_destroy_srq_user(srq, NULL);
+}
/**
* ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
@@ -3296,19 +3562,39 @@
{
const struct ib_recv_wr *dummy;
- return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
+ return srq->device->ops.post_srq_recv(srq, recv_wr,
+ bad_recv_wr ? : &dummy);
}
/**
- * ib_create_qp - Creates a QP associated with the specified protection
+ * ib_create_qp_user - Creates a QP associated with the specified protection
* domain.
* @pd: The protection domain associated with the QP.
* @qp_init_attr: A list of initial attributes required to create the
* QP. If QP creation succeeds, then the attributes are updated to
* the actual capabilities of the created QP.
+ * @udata: Valid user data or NULL for kernel objects
*/
-struct ib_qp *ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr);
+struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+
+/**
+ * ib_create_qp - Creates a kernel QP associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the QP.
+ * @qp_init_attr: A list of initial attributes required to create the
+ * QP. If QP creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created QP.
+ * @udata: Valid user data or NULL for kernel objects
+ *
+ * NOTE: for user qp use ib_create_qp_user with valid udata!
+ */
+static inline struct ib_qp *ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ return ib_create_qp_user(pd, qp_init_attr, NULL);
+}
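A minimal kernel-QP sketch (the CQ, PD and capability numbers are illustrative):

    struct ib_qp_init_attr init_attr = {
            .send_cq = cq,
            .recv_cq = cq,
            .qp_type = IB_QPT_RC,
            .cap     = {
                    .max_send_wr  = 64,
                    .max_recv_wr  = 64,
                    .max_send_sge = 1,
                    .max_recv_sge = 1,
            },
    };
    struct ib_qp *qp = ib_create_qp(pd, &init_attr);

    if (IS_ERR(qp))
            return PTR_ERR(qp);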
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
@@ -3358,8 +3644,20 @@
/**
 * ib_destroy_qp_user - Destroys the specified QP.
* @qp: The QP to destroy.
+ * @udata: Valid udata or NULL for kernel objects
*/
-int ib_destroy_qp(struct ib_qp *qp);
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
+
+/**
+ * ib_destroy_qp - Destroys the specified kernel QP.
+ * @qp: The QP to destroy.
+ *
+ * NOTE: for user qp use ib_destroy_qp_user with valid udata!
+ */
+static inline int ib_destroy_qp(struct ib_qp *qp)
+{
+ return ib_destroy_qp_user(qp, NULL);
+}
/**
* ib_open_qp - Obtain a reference to an existing sharable QP.
@@ -3399,7 +3697,7 @@
{
const struct ib_send_wr *dummy;
- return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
+ return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy);
}
/**
@@ -3416,16 +3714,88 @@
{
const struct ib_recv_wr *dummy;
- return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
+ return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
}
-struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx, const char *caller);
-#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \
- __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME)
+struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx,
+ const char *caller, struct ib_udata *udata);
-void ib_free_cq(struct ib_cq *cq);
+/**
+ * ib_alloc_cq_user: Allocate kernel/user CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ * @udata: Valid user data or NULL for kernel objects
+ */
+static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
+ void *private, int nr_cqe,
+ int comp_vector,
+ enum ib_poll_context poll_ctx,
+ struct ib_udata *udata)
+{
+ return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+ KBUILD_MODNAME, udata);
+}
+
+/**
+ * ib_alloc_cq: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ *
+ * NOTE: for user cq use ib_alloc_cq_user with valid udata!
+ */
+static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx)
+{
+ return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+ NULL);
+}
+
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+ int nr_cqe, enum ib_poll_context poll_ctx,
+ const char *caller);
+
+/**
+ * ib_alloc_cq_any: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+ void *private, int nr_cqe,
+ enum ib_poll_context poll_ctx)
+{
+ return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+ KBUILD_MODNAME);
+}
+
+/**
+ * ib_free_cq_user - Free kernel/user CQ
+ * @cq: The CQ to free
+ * @udata: Valid user data or NULL for kernel objects
+ */
+void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_free_cq - Free kernel CQ
+ * @cq: The CQ to free
+ *
+ * NOTE: for user cq use ib_free_cq_user with valid udata!
+ */
+static inline void ib_free_cq(struct ib_cq *cq)
+{
+ ib_free_cq_user(cq, NULL);
+}
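For kernel consumers that do not care which completion vector is used, ib_alloc_cq_any() picks one for them; a hedged sketch of the pairing:

    struct ib_cq *cq = ib_alloc_cq_any(ibdev, ctx, 128, IB_POLL_SOFTIRQ);

    if (IS_ERR(cq))
            return PTR_ERR(cq);
    /* ... attach QPs, run traffic ... */
    ib_free_cq(cq);             /* kernel CQ, so no udata is required */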
+
int ib_process_cq_direct(struct ib_cq *cq, int budget);
/**
@@ -3469,10 +3839,22 @@
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period);
/**
- * ib_destroy_cq - Destroys the specified CQ.
+ * ib_destroy_cq_user - Destroys the specified CQ.
* @cq: The CQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
*/
-int ib_destroy_cq(struct ib_cq *cq);
+int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_cq - Destroys the specified kernel CQ.
+ * @cq: The CQ to destroy.
+ *
+ * NOTE: for user cq use ib_destroy_cq_user with valid udata!
+ */
+static inline void ib_destroy_cq(struct ib_cq *cq)
+{
+ ib_destroy_cq_user(cq, NULL);
+}
/**
* ib_poll_cq - poll a CQ for completion(s)
@@ -3489,7 +3871,7 @@
static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
struct ib_wc *wc)
{
- return cq->device->poll_cq(cq, num_entries, wc);
+ return cq->device->ops.poll_cq(cq, num_entries, wc);
}
/**
@@ -3522,7 +3904,7 @@
static inline int ib_req_notify_cq(struct ib_cq *cq,
enum ib_cq_notify_flags flags)
{
- return cq->device->req_notify_cq(cq, flags);
+ return cq->device->ops.req_notify_cq(cq, flags);
}
/**
@@ -3534,8 +3916,8 @@
*/
static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
{
- return cq->device->req_ncomp_notif ?
- cq->device->req_ncomp_notif(cq, wc_cnt) :
+ return cq->device->ops.req_ncomp_notif ?
+ cq->device->ops.req_ncomp_notif(cq, wc_cnt) :
-ENOSYS;
}
@@ -3652,32 +4034,18 @@
{
dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
}
-/**
- * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
- * @dev: The device for which the DMA addresses were created
- * @sg: The scatter/gather entry
- *
- * Note: this function is obsolete. To do: change all occurrences of
- * ib_sg_dma_address() into sg_dma_address().
- */
-static inline u64 ib_sg_dma_address(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg_dma_address(sg);
-}
/**
- * ib_sg_dma_len - Return the DMA length from a scatter/gather entry
- * @dev: The device for which the DMA addresses were created
- * @sg: The scatter/gather entry
+ * ib_dma_max_seg_size - Return the size limit of a single DMA transfer
+ * @dev: The device to query
*
- * Note: this function is obsolete. To do: change all occurrences of
- * ib_sg_dma_len() into sg_dma_len().
+ * The returned value represents a size in bytes.
*/
-static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
+static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
{
- return sg_dma_len(sg);
+ struct device_dma_parameters *p = dev->dma_device->dma_parms;
+
+ return p ? p->max_segment_size : UINT_MAX;
}
/**
@@ -3740,17 +4108,41 @@
}
/**
- * ib_dereg_mr - Deregisters a memory region and removes it from the
+ * ib_dereg_mr_user - Deregisters a memory region and removes it from the
+ * HCA translation table.
+ * @mr: The memory region to deregister.
+ * @udata: Valid user data or NULL for kernel object
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
+ */
+int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata);
+
+/**
+ * ib_dereg_mr - Deregisters a kernel memory region and removes it from the
* HCA translation table.
* @mr: The memory region to deregister.
*
* This function can fail, if the memory region has memory windows bound to it.
+ *
+ * NOTE: for user mr use ib_dereg_mr_user with valid udata!
*/
-int ib_dereg_mr(struct ib_mr *mr);
+static inline int ib_dereg_mr(struct ib_mr *mr)
+{
+ return ib_dereg_mr_user(mr, NULL);
+}
-struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg);
+struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg, struct ib_udata *udata);
+
+static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type, u32 max_num_sg)
+{
+ return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL);
+}
+
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg);
/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
@@ -3799,7 +4191,7 @@
u64 *page_list, int list_len,
u64 iova)
{
- return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
+ return fmr->device->ops.map_phys_fmr(fmr, page_list, list_len, iova);
}
/**
@@ -3848,8 +4240,9 @@
/**
* ib_dealloc_xrcd - Deallocates an XRC domain.
* @xrcd: The XRC domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
*/
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
static inline int ib_check_mr_access(int flags)
{
@@ -3893,12 +4286,39 @@
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+/**
+ * ib_device_try_get: Hold a registration lock
+ * @dev: The device to lock
+ *
+ * A device under an active registration lock cannot become unregistered. It
+ * is only possible to obtain a registration lock on a device that is fully
+ * registered, otherwise this function returns false.
+ *
+ * The registration lock is only necessary for actions which require the
+ * device to still be registered. Uses that only require the device pointer to
+ * be valid should use get_device(&ibdev->dev) to hold the memory.
+ *
+ */
+static inline bool ib_device_try_get(struct ib_device *dev)
+{
+ return refcount_inc_not_zero(&dev->refcount);
+}
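The expected calling pattern, sketched (do_work() is a placeholder):

    if (ib_device_try_get(ibdev)) {
            /* ibdev cannot become unregistered inside this window */
            do_work(ibdev);
            ib_device_put(ibdev);
    }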
+
+void ib_device_put(struct ib_device *device);
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+ enum rdma_driver_id driver_id);
+struct ib_device *ib_device_get_by_name(const char *name,
+ enum rdma_driver_id driver_id);
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);
+int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
+ unsigned int port);
+struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
+
struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq);
+int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask);
struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
@@ -3908,6 +4328,10 @@
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size);
+int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset, unsigned int page_size);
static inline int
ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
@@ -4152,27 +4576,13 @@
ib_get_vector_affinity(struct ib_device *device, int comp_vector)
{
if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
- !device->get_vector_affinity)
+ !device->ops.get_vector_affinity)
return NULL;
- return device->get_vector_affinity(device, comp_vector);
+ return device->ops.get_vector_affinity(device, comp_vector);
}
-static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
- struct ib_qp *qp, struct ib_device *device)
-{
- uobj->object = ibflow;
- ibflow->uobject = uobj;
-
- if (qp) {
- atomic_inc(&qp->usecnt);
- ibflow->qp = qp;
- }
-
- ibflow->device = device;
-}
-
/**
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
@@ -4181,8 +4591,70 @@
*/
void rdma_roce_rescan_device(struct ib_device *ibdev);
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs);
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
+
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+ enum rdma_netdev_t type, const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+
+int rdma_init_netdev(struct ib_device *device, u8 port_num,
+ enum rdma_netdev_t type, const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *),
+ struct net_device *netdev);
+
+/**
+ * rdma_set_device_sysfs_group - Set device attributes group to have
+ *				   driver specific sysfs entries for the
+ *				   infiniband class.
+ *
+ * @device: device pointer for which the attributes are to be created
+ * @group: Pointer to group which should be added when device
+ * is registered with sysfs.
+ * rdma_set_device_sysfs_group() allows existing drivers to expose one
+ * attribute group per device for its sysfs attributes.
+ *
+ * NOTE: New drivers should not make use of this API; instead, new device
+ * parameters should be exposed via the netlink command interface. This API
+ * and mechanism exist only for existing drivers.
+ */
+static inline void
+rdma_set_device_sysfs_group(struct ib_device *dev,
+ const struct attribute_group *group)
+{
+ dev->groups[1] = group;
+}
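For illustration, an existing driver might wire up one attribute group like the following before calling ib_register_device(); hw_rev is a made-up attribute whose show callback is sketched after rdma_device_to_drv_device() below:

    static struct attribute *my_class_attrs[] = {
            &dev_attr_hw_rev.attr,
            NULL,
    };

    static const struct attribute_group my_attr_group = {
            .attrs = my_class_attrs,
    };

    rdma_set_device_sysfs_group(&dev->ibdev, &my_attr_group);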
+
+/**
+ * rdma_device_to_ibdev - Get ib_device pointer from device pointer
+ *
+ * @device: device pointer from which to retrieve the ib_device pointer
+ *
+ * rdma_device_to_ibdev() retrieves the ib_device pointer from a core device.
+ *
+ */
+static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
+{
+ struct ib_core_device *coredev =
+ container_of(device, struct ib_core_device, dev);
+
+ return coredev->owner;
+}
+
+/**
+ * rdma_device_to_drv_device - Helper macro to reach back to driver's
+ * ib_device holder structure from device pointer.
+ *
+ * NOTE: New drivers should not make use of this API; This API is only for
+ * existing drivers who have exposed sysfs entries using
+ * rdma_set_device_sysfs_group().
+ */
+#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \
+ container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
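Continuing the hypothetical hw_rev attribute from above, the sysfs show callback reaches back to the driver structure with the macro (struct my_dev and its hw_rev field are assumptions):

    static ssize_t hw_rev_show(struct device *device,
                               struct device_attribute *attr, char *buf)
    {
            struct my_dev *mdev =
                    rdma_device_to_drv_device(device, struct my_dev, ibdev);

            return sprintf(buf, "%d\n", mdev->hw_rev);
    }
    static DEVICE_ATTR_RO(hw_rev);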
+
+bool rdma_dev_access_netns(const struct ib_device *device,
+ const struct net *net);
#endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 5cd7701..5aa8a9c 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -94,7 +94,8 @@
void (*add_ref)(struct iw_cm_id *);
void (*rem_ref)(struct iw_cm_id *);
u8 tos;
- bool mapped;
+ bool tos_set:1;
+ bool mapped:1;
};
struct iw_cm_conn_param {
@@ -105,28 +106,16 @@
u32 qpn;
};
-struct iw_cm_verbs {
- void (*add_ref)(struct ib_qp *qp);
+enum iw_flags {
- void (*rem_ref)(struct ib_qp *qp);
-
- struct ib_qp * (*get_qp)(struct ib_device *device,
- int qpn);
-
- int (*connect)(struct iw_cm_id *cm_id,
- struct iw_cm_conn_param *conn_param);
-
- int (*accept)(struct iw_cm_id *cm_id,
- struct iw_cm_conn_param *conn_param);
-
- int (*reject)(struct iw_cm_id *cm_id,
- const void *pdata, u8 pdata_len);
-
- int (*create_listen)(struct iw_cm_id *cm_id,
- int backlog);
-
- int (*destroy_listen)(struct iw_cm_id *cm_id);
- char ifname[IFNAMSIZ];
+ /*
+ * This flag allows the iwcm and iwpmd to still advertise
+ * mappings but the real and mapped port numbers are the
+ * same. Further, iwpmd will not bind any user socket to
+ * reserve the port. This is required for soft iwarp
+ * to play in the port mapped iwarp space.
+ */
+ IW_F_NO_PORT_MAP = (1 << 0),
};
/**
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
index fda3167..c895350 100644
--- a/include/rdma/iw_portmap.h
+++ b/include/rdma/iw_portmap.h
@@ -33,6 +33,9 @@
#ifndef _IW_PORTMAP_H
#define _IW_PORTMAP_H
+#include <linux/socket.h>
+#include <linux/netlink.h>
+
#define IWPM_ULIBNAME_SIZE 32
#define IWPM_DEVNAME_SIZE 32
#define IWPM_IFNAME_SIZE 16
@@ -58,167 +61,31 @@
struct sockaddr_storage mapped_loc_addr;
struct sockaddr_storage rem_addr;
struct sockaddr_storage mapped_rem_addr;
+ u32 flags;
};
-/**
- * iwpm_init - Allocate resources for the iwarp port mapper
- *
- * Should be called when network interface goes up.
- */
int iwpm_init(u8);
-
-/**
- * iwpm_exit - Deallocate resources for the iwarp port mapper
- *
- * Should be called when network interface goes down.
- */
int iwpm_exit(u8);
-
-/**
- * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
- *
- * Returns true if the pid is greater than zero, otherwise returns false
- */
int iwpm_valid_pid(void);
-
-/**
- * iwpm_register_pid - Send a netlink query to userspace
- * to get the iwarp port mapper pid
- * @pm_msg: Contains driver info to send to the userspace port mapper
- * @nl_client: The index of the netlink client
- */
int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client);
-
-/**
- * iwpm_add_mapping - Send a netlink add mapping request to
- * the userspace port mapper
- * @pm_msg: Contains the local ip/tcp address info to send
- * @nl_client: The index of the netlink client
- *
- * If the request is successful, the pm_msg stores
- * the port mapper response (mapped address info)
- */
int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
-
-/**
- * iwpm_add_and_query_mapping - Send a netlink add and query mapping request
- * to the userspace port mapper
- * @pm_msg: Contains the local and remote ip/tcp address info to send
- * @nl_client: The index of the netlink client
- *
- * If the request is successful, the pm_msg stores the
- * port mapper response (mapped local and remote address info)
- */
int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
-
-/**
- * iwpm_remove_mapping - Send a netlink remove mapping request
- * to the userspace port mapper
- *
- * @local_addr: Local ip/tcp address to remove
- * @nl_client: The index of the netlink client
- */
int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client);
-
-/**
- * iwpm_register_pid_cb - Process the port mapper response to
- * iwpm_register_pid query
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- *
- * If successful, the function receives the userspace port mapper pid
- * which is used in future communication with the port mapper
- */
int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_add_mapping_cb - Process the port mapper response to
- * iwpm_add_mapping request
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_add_and_query_mapping_cb - Process the port mapper response to
- * iwpm_add_and_query_mapping request
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_remote_info_cb - Process remote connecting peer address info, which
- * the port mapper has received from the connecting peer
- *
- * @cb: Contains the received message (payload and netlink header)
- *
- * Stores the IPv4/IPv6 address info in a hash table
- */
int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_mapping_error_cb - Process port mapper notification for error
- *
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_mapping_info_cb - Process a notification that the userspace
- * port mapper daemon is started
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- *
- * Using the received port mapper pid, send all the local mapping
- * info records to the userspace port mapper
- */
int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_ack_mapping_info_cb - Process the port mapper ack for
- * the provided local mapping info records
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_get_remote_info - Get the remote connecting peer address info
- *
- * @mapped_loc_addr: Mapped local address of the listening peer
- * @mapped_rem_addr: Mapped remote address of the connecting peer
- * @remote_addr: To store the remote address of the connecting peer
- * @nl_client: The index of the netlink client
- *
- * The remote address info is retrieved and provided to the client in
- * the remote_addr. After that it is removed from the hash table
- */
int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
struct sockaddr_storage *mapped_rem_addr,
struct sockaddr_storage *remote_addr, u8 nl_client);
-
-/**
- * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
- * info in a hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
- * @nl_client: The index of the netlink client
- */
int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
- struct sockaddr_storage *mapped_addr, u8 nl_client);
-
-/**
- * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
- * info from the hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
- *
- * Returns err code if mapping info is not found in the hash table,
- * otherwise returns 0
- */
+ struct sockaddr_storage *mapped_addr, u8 nl_client,
+ u32 map_flags);
int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
struct sockaddr_storage *mapped_addr);
+int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb);
#endif /* _IW_PORTMAP_H */
diff --git a/include/rdma/mr_pool.h b/include/rdma/mr_pool.h
index 986010b..e77123b 100644
--- a/include/rdma/mr_pool.h
+++ b/include/rdma/mr_pool.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#ifndef _RDMA_MR_POOL_H
#define _RDMA_MR_POOL_H 1
@@ -19,7 +11,7 @@
void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr);
int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
- enum ib_mr_type type, u32 max_num_sg);
+ enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg);
void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list);
#endif /* _RDMA_MR_POOL_H */
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index b4f0ac0..bdbfe25 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -33,6 +33,8 @@
#if !defined(OPA_PORT_INFO_H)
#define OPA_PORT_INFO_H
+#include <rdma/opa_smi.h>
+
#define OPA_PORT_LINK_MODE_NOP 0 /* No change */
#define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */
@@ -413,6 +415,6 @@
u8 local_port_num;
u8 reserved12;
u8 reserved13; /* was guid_cap */
-} __attribute__ ((packed));
+} __packed;
#endif /* OPA_PORT_INFO_H */
diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h
index f789611..c7b2ef1 100644
--- a/include/rdma/opa_smi.h
+++ b/include/rdma/opa_smi.h
@@ -98,7 +98,7 @@
struct opa_node_description {
u8 data[64];
-} __attribute__ ((packed));
+} __packed;
struct opa_node_info {
u8 base_version;
@@ -114,7 +114,7 @@
__be32 revision;
u8 local_port_num;
u8 vendor_id[3]; /* network byte order */
-} __attribute__ ((packed));
+} __packed;
#define OPA_PARTITION_TABLE_BLK_SIZE 32
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 5d71a7f..71f48cf 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -152,7 +152,11 @@
* @ps: RDMA port space.
* @qp_type: type of queue pair associated with the id.
*
- * The id holds a reference on the network namespace until it is destroyed.
+ * Returns a new rdma_cm_id. The id holds a reference on the network
+ * namespace until it is destroyed.
+ *
+ * The event handler callback serializes on the id's mutex and is
+ * allowed to sleep.
*/
#define rdma_create_id(net, event_handler, context, ps, qp_type) \
__rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
@@ -192,7 +196,8 @@
* @timeout_ms: Time to wait for resolution to complete.
*/
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, int timeout_ms);
+ const struct sockaddr *dst_addr,
+ unsigned long timeout_ms);
/**
* rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
@@ -202,7 +207,7 @@
* Users must have first called rdma_resolve_addr to resolve a dst_addr
* into an RDMA address before calling this routine.
*/
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms);
/**
* rdma_create_qp - Allocate a QP and associate it with the specified RDMA
@@ -369,6 +374,7 @@
*/
int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
+int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout);
/**
* rdma_get_service_id - Return the IB service ID for a specified address.
* @id: Communication identifier associated with the address.
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
new file mode 100644
index 0000000..eb99856
--- /dev/null
+++ b/include/rdma/rdma_counter.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_COUNTER_H_
+#define _RDMA_COUNTER_H_
+
+#include <linux/mutex.h>
+#include <linux/pid_namespace.h>
+
+#include <rdma/restrack.h>
+#include <rdma/rdma_netlink.h>
+
+struct ib_device;
+struct ib_qp;
+
+struct auto_mode_param {
+ int qp_type;
+};
+
+struct rdma_counter_mode {
+ enum rdma_nl_counter_mode mode;
+ enum rdma_nl_counter_mask mask;
+ struct auto_mode_param param;
+};
+
+struct rdma_port_counter {
+ struct rdma_counter_mode mode;
+ struct rdma_hw_stats *hstats;
+ unsigned int num_counters;
+ struct mutex lock;
+};
+
+struct rdma_counter {
+ struct rdma_restrack_entry res;
+ struct ib_device *device;
+ uint32_t id;
+ struct kref kref;
+ struct rdma_counter_mode mode;
+ struct mutex lock;
+ struct rdma_hw_stats *stats;
+ u8 port;
+};
+
+void rdma_counter_init(struct ib_device *dev);
+void rdma_counter_release(struct ib_device *dev);
+int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
+ bool on, enum rdma_nl_counter_mask mask);
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port);
+int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);
+
+int rdma_counter_query_stats(struct rdma_counter *counter);
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index);
+int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
+ u32 qp_num, u32 counter_id);
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
+ u32 qp_num, u32 *counter_id);
+int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
+ u32 qp_num, u32 counter_id);
+int rdma_counter_get_mode(struct ib_device *dev, u8 port,
+ enum rdma_nl_counter_mode *mode,
+ enum rdma_nl_counter_mask *mask);
+
+#endif /* _RDMA_COUNTER_H_ */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index c369703..ab22759 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -6,6 +6,12 @@
#include <linux/netlink.h>
#include <uapi/rdma/rdma_netlink.h>
+enum {
+ RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
+ RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
+ RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32,
+};
+
struct rdma_nl_cbs {
int (*doit)(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
@@ -70,33 +76,50 @@
/**
* Send the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
* @skb: The netlink skb
* @pid: Userspace netlink process ID
* Returns 0 on success or a negative error code.
*/
-int rdma_nl_unicast(struct sk_buff *skb, u32 pid);
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid);
/**
* Send, with wait/1 retry, the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
* @skb: The netlink skb
* @pid: Userspace netlink process ID
* Returns 0 on success or a negative error code.
*/
-int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid);
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid);
/**
* Send the supplied skb to a netlink group.
+ * @net: Net namespace in which to send the skb
* @skb: The netlink skb
* @group: Netlink group ID
* @flags: allocation flags
* Returns 0 on success or a negative error code.
*/
-int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+ unsigned int group, gfp_t flags);
/**
* Check if there are any listeners to the netlink group
* @group: the netlink group ID
- * Returns 0 on success or a negative for no listeners.
+ * Returns true on success or false if no listeners.
*/
-int rdma_nl_chk_listeners(unsigned int group);
+bool rdma_nl_chk_listeners(unsigned int group);
+
+struct rdma_link_ops {
+ struct list_head list;
+ const char *type;
+ int (*newlink)(const char *ibdev_name, struct net_device *ndev);
+};
+
+void rdma_link_register(struct rdma_link_ops *ops);
+void rdma_link_unregister(struct rdma_link_ops *ops);
+
+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
+#define MODULE_ALIAS_RDMA_CLIENT(type) MODULE_ALIAS("rdma-client-" type)
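A hedged sketch of how a software RDMA driver might hook into "rdma link add" with the new link ops (the "mydrv" type string and my_newlink() are placeholders):

    static int my_newlink(const char *ibdev_name, struct net_device *ndev)
    {
            /* allocate and register an ib_device named ibdev_name on ndev */
            return 0;
    }

    static struct rdma_link_ops my_link_ops = {
            .type    = "mydrv",
            .newlink = my_newlink,
    };

    rdma_link_register(&my_link_ops);       /* module init */
    rdma_link_unregister(&my_link_ops);     /* module exit */
    MODULE_ALIAS_RDMA_LINK("mydrv");        /* lets "rdma link add ... type mydrv" autoload the module */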
+
#endif /* _RDMA_NETLINK_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e79229a..ac5a943 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -2,7 +2,7 @@
#define DEF_RDMA_VT_H
/*
- * Copyright(c) 2016 - 2018 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -59,7 +59,6 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>
#include <rdma/rdmavt_mr.h>
-#include <rdma/rdmavt_qp.h>
#define RVT_MAX_PKEY_VALUES 16
@@ -72,6 +71,8 @@
struct list_head list;
};
+struct rvt_qp;
+struct rvt_qpn_table;
struct rvt_ibport {
struct rvt_qp __rcu *qp[2];
struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
@@ -115,6 +116,7 @@
u64 n_unaligned;
u64 n_rc_dupreq;
u64 n_rc_seqnak;
+ u64 n_rc_crwaits;
u16 pkey_violations;
u16 qkey_violations;
u16 mkey_violations;
@@ -149,6 +151,10 @@
#define RVT_CQN_MAX 16 /* maximum length of cq name */
+#define RVT_SGE_COPY_MEMCPY 0
+#define RVT_SGE_COPY_CACHELESS 1
+#define RVT_SGE_COPY_ADAPTIVE 2
+
/*
* Things that are driver specific, module parameters in hfi1 and qib
*/
@@ -161,6 +167,9 @@
*/
unsigned int lkey_table_size;
unsigned int qp_table_size;
+ unsigned int sge_copy_mode;
+ unsigned int wss_threshold;
+ unsigned int wss_clean_period;
int qpn_start;
int qpn_inc;
int qpn_res_start;
@@ -175,9 +184,15 @@
u32 max_mad_size;
u8 qos_shift;
u8 max_rdma_atomic;
+ u8 extra_rdma_atomic;
u8 reserved_operations;
};
+/* User context */
+struct rvt_ucontext {
+ struct ib_ucontext ibucontext;
+};
+
/* Protection domain */
struct rvt_pd {
struct ib_pd ibpd;
@@ -188,11 +203,37 @@
struct rvt_ah {
struct ib_ah ibah;
struct rdma_ah_attr attr;
- atomic_t refcount;
u8 vl;
u8 log_pmtu;
};
+/*
+ * This structure is used by rvt_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct rvt_mmap_info {
+ struct list_head pending_mmaps;
+ struct ib_ucontext *context;
+ void *obj;
+ __u64 offset;
+ struct kref ref;
+ u32 size;
+};
+
+/* memory working set size */
+struct rvt_wss {
+ unsigned long *entries;
+ atomic_t total_count;
+ atomic_t clean_counter;
+ atomic_t clean_entry;
+
+ int threshold;
+ int num_entries;
+ long pages_mask;
+ unsigned int clean_period;
+};
+
struct rvt_dev_info;
struct rvt_swqe;
struct rvt_driver_provided {
@@ -211,11 +252,18 @@
* version requires the s_lock not to be held. The other assumes the
* s_lock is held.
*/
- void (*schedule_send)(struct rvt_qp *qp);
- void (*schedule_send_no_lock)(struct rvt_qp *qp);
+ bool (*schedule_send)(struct rvt_qp *qp);
+ bool (*schedule_send_no_lock)(struct rvt_qp *qp);
- /* Driver specific work request checking */
- int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+ /*
+ * Driver specific work request setup and checking.
+ * This function is allowed to perform any setup, checks, or
+ * adjustments required to the SWQE in order to be usable by
+ * underlying protocols. This includes private data structure
+ * allocations.
+ */
+ int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
/*
* Sometimes rdmavt needs to kick the driver's send progress. That is
@@ -223,9 +271,6 @@
*/
void (*do_send)(struct rvt_qp *qp);
- /* Passed to ib core registration. Callback to create syfs files */
- int (*port_callback)(struct ib_device *, u8, struct kobject *);
-
/*
	 * Returns a pointer to the underlying hardware's PCI device. This is
* used to display information as to what hardware is being referenced
@@ -242,6 +287,13 @@
void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
/*
+	 * Init a structure allocated with qp_priv_alloc(). This should be
+ * called after all qp fields have been initialized in rdmavt.
+ */
+ int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr);
+
+ /*
* Free the driver's private qp structure.
*/
void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
@@ -371,6 +423,9 @@
/* post send table */
const struct rvt_operation_params *post_parms;
+ /* opcode translation table */
+ const enum ib_wc_opcode *wc_opcode;
+
/* Driver specific helper functions */
struct rvt_driver_provided driver_f;
@@ -411,6 +466,8 @@
u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
spinlock_t n_mcast_grps_lock;
+ /* Memory Working Set Size */
+ struct rvt_wss *wss;
};
/**
@@ -423,7 +480,14 @@
const char *fmt, const char *name,
const int unit)
{
- snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit);
+ /*
+ * FIXME: rvt and its users want to touch the ibdev before
+ * registration and have things like the name work. We don't have the
+ * infrastructure in the core to support this directly today, hack it
+ * to work by setting the name manually here.
+ */
+ dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
+ strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
}
/**
@@ -434,7 +498,7 @@
*/
static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
{
- return rdi->ibdev.name;
+ return dev_name(&rdi->ibdev.dev);
}
static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
@@ -452,16 +516,6 @@
return container_of(ibdev, struct rvt_dev_info, ibdev);
}
-static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
-{
- return container_of(ibsrq, struct rvt_srq, ibsrq);
-}
-
-static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct rvt_qp, ibqp);
-}
-
static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
{
/*
@@ -476,7 +530,14 @@
*/
static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
{
- return rdi->dparms.max_rdma_atomic + 1;
+ return rdi->dparms.max_rdma_atomic +
+ rdi->dparms.extra_rdma_atomic + 1;
+}
+
+static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi)
+{
+ return rdi->dparms.max_rdma_atomic +
+ rdi->dparms.extra_rdma_atomic;
}
/*
@@ -492,54 +553,9 @@
return rdi->ports[port_index]->pkey_table[index];
}
-/**
- * rvt_lookup_qpn - return the QP with the given QPN
- * @ibp: the ibport
- * @qpn: the QP number to look up
- *
- * The caller must hold the rcu_read_lock(), and keep the lock until
- * the returned qp is no longer in use.
- */
-/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */
-static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
- struct rvt_ibport *rvp,
- u32 qpn) __must_hold(RCU)
-{
- struct rvt_qp *qp = NULL;
-
- if (unlikely(qpn <= 1)) {
- qp = rcu_dereference(rvp->qp[qpn]);
- } else {
- u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits);
-
- for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp;
- qp = rcu_dereference(qp->next))
- if (qp->ibqp.qp_num == qpn)
- break;
- }
- return qp;
-}
-
-/**
- * rvt_mod_retry_timer - mod a retry timer
- * @qp - the QP
- * Modify a potentially already running retry timer
- */
-static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
-{
- struct ib_qp *ibqp = &qp->ibqp;
- struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_TIMER;
- /* 4.096 usec. * (1 << qp->timeout) */
- mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
- rdi->busy_jiffies);
-}
-
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
void rvt_dealloc_device(struct rvt_dev_info *rdi);
-int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id);
+int rvt_register_device(struct rvt_dev_info *rvd);
void rvt_unregister_device(struct rvt_dev_info *rvd);
int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h
index 75dc65c..574eb72 100644
--- a/include/rdma/rdmavt_cq.h
+++ b/include/rdma/rdmavt_cq.h
@@ -53,6 +53,7 @@
#include <linux/kthread.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
/*
* Define an ib_cq_notify value that is not valid so we know when CQ
@@ -61,18 +62,27 @@
#define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1)
/*
+ * Define a read macro that applies an smp_load_acquire memory barrier
+ * when reading an index of a circular buffer that is mmapped to user space.
+ */
+#define RDMA_READ_UAPI_ATOMIC(member) smp_load_acquire(&(member).val)
+
+/*
+ * Define a write macro that uses an smp_store_release memory barrier
+ * when writing an index of a circular buffer that is mmapped to user space.
+ */
+#define RDMA_WRITE_UAPI_ATOMIC(member, x) smp_store_release(&(member).val, x)
+#include <rdma/rvt-abi.h>
+
+/*
* This structure is used to contain the head pointer, tail pointer,
* and completion queue entries as a single memory allocation so
* it can be mmap'ed into user space.
*/
-struct rvt_cq_wc {
+struct rvt_k_cq_wc {
u32 head; /* index of next entry to fill */
u32 tail; /* index of next ib_poll_cq() entry */
- union {
- /* these are actually size ibcq.cqe + 1 */
- struct ib_uverbs_wc uqueue[0];
- struct ib_wc kqueue[0];
- };
+ struct ib_wc kqueue[];
};
/*
@@ -84,10 +94,12 @@
spinlock_t lock; /* protect changes in this struct */
u8 notify;
u8 triggered;
+ u8 cq_full;
int comp_vector_cpu;
struct rvt_dev_info *rdi;
struct rvt_cq_wc *queue;
struct rvt_mmap_info *ip;
+ struct rvt_k_cq_wc *kqueue;
};
static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq)
@@ -95,6 +107,6 @@
return container_of(ibcq, struct rvt_cq, ibcq);
}
-void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
+bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
#endif /* DEF_RDMAVT_INCCQH */
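Note: the user-visible CQ ring (struct rvt_cq_wc, now taken from rvt-abi.h) is only touched through the acquire/release macros above, while kernel-owned CQs use the plain-index struct rvt_k_cq_wc, and rvt_cq_enter() now reports overflow through its bool return. A minimal sketch, assuming a hypothetical helper name, of a driver honoring that split:

#include <rdma/rdmavt_cq.h>

/* Sketch only: check whether a rdmavt CQ ring is currently empty. */
static bool example_rvt_cq_is_empty(struct rvt_cq *cq)
{
	u32 head, tail;

	if (cq->ip) {
		/* user-mmapped ring: pair with user space via acquire loads */
		head = RDMA_READ_UAPI_ATOMIC(cq->queue->head);
		tail = RDMA_READ_UAPI_ATOMIC(cq->queue->tail);
	} else {
		/* kernel-only ring: plain indices, protected by cq->lock */
		head = cq->kqueue->head;
		tail = cq->kqueue->tail;
	}

	return head == tail;
}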
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 927f6d5..b550ae8 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -2,7 +2,7 @@
#define DEF_RDMAVT_INCQP_H
/*
- * Copyright(c) 2016 - 2018 Intel Corporation.
+ * Copyright(c) 2016 - 2019 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -52,6 +52,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdmavt_cq.h>
+#include <rdma/rvt-abi.h>
/*
* Atomic bit definitions for r_aflags.
*/
@@ -83,7 +84,6 @@
* RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating
* next send completion entry not via send DMA
* RVT_S_WAIT_PIO - waiting for a send buffer to be available
- * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets
* RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available
* RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
* RVT_S_WAIT_KMEM - waiting for kernel memory to be available
@@ -157,6 +157,22 @@
#define RVT_SEND_RESERVE_USED IB_SEND_RESERVED_START
#define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1)
+/**
+ * rvt_ud_wr - IB UD work plus AH cache
+ * @wr: valid IB work request
+ * @attr: pointer to an allocated AH attribute
+ *
+ * Special case the UD WR so we can keep track of the AH attributes.
+ *
+ * NOTE: This data structure is strictly ordered: wr, then attr; i.e. attr
+ * MUST come after wr. The ib_ud_wr is sized and copied in rvt_post_one_wr.
+ * The copy assumes that wr is first.
+ */
+struct rvt_ud_wr {
+ struct ib_ud_wr wr;
+ struct rdma_ah_attr *attr;
+};
+
/*
* Send work request queue entry.
* The size of the sg_list is determined when the QP is created and stored
@@ -165,7 +181,7 @@
struct rvt_swqe {
union {
struct ib_send_wr wr; /* don't use wr.sg_list */
- struct ib_ud_wr ud_wr;
+ struct rvt_ud_wr ud_wr;
struct ib_reg_wr reg_wr;
struct ib_rdma_wr rdma_wr;
struct ib_atomic_wr atomic_wr;
@@ -174,36 +190,88 @@
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */
u32 length; /* total length of data in sg_list */
+ void *priv; /* driver dependent field */
struct rvt_sge sg_list[0];
};
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+/**
+ * struct rvt_krwq - kernel receive work request queue
+ * @p_lock: lock to protect the producer of the kernel buffer
+ * @head: index of next entry to fill
+ * @c_lock: lock to protect the consumer of the kernel buffer
+ * @tail: index of next entry to pull
+ * @count: approximate count of total receive entries posted
+ * @curr_wq: pointer to the currently used receive work queue entries
+ * @wq: array of receive work request queue entries
+ *
+ * This structure is used to contain the head pointer,
+ * tail pointer and receive work queue entries for kernel
+ * mode users.
*/
-struct rvt_rwqe {
- u64 wr_id;
- u8 num_sge;
- struct ib_sge sg_list[0];
+struct rvt_krwq {
+ spinlock_t p_lock; /* protect producer */
+ u32 head; /* new work requests posted to the head */
+
+ /* protect consumer */
+ spinlock_t c_lock ____cacheline_aligned_in_smp;
+ u32 tail; /* receives pull requests from here. */
+ u32 count; /* approx count of receive entries posted */
+ struct rvt_rwqe *curr_wq;
+ struct rvt_rwqe wq[];
};
/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
+ * rvt_get_swqe_ah - Return the pointer to the struct rvt_ah
+ * @swqe: valid Send WQE
+ *
*/
-struct rvt_rwq {
- u32 head; /* new work requests posted to the head */
- u32 tail; /* receives pull requests from here. */
- struct rvt_rwqe wq[0];
-};
+static inline struct rvt_ah *rvt_get_swqe_ah(struct rvt_swqe *swqe)
+{
+ return ibah_to_rvtah(swqe->ud_wr.wr.ah);
+}
+
+/**
+ * rvt_get_swqe_ah_attr - Return the cached ah attribute information
+ * @swqe: valid Send WQE
+ *
+ */
+static inline struct rdma_ah_attr *rvt_get_swqe_ah_attr(struct rvt_swqe *swqe)
+{
+ return swqe->ud_wr.attr;
+}
+
+/**
+ * rvt_get_swqe_remote_qpn - Access the remote QPN value
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u32 rvt_get_swqe_remote_qpn(struct rvt_swqe *swqe)
+{
+ return swqe->ud_wr.wr.remote_qpn;
+}
+
+/**
+ * rvt_get_swqe_remote_qkey - Access the remote qkey value
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u32 rvt_get_swqe_remote_qkey(struct rvt_swqe *swqe)
+{
+ return swqe->ud_wr.wr.remote_qkey;
+}
+
+/**
+ * rvt_get_swqe_pkey_index - Access the pkey index
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u16 rvt_get_swqe_pkey_index(struct rvt_swqe *swqe)
+{
+ return swqe->ud_wr.wr.pkey_index;
+}
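These accessors keep drivers from reaching into the new rvt_ud_wr layout directly. A minimal sketch, with a hypothetical function name, of code that only goes through them:

#include <linux/printk.h>
#include <rdma/rdmavt_qp.h>

/* Sketch only: dump the UD-specific fields of a send WQE. */
static void example_log_ud_swqe(struct rvt_swqe *wqe)
{
	pr_debug("UD wqe: dqpn %u qkey 0x%x pkey_index %u ah_attr %p\n",
		 rvt_get_swqe_remote_qpn(wqe),
		 rvt_get_swqe_remote_qkey(wqe),
		 rvt_get_swqe_pkey_index(wqe),
		 rvt_get_swqe_ah_attr(wqe));
}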
struct rvt_rq {
struct rvt_rwq *wq;
+ struct rvt_krwq *kwq;
u32 size; /* size of RWQE array */
u8 max_sge;
/* protect changes in this struct */
@@ -211,20 +279,6 @@
};
/*
- * This structure is used by rvt_mmap() to validate an offset
- * when an mmap() request is made. The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct rvt_mmap_info {
- struct list_head pending_mmaps;
- struct ib_ucontext *context;
- void *obj;
- __u64 offset;
- struct kref ref;
- unsigned size;
-};
-
-/*
* This structure holds the information that the send tasklet needs
* to send a RDMA read response or atomic operation.
*/
@@ -235,6 +289,7 @@
u32 lpsn;
u8 opcode;
u8 sent;
+ void *priv;
};
#define RC_QP_SCALING_INTERVAL 5
@@ -244,6 +299,7 @@
#define RVT_OPERATION_ATOMIC_SGE 0x00000004
#define RVT_OPERATION_LOCAL 0x00000008
#define RVT_OPERATION_USE_RESERVE 0x00000010
+#define RVT_OPERATION_IGN_RNR_CNT 0x00000020
#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
@@ -373,6 +429,7 @@
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */
+ u8 s_acked_ack_queue; /* index into s_ack_queue[] */
struct rvt_sge_state s_ack_rdma_sge;
struct timer_list s_timer;
@@ -395,6 +452,16 @@
u32 limit;
};
+static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct rvt_srq, ibsrq);
+}
+
+static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct rvt_qp, ibqp);
+}
+
#define RVT_QPN_MAX BIT(24)
#define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
#define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
@@ -473,7 +540,7 @@
static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n)
{
return (struct rvt_rwqe *)
- ((char *)rq->wq->wq +
+ ((char *)rq->kwq->curr_wq +
(sizeof(struct rvt_rwqe) +
rq->max_sge * sizeof(struct ib_sge)) * n);
}
@@ -541,7 +608,7 @@
/**
* rvt_qp_wqe_unreserve - clean reserved operation
* @qp - the rvt qp
- * @wqe - the send wqe
+ * @flags - send wqe flags
*
* This decrements the reserve use count.
*
@@ -553,11 +620,9 @@
* the compiler does not juggle the order of the s_last
* ring index and the decrementing of s_reserved_used.
*/
-static inline void rvt_qp_wqe_unreserve(
- struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+static inline void rvt_qp_wqe_unreserve(struct rvt_qp *qp, int flags)
{
- if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) {
+ if (unlikely(flags & RVT_SEND_RESERVE_USED)) {
atomic_dec(&qp->s_reserved_used);
/* insure no compiler re-order up to s_last change */
smp_mb__after_atomic();
@@ -566,42 +631,6 @@
extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
-/**
- * rvt_qp_swqe_complete() - insert send completion
- * @qp - the qp
- * @wqe - the send wqe
- * @status - completion status
- *
- * Insert a send completion into the completion
- * queue if the qp indicates it should be done.
- *
- * See IBTA 10.7.3.1 for info on completion
- * control.
- */
-static inline void rvt_qp_swqe_complete(
- struct rvt_qp *qp,
- struct rvt_swqe *wqe,
- enum ib_wc_opcode opcode,
- enum ib_wc_status status)
-{
- if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
- return;
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- status != IB_WC_SUCCESS) {
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = status;
- wc.opcode = opcode;
- wc.qp = &qp->ibqp;
- wc.byte_len = wqe->length;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
- status != IB_WC_SUCCESS);
- }
-}
-
/*
* Compare the lower 24 bits of the msn values.
* Returns an integer <, ==, or > than zero.
@@ -629,6 +658,16 @@
void rvt_get_credit(struct rvt_qp *qp, u32 aeth);
/**
+ * rvt_restart_sge - rewind the sge state for a wqe
+ * @ss: the sge state pointer
+ * @wqe: the wqe to rewind
+ * @len: the data length from the start of the wqe in bytes
+ *
+ * Returns the remaining data length.
+ */
+u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
+
+/**
* @qp - the qp pair
* @len - the length
*
@@ -664,19 +703,206 @@
return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL;
}
+/**
+ * rvt_lookup_qpn - return the QP with the given QPN
+ * @ibp: the ibport
+ * @qpn: the QP number to look up
+ *
+ * The caller must hold the rcu_read_lock(), and keep the lock until
+ * the returned qp is no longer in use.
+ */
+static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
+ struct rvt_ibport *rvp,
+ u32 qpn) __must_hold(RCU)
+{
+ struct rvt_qp *qp = NULL;
+
+ if (unlikely(qpn <= 1)) {
+ qp = rcu_dereference(rvp->qp[qpn]);
+ } else {
+ u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits);
+
+ for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp;
+ qp = rcu_dereference(qp->next))
+ if (qp->ibqp.qp_num == qpn)
+ break;
+ }
+ return qp;
+}
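rvt_lookup_qpn() moves out of rdma_vt.h (where it carried a TODO to relocate it) into this header; the RCU contract is unchanged. A minimal usage sketch, with a hypothetical function name:

#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>

/* Sketch only: resolve a QPN and use the QP entirely under RCU. */
static void example_deliver_to_qpn(struct rvt_dev_info *rdi,
				   struct rvt_ibport *rvp, u32 qpn)
{
	struct rvt_qp *qp;

	rcu_read_lock();
	qp = rvt_lookup_qpn(rdi, rvp, qpn);
	if (qp) {
		/* hand the packet to @qp while still holding the RCU lock */
	}
	rcu_read_unlock();
}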
+
+/**
+ * rvt_mod_retry_timer - mod a retry timer
+ * @qp - the QP
+ * @shift - timeout shift to wait for multiple packets
+ * Modify a potentially already running retry timer
+ */
+static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
+{
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ lockdep_assert_held(&qp->s_lock);
+ qp->s_flags |= RVT_S_TIMER;
+ /* 4.096 usec. * (1 << qp->timeout) */
+ mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies +
+ (qp->timeout_jiffies << shift));
+}
+
+static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
+{
+ return rvt_mod_retry_timer_ext(qp, 0);
+}
+
+/**
+ * rvt_put_qp_swqe - drop refs held by swqe
+ * @qp: the send qp
+ * @wqe: the send wqe
+ *
+ * This drops any references held by the swqe
+ */
+static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ rvt_put_swqe(wqe);
+ if (qp->allowed_ops == IB_OPCODE_UD)
+ rdma_destroy_ah_attr(wqe->ud_wr.attr);
+}
+
+/**
+ * rvt_qp_swqe_incr - increment ring index
+ * @qp: the qp
+ * @val: the starting value
+ *
+ * Return: the new value wrapping as appropriate
+ */
+static inline u32
+rvt_qp_swqe_incr(struct rvt_qp *qp, u32 val)
+{
+ if (++val >= qp->s_size)
+ val = 0;
+ return val;
+}
+
+int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
+
+/**
+ * rvt_recv_cq - add a new entry to the completion queue
+ * used by the receive queue
+ * @qp: the QP whose receive CQ is used
+ * @wc: work completion entry to add
+ * @solicited: true if @wc is solicited
+ *
+ * This is a wrapper around rvt_cq_enter() for the receive queue.
+ * If rvt_cq_enter() returns false, the CQ is full and the QP is
+ * put into the error state.
+ */
+static inline void rvt_recv_cq(struct rvt_qp *qp, struct ib_wc *wc,
+ bool solicited)
+{
+ struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.recv_cq);
+
+ if (unlikely(!rvt_cq_enter(cq, wc, solicited)))
+ rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR);
+}
+
+/**
+ * rvt_send_cq - add a new entry to the completion queue
+ * used by the send queue
+ * @qp: the QP whose send CQ is used
+ * @wc: work completion entry to add
+ * @solicited: true if @wc is solicited
+ *
+ * This is a wrapper around rvt_cq_enter() for the send queue.
+ * If rvt_cq_enter() returns false, the CQ is full and the QP is
+ * put into the error state.
+ */
+static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc,
+ bool solicited)
+{
+ struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.send_cq);
+
+ if (unlikely(!rvt_cq_enter(cq, wc, solicited)))
+ rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR);
+}
+
+/**
+ * rvt_qp_complete_swqe - insert send completion
+ * @qp - the qp
+ * @wqe - the send wqe
+ * @opcode - wc operation (driver dependent)
+ * @status - completion status
+ *
+ * Update the s_last information, and then insert a send
+ * completion into the completion
+ * queue if the qp indicates it should be done.
+ *
+ * See IBTA 10.7.3.1 for info on completion
+ * control.
+ *
+ * Return: new last
+ */
+static inline u32
+rvt_qp_complete_swqe(struct rvt_qp *qp,
+ struct rvt_swqe *wqe,
+ enum ib_wc_opcode opcode,
+ enum ib_wc_status status)
+{
+ bool need_completion;
+ u64 wr_id;
+ u32 byte_len, last;
+ int flags = wqe->wr.send_flags;
+
+ rvt_qp_wqe_unreserve(qp, flags);
+ rvt_put_qp_swqe(qp, wqe);
+
+ need_completion =
+ !(flags & RVT_SEND_RESERVE_USED) &&
+ (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
+ (flags & IB_SEND_SIGNALED) ||
+ status != IB_WC_SUCCESS);
+ if (need_completion) {
+ wr_id = wqe->wr.wr_id;
+ byte_len = wqe->length;
+ /* above fields required before writing s_last */
+ }
+ last = rvt_qp_swqe_incr(qp, qp->s_last);
+ /* see rvt_qp_is_avail() */
+ smp_store_release(&qp->s_last, last);
+ if (need_completion) {
+ struct ib_wc w = {
+ .wr_id = wr_id,
+ .status = status,
+ .opcode = opcode,
+ .qp = &qp->ibqp,
+ .byte_len = byte_len,
+ };
+ rvt_send_cq(qp, &w, status != IB_WC_SUCCESS);
+ }
+ return last;
+}
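rvt_qp_complete_swqe() replaces the rvt_qp_swqe_complete() helper removed above: it drops the WQE references, advances s_last with release semantics, and only then posts a completion when the signaling rules require one. A minimal caller sketch (not from this patch; the opcode translation via ib_rvt_wc_opcode[] mirrors how the existing drivers do it):

#include <rdma/rdmavt_qp.h>

/* Sketch only: retire the oldest send WQE on @qp; caller holds qp->s_lock. */
static u32 example_complete_oldest_swqe(struct rvt_qp *qp,
					enum ib_wc_status status)
{
	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);

	return rvt_qp_complete_swqe(qp, wqe,
				    ib_rvt_wc_opcode[wqe->wr.opcode],
				    status);
}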
+
extern const int ib_rvt_state_ops[];
struct rvt_dev_info;
int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only);
void rvt_comm_est(struct rvt_qp *qp);
-int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
unsigned long rvt_rnr_tbl_to_usec(u32 index);
enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t);
void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth);
void rvt_del_timers_sync(struct rvt_qp *qp);
void rvt_stop_rc_timers(struct rvt_qp *qp);
-void rvt_add_retry_timer(struct rvt_qp *qp);
+void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift);
+static inline void rvt_add_retry_timer(struct rvt_qp *qp)
+{
+ rvt_add_retry_timer_ext(qp, 0);
+}
+
+void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
+ void *data, u32 length,
+ bool release, bool copy_last);
+void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ enum ib_wc_status status);
+void rvt_ruc_loopback(struct rvt_qp *qp);
/**
* struct rvt_qp_iter - the iterator for QPs
@@ -700,6 +926,88 @@
int n;
};
+/**
+ * ib_cq_tail - Return tail index of cq buffer
+ * @send_cq - The cq for send
+ *
+ * This is called in qp_iter_print to get tail
+ * of cq buffer.
+ */
+static inline u32 ib_cq_tail(struct ib_cq *send_cq)
+{
+ struct rvt_cq *cq = ibcq_to_rvtcq(send_cq);
+
+ return ibcq_to_rvtcq(send_cq)->ip ?
+ RDMA_READ_UAPI_ATOMIC(cq->queue->tail) :
+ ibcq_to_rvtcq(send_cq)->kqueue->tail;
+}
+
+/**
+ * ib_cq_head - Return head index of cq buffer
+ * @send_cq - The cq for send
+ *
+ * This is called in qp_iter_print to get head
+ * of cq buffer.
+ */
+static inline u32 ib_cq_head(struct ib_cq *send_cq)
+{
+ struct rvt_cq *cq = ibcq_to_rvtcq(send_cq);
+
+ return ibcq_to_rvtcq(send_cq)->ip ?
+ RDMA_READ_UAPI_ATOMIC(cq->queue->head) :
+ ibcq_to_rvtcq(send_cq)->kqueue->head;
+}
+
+/**
+ * rvt_free_rq - free memory allocated for rvt_rq struct
+ * @rq: receive queue data structure
+ *
+ * This function should only be called if the rvt_mmap_info()
+ * has not succeeded.
+ */
+static inline void rvt_free_rq(struct rvt_rq *rq)
+{
+ kvfree(rq->kwq);
+ rq->kwq = NULL;
+ vfree(rq->wq);
+ rq->wq = NULL;
+}
+
+/**
+ * rvt_to_iport - Get the ibport pointer
+ * @qp: the qp pointer
+ *
+ * This function returns the ibport pointer from the qp pointer.
+ */
+static inline struct rvt_ibport *rvt_to_iport(struct rvt_qp *qp)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ return rdi->ports[qp->port_num - 1];
+}
+
+/**
+ * rvt_rc_credit_avail - Check if there are enough RC credits for the request
+ * @qp: the qp
+ * @wqe: the request
+ *
+ * This function returns false when there are not enough credits for the given
+ * request and true otherwise.
+ */
+static inline bool rvt_rc_credit_avail(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ lockdep_assert_held(&qp->s_lock);
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+ struct rvt_ibport *rvp = rvt_to_iport(qp);
+
+ qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ rvp->n_rc_crwaits++;
+ return false;
+ }
+ return true;
+}
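rvt_rc_credit_avail() centralizes the SSN credit check and sets RVT_S_WAIT_SSN_CREDIT (and the new n_rc_crwaits counter) when the requester has to stall. A minimal sketch, with hypothetical names, of a send engine gating on it:

#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>

/* Sketch only: try to issue an RC request, honoring flow-control credits. */
static bool example_try_send_rc(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	lockdep_assert_held(&qp->s_lock);

	if (!rvt_rc_credit_avail(qp, wqe))
		return false;	/* RVT_S_WAIT_SSN_CREDIT is already set */

	/* ... build and queue the packet described by @wqe ... */
	return true;
}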
+
struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
u64 v,
void (*cb)(struct rvt_qp *qp, u64 v));
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 9654d33..83df1ec 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -7,12 +7,15 @@
#define _RDMA_RESTRACK_H_
#include <linux/typecheck.h>
-#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/kref.h>
#include <linux/completion.h>
#include <linux/sched/task.h>
#include <uapi/rdma/rdma_netlink.h>
+#include <linux/xarray.h>
+
+struct ib_device;
+struct sk_buff;
/**
* enum rdma_restrack_type - HW objects to track
@@ -39,36 +42,19 @@
*/
RDMA_RESTRACK_MR,
/**
+ * @RDMA_RESTRACK_CTX: Verbs contexts (CTX)
+ */
+ RDMA_RESTRACK_CTX,
+ /**
+ * @RDMA_RESTRACK_COUNTER: Statistic Counter
+ */
+ RDMA_RESTRACK_COUNTER,
+ /**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
RDMA_RESTRACK_MAX
};
-#define RDMA_RESTRACK_HASH_BITS 8
-struct rdma_restrack_entry;
-
-/**
- * struct rdma_restrack_root - main resource tracking management
- * entity, per-device
- */
-struct rdma_restrack_root {
- /*
- * @rwsem: Read/write lock to protect lists
- */
- struct rw_semaphore rwsem;
- /**
- * @hash: global database for all resources per-device
- */
- DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS);
- /**
- * @fill_res_entry: driver-specific fill function
- *
- * Allows rdma drivers to add their own restrack attributes.
- */
- int (*fill_res_entry)(struct sk_buff *msg,
- struct rdma_restrack_entry *entry);
-};
-
/**
* struct rdma_restrack_entry - metadata per-entry
*/
@@ -105,42 +91,24 @@
*/
const char *kern_name;
/**
- * @node: hash table entry
- */
- struct hlist_node node;
- /**
* @type: various objects in restrack database
*/
enum rdma_restrack_type type;
+ /**
+ * @user: user resource
+ */
+ bool user;
+ /**
+ * @id: ID to expose to users
+ */
+ u32 id;
};
-/**
- * rdma_restrack_init() - initialize resource tracking
- * @res: resource tracking root
- */
-void rdma_restrack_init(struct rdma_restrack_root *res);
+int rdma_restrack_count(struct ib_device *dev,
+ enum rdma_restrack_type type);
-/**
- * rdma_restrack_clean() - clean resource tracking
- * @res: resource tracking root
- */
-void rdma_restrack_clean(struct rdma_restrack_root *res);
-
-/**
- * rdma_restrack_count() - the current usage of specific object
- * @res: resource entry
- * @type: actual type of object to operate
- * @ns: PID namespace
- */
-int rdma_restrack_count(struct rdma_restrack_root *res,
- enum rdma_restrack_type type,
- struct pid_namespace *ns);
-
-/**
- * rdma_restrack_add() - add object to the reource tracking database
- * @res: resource entry
- */
-void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_kadd(struct rdma_restrack_entry *res);
+void rdma_restrack_uadd(struct rdma_restrack_entry *res);
/**
* rdma_restrack_del() - delete object from the reource tracking database
@@ -155,7 +123,7 @@
*/
static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
{
- return !res->task;
+ return !res->user;
}
/**
@@ -173,16 +141,10 @@
/**
* rdma_restrack_set_task() - set the task for this resource
* @res: resource entry
- * @task: task struct
+ * @caller: kernel name, the current task will be used if the caller is NULL.
*/
-static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- struct task_struct *task)
-{
- if (res->task)
- put_task_struct(res->task);
- get_task_struct(task);
- res->task = task;
-}
+void rdma_restrack_set_task(struct rdma_restrack_entry *res,
+ const char *caller);
/*
* Helper functions for rdma drivers when filling out
@@ -194,4 +156,7 @@
int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value);
int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name,
u64 value);
+struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev,
+ enum rdma_restrack_type type,
+ u32 id);
#endif /* _RDMA_RESTRACK_H_ */
diff --git a/include/rdma/rw.h b/include/rdma/rw.h
index a3cbbc7..6ad9dc8 100644
--- a/include/rdma/rw.h
+++ b/include/rdma/rw.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#ifndef _RDMA_RW_H
#define _RDMA_RW_H
@@ -47,15 +39,6 @@
struct ib_send_wr inv_wr;
struct ib_mr *mr;
} *reg;
-
- struct {
- struct rdma_rw_reg_ctx data;
- struct rdma_rw_reg_ctx prot;
- struct ib_send_wr sig_inv_wr;
- struct ib_mr *sig_mr;
- struct ib_sge sig_sge;
- struct ib_sig_handover_wr sig_wr;
- } *sig;
};
};
diff --git a/include/rdma/signature.h b/include/rdma/signature.h
new file mode 100644
index 0000000..d16b0fc
--- /dev/null
+++ b/include/rdma/signature.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_SIGNATURE_H_
+#define _RDMA_SIGNATURE_H_
+
+#include <linux/types.h>
+
+enum ib_signature_prot_cap {
+ IB_PROT_T10DIF_TYPE_1 = 1,
+ IB_PROT_T10DIF_TYPE_2 = 1 << 1,
+ IB_PROT_T10DIF_TYPE_3 = 1 << 2,
+};
+
+enum ib_signature_guard_cap {
+ IB_GUARD_T10DIF_CRC = 1,
+ IB_GUARD_T10DIF_CSUM = 1 << 1,
+};
+
+/**
+ * enum ib_signature_type - Signature types
+ * @IB_SIG_TYPE_NONE: Unprotected.
+ * @IB_SIG_TYPE_T10_DIF: Type T10-DIF
+ */
+enum ib_signature_type {
+ IB_SIG_TYPE_NONE,
+ IB_SIG_TYPE_T10_DIF,
+};
+
+/**
+ * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types
+ * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
+ * @IB_T10DIF_CSUM: Corresponds to IP checksum rules.
+ */
+enum ib_t10_dif_bg_type {
+ IB_T10DIF_CRC,
+ IB_T10DIF_CSUM,
+};
+
+/**
+ * struct ib_t10_dif_domain - Parameters specific for T10-DIF
+ * domain.
+ * @bg_type: T10-DIF block guard type (CRC|CSUM)
+ * @pi_interval: protection information interval.
+ * @bg: seed of guard computation.
+ * @app_tag: application tag of guard block
+ * @ref_tag: initial guard block reference tag.
+ * @ref_remap: Indicate whether the reftag increments with each block
+ * @app_escape: Indicate to skip block check if apptag=0xffff
+ * @ref_escape: Indicate to skip block check if reftag=0xffffffff
+ * @apptag_check_mask: check bitmask of application tag.
+ */
+struct ib_t10_dif_domain {
+ enum ib_t10_dif_bg_type bg_type;
+ u16 pi_interval;
+ u16 bg;
+ u16 app_tag;
+ u32 ref_tag;
+ bool ref_remap;
+ bool app_escape;
+ bool ref_escape;
+ u16 apptag_check_mask;
+};
+
+/**
+ * struct ib_sig_domain - Parameters for signature domain
+ * @sig_type: specific signature type
+ * @sig: union of all signature domain attributes that may
+ * be used to set domain layout.
+ */
+struct ib_sig_domain {
+ enum ib_signature_type sig_type;
+ union {
+ struct ib_t10_dif_domain dif;
+ } sig;
+};
+
+/**
+ * struct ib_sig_attrs - Parameters for signature handover operation
+ * @check_mask: bitmask for signature byte check (8 bytes)
+ * @mem: memory domain layout descriptor.
+ * @wire: wire domain layout descriptor.
+ * @meta_length: metadata length
+ */
+struct ib_sig_attrs {
+ u8 check_mask;
+ struct ib_sig_domain mem;
+ struct ib_sig_domain wire;
+ int meta_length;
+};
+
+enum ib_sig_err_type {
+ IB_SIG_BAD_GUARD,
+ IB_SIG_BAD_REFTAG,
+ IB_SIG_BAD_APPTAG,
+};
+
+/*
+ * Signature check masks (8 bytes in total) according to the T10-PI standard:
+ * -------- -------- ------------
+ * | GUARD | APPTAG | REFTAG |
+ * | 2B | 2B | 4B |
+ * -------- -------- ------------
+ */
+enum {
+ IB_SIG_CHECK_GUARD = 0xc0,
+ IB_SIG_CHECK_APPTAG = 0x30,
+ IB_SIG_CHECK_REFTAG = 0x0f,
+};
+
+/*
+ * struct ib_sig_err - signature error descriptor
+ */
+struct ib_sig_err {
+ enum ib_sig_err_type err_type;
+ u32 expected;
+ u32 actual;
+ u64 sig_err_offset;
+ u32 key;
+};
+
+#endif /* _RDMA_SIGNATURE_H_ */
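This new header gathers the signature/T10-DIF definitions in one place. A minimal, illustrative sketch of filling struct ib_sig_attrs for a CRC-guarded T10-DIF wire domain; the 512-byte interval, the function name and the chosen checks are assumptions, not taken from this patch:

#include <linux/string.h>
#include <rdma/signature.h>

/* Sketch only: protect the wire side with T10-DIF, leave memory unprotected. */
static void example_fill_sig_attrs(struct ib_sig_attrs *sig, u32 lba)
{
	memset(sig, 0, sizeof(*sig));

	sig->mem.sig_type = IB_SIG_TYPE_NONE;

	sig->wire.sig_type = IB_SIG_TYPE_T10_DIF;
	sig->wire.sig.dif.bg_type = IB_T10DIF_CRC;
	sig->wire.sig.dif.pi_interval = 512;	/* assumed block size */
	sig->wire.sig.dif.ref_tag = lba;	/* seed the reftag from the LBA */
	sig->wire.sig.dif.ref_remap = true;	/* reftag increments per block */
	sig->wire.sig.dif.app_escape = true;	/* skip blocks with apptag 0xffff */
	sig->wire.sig.dif.apptag_check_mask = 0xffff;

	/* verify guard and reftag on the wire, ignore the apptag */
	sig->check_mask = IB_SIG_CHECK_GUARD | IB_SIG_CHECK_REFTAG;
}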
diff --git a/include/rdma/tid_rdma_defs.h b/include/rdma/tid_rdma_defs.h
new file mode 100644
index 0000000..08fe47c
--- /dev/null
+++ b/include/rdma/tid_rdma_defs.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#ifndef TID_RDMA_DEFS_H
+#define TID_RDMA_DEFS_H
+
+#include <rdma/ib_pack.h>
+
+struct tid_rdma_read_req {
+ __le32 kdeth0;
+ __le32 kdeth1;
+ struct ib_reth reth;
+ __be32 tid_flow_psn;
+ __be32 tid_flow_qp;
+ __be32 verbs_qp;
+};
+
+struct tid_rdma_read_resp {
+ __le32 kdeth0;
+ __le32 kdeth1;
+ __be32 aeth;
+ __be32 reserved[4];
+ __be32 verbs_psn;
+ __be32 verbs_qp;
+};
+
+struct tid_rdma_write_req {
+ __le32 kdeth0;
+ __le32 kdeth1;
+ struct ib_reth reth;
+ __be32 reserved[2];
+ __be32 verbs_qp;
+};
+
+struct tid_rdma_write_resp {
+ __le32 kdeth0;
+ __le32 kdeth1;
+ __be32 aeth;
+ __be32 reserved[3];
+ __be32 tid_flow_psn;
+ __be32 tid_flow_qp;
+ __be32 verbs_qp;
+};
+
+struct tid_rdma_write_data {
+ __le32 kdeth0;
+ __le32 kdeth1;
+ __be32 reserved[6];
+ __be32 verbs_qp;
+};
+
+struct tid_rdma_resync {
+ __le32 kdeth0;
+ __le32 kdeth1;
+ __be32 reserved[6];
+ __be32 verbs_qp;
+};
+
+struct tid_rdma_ack {
+ __le32 kdeth0;
+ __le32 kdeth1;
+ __be32 aeth;
+ __be32 reserved[2];
+ __be32 tid_flow_psn;
+ __be32 verbs_psn;
+ __be32 tid_flow_qp;
+ __be32 verbs_qp;
+};
+
+/*
+ * TID RDMA Opcodes
+ */
+#define IB_OPCODE_TID_RDMA 0xe0
+enum {
+ IB_OPCODE_WRITE_REQ = 0x0,
+ IB_OPCODE_WRITE_RESP = 0x1,
+ IB_OPCODE_WRITE_DATA = 0x2,
+ IB_OPCODE_WRITE_DATA_LAST = 0x3,
+ IB_OPCODE_READ_REQ = 0x4,
+ IB_OPCODE_READ_RESP = 0x5,
+ IB_OPCODE_RESYNC = 0x6,
+ IB_OPCODE_ACK = 0x7,
+
+ IB_OPCODE(TID_RDMA, WRITE_REQ),
+ IB_OPCODE(TID_RDMA, WRITE_RESP),
+ IB_OPCODE(TID_RDMA, WRITE_DATA),
+ IB_OPCODE(TID_RDMA, WRITE_DATA_LAST),
+ IB_OPCODE(TID_RDMA, READ_REQ),
+ IB_OPCODE(TID_RDMA, READ_RESP),
+ IB_OPCODE(TID_RDMA, RESYNC),
+ IB_OPCODE(TID_RDMA, ACK),
+};
+
+#define TID_OP(x) IB_OPCODE_TID_RDMA_##x
+
+/*
+ * Define TID RDMA specific WR opcodes. The ib_wr_opcode
+ * enum already provides some reserved values for use by
+ * low level drivers. Two of those are used but renamed
+ * to be more descriptive.
+ */
+#define IB_WR_TID_RDMA_WRITE IB_WR_RESERVED1
+#define IB_WR_TID_RDMA_READ IB_WR_RESERVED2
+
+#endif /* TID_RDMA_DEFS_H */
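The last eight enumerators use the IB_OPCODE() helper from <rdma/ib_pack.h>, which adds the per-packet code to the 0xe0 group base, so TID_OP(READ_REQ) should evaluate to 0xe4. A small sketch built only on the constants above (assumption: the TID RDMA opcodes stay contiguous from WRITE_REQ to ACK):

#include <rdma/tid_rdma_defs.h>

/* Sketch only: classify a BTH opcode as belonging to the TID RDMA group. */
static bool example_is_tid_rdma_opcode(u8 opcode)
{
	return opcode >= TID_OP(WRITE_REQ) && opcode <= TID_OP(ACK);
}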
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 9e997c3..28570ac 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -52,6 +52,7 @@
UVERBS_ATTR_TYPE_IDR,
UVERBS_ATTR_TYPE_FD,
UVERBS_ATTR_TYPE_ENUM_IN,
+ UVERBS_ATTR_TYPE_IDRS_ARRAY,
};
enum uverbs_obj_access {
@@ -78,6 +79,8 @@
*/
u8 alloc_and_copy:1;
u8 mandatory:1;
+ /* True if this is from UVERBS_ATTR_UHW */
+ u8 is_udata:1;
union {
struct {
@@ -101,7 +104,7 @@
} enum_def;
} u;
- /* This weird split of the enum lets us remove some padding */
+ /* This weird split lets us remove some padding */
union {
struct {
/*
@@ -111,6 +114,17 @@
*/
const struct uverbs_attr_spec *ids;
} enum_def;
+
+ struct {
+ /*
+ * higher bits mean the namespace and lower bits mean
+ * the type id within the namespace.
+ */
+ u16 obj_type;
+ u16 min_len;
+ u16 max_len;
+ u8 access;
+ } objs_arr;
} u2;
};
@@ -128,6 +142,13 @@
*
* The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
* the object slot, and OBJ_ID,METH_ID,0 and returns the method slot.
+ *
+ * This also encodes the tables for the write() and extended write() commands
+ * using the coding
+ * OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command #
+ * OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex #
+ * i.e. the WRITE path is treated as a special method type in the ioctl
+ * framework.
*/
enum uapi_radix_data {
UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
@@ -135,12 +156,16 @@
UVERBS_API_ATTR_KEY_BITS = 6,
UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
+ UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS,
UVERBS_API_METHOD_KEY_BITS = 5,
UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
- UVERBS_API_METHOD_KEY_NUM_CORE = 24,
- UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) -
- UVERBS_API_METHOD_KEY_NUM_CORE,
+ UVERBS_API_METHOD_KEY_NUM_CORE = 22,
+ UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT,
+ UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT,
+ UVERBS_API_METHOD_KEY_NUM_DRIVER =
+ (UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) -
+ UVERBS_API_METHOD_KEY_NUM_CORE,
UVERBS_API_METHOD_KEY_MASK = GENMASK(
UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
UVERBS_API_METHOD_KEY_SHIFT),
@@ -193,7 +218,22 @@
return id << UVERBS_API_METHOD_KEY_SHIFT;
}
-static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
+static inline __attribute_const__ u32 uapi_key_write_method(u32 id)
+{
+ if (id >= UVERBS_API_WRITE_KEY_NUM)
+ return UVERBS_API_KEY_ERR;
+ return UVERBS_API_METHOD_IS_WRITE | id;
+}
+
+static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id)
+{
+ if (id >= UVERBS_API_WRITE_KEY_NUM)
+ return UVERBS_API_KEY_ERR;
+ return UVERBS_API_METHOD_IS_WRITE_EX | id;
+}
+
+static inline __attribute_const__ u32
+uapi_key_attr_to_ioctl_method(u32 attr_key)
{
return attr_key &
(UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
@@ -201,10 +241,23 @@
static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
{
- return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+ unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+ return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
(key & UVERBS_API_ATTR_KEY_MASK) == 0;
}
+static inline __attribute_const__ bool uapi_key_is_write_method(u32 key)
+{
+ return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE;
+}
+
+static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key)
+{
+ return (key & UVERBS_API_METHOD_KEY_MASK) ==
+ UVERBS_API_METHOD_IS_WRITE_EX;
+}
+
static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
{
/* 0 is the method slot itself */
@@ -234,9 +287,12 @@
return id;
}
+/* Only true for ioctl methods */
static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
{
- return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+ unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+ return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
(key & UVERBS_API_ATTR_KEY_MASK) != 0;
}
@@ -251,6 +307,11 @@
return attr_key - 1;
}
+static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey)
+{
+ return attr_bkey + 1;
+}
+
/*
* =======================================
* Verbs definitions
@@ -268,8 +329,7 @@
u32 flags;
size_t num_attrs;
const struct uverbs_attr_def * const (*attrs)[];
- int (*handler)(struct ib_uverbs_file *ufile,
- struct uverbs_attr_bundle *ctx);
+ int (*handler)(struct uverbs_attr_bundle *attrs);
};
struct uverbs_object_def {
@@ -279,11 +339,132 @@
const struct uverbs_method_def * const (*methods)[];
};
-struct uverbs_object_tree_def {
- size_t num_objects;
- const struct uverbs_object_def * const (*objects)[];
+enum uapi_definition_kind {
+ UAPI_DEF_END = 0,
+ UAPI_DEF_OBJECT_START,
+ UAPI_DEF_WRITE,
+ UAPI_DEF_CHAIN_OBJ_TREE,
+ UAPI_DEF_CHAIN,
+ UAPI_DEF_IS_SUPPORTED_FUNC,
+ UAPI_DEF_IS_SUPPORTED_DEV_FN,
};
+enum uapi_definition_scope {
+ UAPI_SCOPE_OBJECT = 1,
+ UAPI_SCOPE_METHOD = 2,
+};
+
+struct uapi_definition {
+ u8 kind;
+ u8 scope;
+ union {
+ struct {
+ u16 object_id;
+ } object_start;
+ struct {
+ u16 command_num;
+ u8 is_ex:1;
+ u8 has_udata:1;
+ u8 has_resp:1;
+ u8 req_size;
+ u8 resp_size;
+ } write;
+ };
+
+ union {
+ bool (*func_is_supported)(struct ib_device *device);
+ int (*func_write)(struct uverbs_attr_bundle *attrs);
+ const struct uapi_definition *chain;
+ const struct uverbs_object_def *chain_obj_tree;
+ size_t needs_fn_offset;
+ };
+};
+
+/* Define things connected to object_id */
+#define DECLARE_UVERBS_OBJECT(_object_id, ...) \
+ { \
+ .kind = UAPI_DEF_OBJECT_START, \
+ .object_start = { .object_id = _object_id }, \
+ }, \
+ ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...) \
+ { \
+ .kind = UAPI_DEF_WRITE, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .write = { .is_ex = 0, .command_num = _command_num }, \
+ .func_write = _func, \
+ _cmd_desc, \
+ }, \
+ ##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...) \
+ { \
+ .kind = UAPI_DEF_WRITE, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .write = { .is_ex = 1, .command_num = _command_num }, \
+ .func_write = _func, \
+ _cmd_desc, \
+ }, \
+ ##__VA_ARGS__
+
+/*
+ * Object is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \
+ .scope = UAPI_SCOPE_OBJECT, \
+ .needs_fn_offset = \
+ offsetof(struct ib_device_ops, ibdev_fn) + \
+ BUILD_BUG_ON_ZERO( \
+ sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \
+ sizeof(void *)), \
+ }
+
+/*
+ * Method is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \
+ .scope = UAPI_SCOPE_METHOD, \
+ .needs_fn_offset = \
+ offsetof(struct ib_device_ops, ibdev_fn) + \
+ BUILD_BUG_ON_ZERO( \
+ sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \
+ sizeof(void *)), \
+ }
+
+/* Call a function to determine if the entire object is supported or not */
+#define UAPI_DEF_IS_OBJ_SUPPORTED(_func) \
+ { \
+ .kind = UAPI_DEF_IS_SUPPORTED_FUNC, \
+ .scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func, \
+ }
+
+/* Include another struct uapi_definition in this one */
+#define UAPI_DEF_CHAIN(_def_var) \
+ { \
+ .kind = UAPI_DEF_CHAIN, .chain = _def_var, \
+ }
+
+/* Temporary until the tree base description is replaced */
+#define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr, ...) \
+ { \
+ .kind = UAPI_DEF_CHAIN_OBJ_TREE, \
+ .object_start = { .object_id = _object_enum }, \
+ .chain_obj_tree = _object_ptr, \
+ }, \
+ ##__VA_ARGS__
+#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) \
+ UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum), \
+ ##__VA_ARGS__)
+
/*
* =======================================
* Attribute Specifications
@@ -323,6 +504,33 @@
#define UA_MANDATORY .mandatory = 1
#define UA_OPTIONAL .mandatory = 0
+/*
+ * _min_len must be bigger than 0 and _max_len must be smaller than 4095. Only
+ * READ/WRITE accesses are supported.
+ */
+#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \
+ ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = (_attr_id) + \
+ BUILD_BUG_ON_ZERO((_min_len) == 0 || \
+ (_max_len) > \
+ PAGE_SIZE / sizeof(void *) || \
+ (_min_len) > (_max_len) || \
+ (_access) == UVERBS_ACCESS_NEW || \
+ (_access) == UVERBS_ACCESS_DESTROY), \
+ .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \
+ .u2.objs_arr.obj_type = _idr_type, \
+ .u2.objs_arr.access = _access, \
+ .u2.objs_arr.min_len = _min_len, \
+ .u2.objs_arr.max_len = _max_len, \
+ __VA_ARGS__ } })
+
+/*
+ * Only for use with UVERBS_ATTR_IDR; allows any uobject type to be accepted.
+ * The user must validate the type of the uobject instead.
+ */
+#define UVERBS_IDR_ANY_OBJECT 0xFFFF
+
#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \
(&(const struct uverbs_attr_def){ \
.id = _attr_id, \
@@ -365,6 +573,15 @@
__VA_ARGS__ }, \
})
+/* An input value that is a member in the enum _enum_type. */
+#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) \
+ UVERBS_ATTR_PTR_IN( \
+ _attr_id, \
+ UVERBS_ATTR_SIZE( \
+ sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \
+ sizeof(u64)), \
+ __VA_ARGS__)
+
/*
* An input value that is a bitwise combination of values of _enum_type.
* This permits the flag value to be passed as either a u32 or u64, it must
@@ -386,25 +603,12 @@
#define UVERBS_ATTR_UHW() \
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \
UVERBS_ATTR_MIN_SIZE(0), \
- UA_OPTIONAL), \
+ UA_OPTIONAL, \
+ .is_udata = 1), \
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \
UVERBS_ATTR_MIN_SIZE(0), \
- UA_OPTIONAL)
-
-/*
- * =======================================
- * Declaration helpers
- * =======================================
- */
-
-#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \
- static const struct uverbs_object_def *const _name##_ptr[] = { \
- __VA_ARGS__, \
- }; \
- static const struct uverbs_object_tree_def _name = { \
- .num_objects = ARRAY_SIZE(_name##_ptr), \
- .objects = &_name##_ptr, \
- }
+ UA_OPTIONAL, \
+ .is_udata = 1)
/* =================================================
* Parsing infrastructure
@@ -431,15 +635,24 @@
const struct uverbs_api_attr *attr_elm;
};
+struct uverbs_objs_arr_attr {
+ struct ib_uobject **uobjects;
+ u16 len;
+};
+
struct uverbs_attr {
union {
struct uverbs_ptr_attr ptr_attr;
struct uverbs_obj_attr obj_attr;
+ struct uverbs_objs_arr_attr objs_arr_attr;
};
};
struct uverbs_attr_bundle {
+ struct ib_udata driver_udata;
+ struct ib_udata ucore;
struct ib_uverbs_file *ufile;
+ struct ib_ucontext *context;
DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
struct uverbs_attr attrs[];
};
@@ -451,6 +664,23 @@
attrs_bundle->attr_present);
}
+/**
+ * rdma_udata_to_drv_context - Helper macro to get the driver's context out of
+ * ib_udata which is embedded in uverbs_attr_bundle.
+ *
+ * If udata is not NULL this cannot fail. Otherwise a NULL udata will result
+ * in a NULL ucontext pointer, as a safety precaution. Callers should be using
+ * 'udata' to determine if the driver call is in user or kernel mode, not
+ * 'ucontext'.
+ *
+ */
+#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \
+ (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \
+ driver_udata) \
+ ->context, \
+ drv_dev_struct, member) : \
+ (drv_dev_struct *)NULL)
+
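rdma_udata_to_drv_context() lets a driver verb recover its own ucontext from the udata now embedded in the attribute bundle, and yields NULL for kernel callers. A minimal sketch; the driver structure, member and function names are hypothetical:

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

struct mydrv_ucontext {
	struct ib_ucontext ibucontext;	/* hypothetical driver ucontext */
};

/* Sketch only: look up the caller's driver context inside a verb. */
static int example_alloc_thing(struct ib_udata *udata)
{
	struct mydrv_ucontext *uctx =
		rdma_udata_to_drv_context(udata, struct mydrv_ucontext,
					  ibucontext);

	if (uctx) {
		/* user-space caller: per-process settings live in uctx */
	}
	return 0;
}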
#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT)
static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
@@ -507,6 +737,53 @@
return attr->ptr_attr.len;
}
+/*
+ * uverbs_attr_ptr_get_array_size() - Get the number of array elements
+ * carried by a ptr attribute.
+ * @attrs: The attribute bundle
+ * @idx: The ID of the attribute
+ * @elem_size: The size of the element in the array
+ */
+static inline int
+uverbs_attr_ptr_get_array_size(struct uverbs_attr_bundle *attrs, u16 idx,
+ size_t elem_size)
+{
+ int size = uverbs_attr_get_len(attrs, idx);
+
+ if (size < 0)
+ return size;
+
+ if (size % elem_size)
+ return -EINVAL;
+
+ return size / elem_size;
+}
+
+/**
+ * uverbs_attr_get_uobjs_arr() - Provides the array of uobjects for a
+ * UVERBS_ATTR_TYPE_IDRS_ARRAY attribute.
+ * @attrs_bundle: The attribute bundle
+ * @attr_idx: The ID of the attribute
+ * @arr: Returned pointer to the array of uobject pointers, or NULL if
+ * the attribute isn't provided.
+ *
+ * Return: The array length or 0 if no attribute was provided.
+ */
+static inline int uverbs_attr_get_uobjs_arr(
+ const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx,
+ struct ib_uobject ***arr)
+{
+ const struct uverbs_attr *attr =
+ uverbs_attr_get(attrs_bundle, attr_idx);
+
+ if (IS_ERR(attr)) {
+ *arr = NULL;
+ return 0;
+ }
+
+ *arr = attr->objs_arr_attr.uobjects;
+
+ return attr->objs_arr_attr.len;
+}
+
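A minimal handler-side sketch of consuming a UVERBS_ATTR_TYPE_IDRS_ARRAY attribute through this helper; the attribute id is a placeholder, not a real uapi constant:

#include <rdma/uverbs_ioctl.h>

#define EXAMPLE_ATTR_HANDLES_ARR 1	/* placeholder attribute id */

/* Sketch only: walk the uobjects handed in through an IDRS_ARRAY attribute. */
static int example_handler(struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject **uobjs;
	int num, i;

	num = uverbs_attr_get_uobjs_arr(attrs, EXAMPLE_ATTR_HANDLES_ARR,
					&uobjs);
	for (i = 0; i < num; i++) {
		/* uobjs[i] was looked up with the access from the attr spec */
	}
	return 0;
}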
static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
{
return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
@@ -582,6 +859,12 @@
#define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \
_uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
+static inline struct ib_ucontext *
+ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs)
+{
+ return ib_uverbs_get_ucontext_file(attrs->ufile);
+}
+
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, u64 allowed_bits);
@@ -603,6 +886,11 @@
{
return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
}
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val);
+int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size);
#else
static inline int
uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -631,6 +919,40 @@
{
return ERR_PTR(-EINVAL);
}
+static inline int
+_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
+{
+ return -EINVAL;
+}
+static inline int
+uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size)
+{
+ return -EINVAL;
+}
#endif
+#define uverbs_get_const(_to, _attrs_bundle, _idx) \
+ ({ \
+ s64 _val; \
+ int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*_to)), \
+ type_max(typeof(*_to)), NULL); \
+ (*_to) = _val; \
+ _ret; \
+ })
+
+#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \
+ ({ \
+ s64 _val; \
+ s64 _def_val = _default; \
+ int _ret = \
+ _uverbs_get_const(&_val, _attrs_bundle, _idx, \
+ type_min(typeof(*_to)), \
+ type_max(typeof(*_to)), &_def_val); \
+ (*_to) = _val; \
+ _ret; \
+ })
#endif
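uverbs_get_const() and uverbs_get_const_default() clamp the fetched value to type_min()/type_max() of the destination, so a handler can read straight into an enum. A minimal sketch; the attribute id and enum are hypothetical:

#include <rdma/uverbs_ioctl.h>

#define EXAMPLE_ATTR_MODE 2	/* placeholder attribute id */

enum example_mode {
	EXAMPLE_MODE_A,
	EXAMPLE_MODE_B,
};

/* Sketch only: read a UVERBS_ATTR_CONST_IN attribute into a driver enum. */
static int example_parse_mode(struct uverbs_attr_bundle *attrs,
			      enum example_mode *mode)
{
	/* falls back to EXAMPLE_MODE_A when the attribute is absent */
	return uverbs_get_const_default(mode, attrs, EXAMPLE_ATTR_MODE,
					EXAMPLE_MODE_A);
}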
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index b3b2173..3447bfe 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -43,7 +43,7 @@
#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y)
#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
-#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id)
+#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id)
/* These are static so they do not need to be qualified */
#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
@@ -102,18 +102,11 @@
#define ADD_UVERBS_METHODS(_name, _object_id, ...) \
static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
_object_id)[] = { __VA_ARGS__ }; \
- static const struct uverbs_object_def _name##_struct = { \
+ static const struct uverbs_object_def _name = { \
.id = _object_id, \
.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
.methods = &UVERBS_OBJECT_METHODS(_object_id) \
- }; \
- static const struct uverbs_object_def *const _name##_ptrs[] = { \
- &_name##_struct, \
- }; \
- static const struct uverbs_object_tree_def _name = { \
- .num_objects = 1, \
- .objects = &_name##_ptrs, \
- }
+ };
/* Used by drivers to declare a complete parsing tree for a single method that
* differs only in having additional driver specific attributes.
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 3b00231..05eabfd 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -37,15 +37,6 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/ib_user_ioctl_verbs.h>
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
-#else
-static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
- return NULL;
-}
-#endif
-
/* Returns _id, or causes a compile error if _id is not a u32.
*
* The uobj APIs should only be used with the write based uAPI to access
@@ -54,17 +45,18 @@
*/
#define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
-#define uobj_get_type(_ufile, _object) \
- uapi_get_object((_ufile)->device->uapi, _object)
+#define uobj_get_type(_attrs, _object) \
+ uapi_get_object((_attrs)->ufile->device->uapi, _object)
-#define uobj_get_read(_type, _id, _ufile) \
- rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
- _uobj_check_id(_id), UVERBS_LOOKUP_READ)
+#define uobj_get_read(_type, _id, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+ _uobj_check_id(_id), UVERBS_LOOKUP_READ, \
+ _attrs)
-#define ufd_get_read(_type, _fdnum, _ufile) \
- rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
+#define ufd_get_read(_type, _fdnum, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
(_fdnum)*typecheck(s32, _fdnum), \
- UVERBS_LOOKUP_READ)
+ UVERBS_LOOKUP_READ, _attrs)
static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
{
@@ -72,26 +64,27 @@
return NULL;
return uobj->object;
}
-#define uobj_get_obj_read(_object, _type, _id, _ufile) \
+#define uobj_get_obj_read(_object, _type, _id, _attrs) \
((struct ib_##_object *)_uobj_get_obj_read( \
- uobj_get_read(_type, _id, _ufile)))
+ uobj_get_read(_type, _id, _attrs)))
-#define uobj_get_write(_type, _id, _ufile) \
- rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
- _uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
+#define uobj_get_write(_type, _id, _attrs) \
+ rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+ _uobj_check_id(_id), UVERBS_LOOKUP_WRITE, \
+ _attrs)
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
- struct ib_uverbs_file *ufile, int success_res);
-#define uobj_perform_destroy(_type, _id, _ufile, _success_res) \
- __uobj_perform_destroy(uobj_get_type(_ufile, _type), \
- _uobj_check_id(_id), _ufile, _success_res)
+ struct uverbs_attr_bundle *attrs);
+#define uobj_perform_destroy(_type, _id, _attrs) \
+ __uobj_perform_destroy(uobj_get_type(_attrs, _type), \
+ _uobj_check_id(_id), _attrs)
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
- u32 id, struct ib_uverbs_file *ufile);
+ u32 id, struct uverbs_attr_bundle *attrs);
-#define uobj_get_destroy(_type, _id, _ufile) \
- __uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id), \
- _ufile)
+#define uobj_get_destroy(_type, _id, _attrs) \
+ __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \
+ _attrs)
static inline void uobj_put_destroy(struct ib_uobject *uobj)
{
@@ -111,34 +104,99 @@
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
}
-static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
- int success_res)
+static inline int __must_check
+uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
{
- int ret = rdma_alloc_commit_uobject(uobj);
+ int ret = rdma_alloc_commit_uobject(uobj, attrs);
if (ret)
return ret;
- return success_res;
+ return 0;
}
-static inline void uobj_alloc_abort(struct ib_uobject *uobj)
+static inline void uobj_alloc_abort(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs)
{
- rdma_alloc_abort_uobject(uobj);
+ rdma_alloc_abort_uobject(uobj, attrs);
}
static inline struct ib_uobject *
-__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
- struct ib_device **ib_dev)
+__uobj_alloc(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev)
{
- struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile);
+ struct ib_uobject *uobj =
+ rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
if (!IS_ERR(uobj))
- *ib_dev = uobj->context->device;
+ *ib_dev = attrs->context->device;
return uobj;
}
-#define uobj_alloc(_type, _ufile, _ib_dev) \
- __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
+#define uobj_alloc(_type, _attrs, _ib_dev) \
+ __uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev)
+
+static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
+ struct ib_uobject *uobj,
+ struct ib_device *ib_dev,
+ enum ib_flow_action_type type)
+{
+ atomic_set(&action->usecnt, 0);
+ action->device = ib_dev;
+ action->type = type;
+ action->uobject = uobj;
+ uobj->object = action;
+}
+
+struct ib_uflow_resources {
+ size_t max;
+ size_t num;
+ size_t collection_num;
+ size_t counters_num;
+ struct ib_counters **counters;
+ struct ib_flow_action **collection;
+};
+
+struct ib_uflow_object {
+ struct ib_uobject uobject;
+ struct ib_uflow_resources *resources;
+};
+
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs);
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ enum ib_flow_spec_type type,
+ void *ibobj);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+ struct ib_qp *qp, struct ib_device *device,
+ struct ib_uflow_resources *uflow_res)
+{
+ struct ib_uflow_object *uflow;
+
+ uobj->object = ibflow;
+ ibflow->uobject = uobj;
+
+ if (qp) {
+ atomic_inc(&qp->usecnt);
+ ibflow->qp = qp;
+ }
+
+ ibflow->device = device;
+ uflow = container_of(uobj, typeof(*uflow), uobject);
+ uflow->resources = uflow_res;
+}
+
+struct uverbs_api_object {
+ const struct uverbs_obj_type *type_attrs;
+ const struct uverbs_obj_type_class *type_class;
+ u8 disabled:1;
+ u32 id;
+};
+
+static inline u32 uobj_get_object_id(struct ib_uobject *uobj)
+{
+ return uobj->uapi_object->id;
+}
#endif
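With the bundle-based flow, a handler threads the same attrs through allocation, the driver work, and then abort or commit. A minimal sketch; the object id and the driver hook are hypothetical:

#include <linux/err.h>
#include <rdma/uverbs_std_types.h>

#define UVERBS_OBJECT_EXAMPLE 0		/* placeholder uapi object id */

int example_driver_create(struct ib_device *ib_dev, struct ib_uobject *uobj,
			  struct uverbs_attr_bundle *attrs);

/* Sketch only: allocate, create and commit a uobject inside a handler. */
static int example_create_handler(struct uverbs_attr_bundle *attrs)
{
	struct ib_device *ib_dev;
	struct ib_uobject *uobj;
	int ret;

	uobj = uobj_alloc(UVERBS_OBJECT_EXAMPLE, attrs, &ib_dev);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	ret = example_driver_create(ib_dev, uobj, attrs);
	if (ret) {
		uobj_alloc_abort(uobj, attrs);
		return ret;
	}

	return uobj_alloc_commit(uobj, attrs);
}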
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index acb1bfa..d57a5ba 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -95,7 +95,8 @@
void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
/* This does not consume the kref on uobj */
int __must_check (*destroy_hw)(struct ib_uobject *uobj,
- enum rdma_remove_reason why);
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs);
void (*remove_handle)(struct ib_uobject *uobj);
u8 needs_kfree_rcu;
};
@@ -126,18 +127,23 @@
* completely unchanged.
*/
int __must_check (*destroy_object)(struct ib_uobject *uobj,
- enum rdma_remove_reason why);
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs);
};
struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
struct ib_uverbs_file *ufile, s64 id,
- enum rdma_lookup_mode mode);
+ enum rdma_lookup_mode mode,
+ struct uverbs_attr_bundle *attrs);
void rdma_lookup_put_uobject(struct ib_uobject *uobj,
enum rdma_lookup_mode mode);
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
- struct ib_uverbs_file *ufile);
-void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
-int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj);
+ struct ib_uverbs_file *ufile,
+ struct uverbs_attr_bundle *attrs);
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs);
+int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs);
struct uverbs_obj_fd_type {
/*
@@ -157,6 +163,7 @@
extern const struct uverbs_obj_type_class uverbs_idr_class;
extern const struct uverbs_obj_type_class uverbs_fd_class;
+void uverbs_close_fd(struct file *f);
#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
sizeof(char))