Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 50be240..8315394 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -52,6 +52,7 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/eswitch.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -60,6 +61,7 @@
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
+#include "srq.h"
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
@@ -82,10 +84,13 @@
struct mlx5_ib_event_work {
struct work_struct work;
- struct mlx5_core_dev *dev;
- void *context;
- enum mlx5_dev_event event;
- unsigned long param;
+ union {
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_multiport_info *mpi;
+ };
+ bool is_slave;
+ unsigned int event;
+ void *param;
};
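
Note: the union above is discriminated by is_slave. A minimal user-space sketch of that tagged-union dispatch (struct and field names here are illustrative stand-ins, not the driver's types):

#include <stdbool.h>
#include <stdio.h>

struct ib_dev_model { const char *name; };  /* stand-in for mlx5_ib_dev            */
struct mpi_model    { const char *owner; }; /* stand-in for mlx5_ib_multiport_info */

/* Illustrative model of mlx5_ib_event_work: is_slave selects the union arm. */
struct event_work {
	union {
		struct ib_dev_model *dev;
		struct mpi_model *mpi;
	};
	bool is_slave;
	unsigned int event;
};

static void handle(const struct event_work *w)
{
	if (w->is_slave)
		printf("event %u routed via multiport info owned by %s\n",
		       w->event, w->mpi->owner);
	else
		printf("event %u handled directly on %s\n",
		       w->event, w->dev->name);
}

int main(void)
{
	struct ib_dev_model d = { "mlx5_0" };
	struct event_work w = { .dev = &d, .is_slave = false, .event = 4 };

	handle(&w);
	return 0;
}

Keeping the discriminator outside the union mirrors the driver's choice: the handler must inspect is_slave before touching either pointer.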
enum {
@@ -146,12 +151,40 @@
int ret;
memset(&attr, 0, sizeof(attr));
- ret = ibdev->query_port(ibdev, port_num, &attr);
+ ret = ibdev->ops.query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
}
+static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
+ struct net_device *ndev,
+ u8 *port_num)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ struct net_device *rep_ndev;
+ struct mlx5_ib_port *port;
+ int i;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ port = &dev->port[i];
+ if (!port->rep)
+ continue;
+
+ read_lock(&port->roce.netdev_lock);
+ rep_ndev = mlx5_ib_get_rep_netdev(esw,
+ port->rep->vport);
+ if (rep_ndev == ndev) {
+ read_unlock(&port->roce.netdev_lock);
+ *port_num = i + 1;
+ return &port->roce;
+ }
+ read_unlock(&port->roce.netdev_lock);
+ }
+
+ return NULL;
+}
+
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -168,21 +201,20 @@
switch (event) {
case NETDEV_REGISTER:
- case NETDEV_UNREGISTER:
+ /* Should already be registered during the load */
+ if (ibdev->is_rep)
+ break;
write_lock(&roce->netdev_lock);
- if (ibdev->rep) {
- struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
- struct net_device *rep_ndev;
+ if (ndev->dev.parent == mdev->device)
+ roce->netdev = ndev;
+ write_unlock(&roce->netdev_lock);
+ break;
- rep_ndev = mlx5_ib_get_rep_netdev(esw,
- ibdev->rep->vport);
- if (rep_ndev == ndev)
- roce->netdev = (event == NETDEV_UNREGISTER) ?
- NULL : ndev;
- } else if (ndev->dev.parent == &mdev->pdev->dev) {
- roce->netdev = (event == NETDEV_UNREGISTER) ?
- NULL : ndev;
- }
+ case NETDEV_UNREGISTER:
+ /* In case of reps, ib device goes away before the netdevs */
+ write_lock(&roce->netdev_lock);
+ if (roce->netdev == ndev)
+ roce->netdev = NULL;
write_unlock(&roce->netdev_lock);
break;
@@ -197,6 +229,10 @@
dev_put(lag_ndev);
}
+ if (ibdev->is_rep)
+ roce = mlx5_get_rep_roce(ibdev, ndev, &port_num);
+ if (!roce)
+ return NOTIFY_DONE;
if ((upper == ndev || (!upper && ndev == roce->netdev))
&& ibdev->ib_active) {
struct ib_event ibev = { };
@@ -249,11 +285,11 @@
/* Ensure ndev does not disappear before we invoke dev_hold()
*/
- read_lock(&ibdev->roce[port_num - 1].netdev_lock);
- ndev = ibdev->roce[port_num - 1].netdev;
+ read_lock(&ibdev->port[port_num - 1].roce.netdev_lock);
+ ndev = ibdev->port[port_num - 1].roce.netdev;
if (ndev)
dev_hold(ndev);
- read_unlock(&ibdev->roce[port_num - 1].netdev_lock);
+ read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock);
out:
mlx5_ib_put_native_port_mdev(ibdev, port_num);
@@ -323,8 +359,8 @@
spin_unlock(&port->mp.mpi_lock);
}
-static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
- u8 *active_width)
+static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+ u8 *active_width)
{
switch (eth_proto_oper) {
case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
@@ -381,10 +417,73 @@
return 0;
}
+static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+ u8 *active_width)
+{
+ switch (eth_proto_oper) {
+ case MLX5E_PROT_MASK(MLX5E_SGMII_100M):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_SDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_5GBASE_R):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_DDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_CAUI_4_100GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+ u8 *active_width, bool ext)
+{
+ return ext ?
+ translate_eth_ext_proto_oper(eth_proto_oper, active_speed,
+ active_width) :
+ translate_eth_legacy_proto_oper(eth_proto_oper, active_speed,
+ active_width);
+}
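
Assuming MLX5E_PROT_MASK(mode) expands to (1 << mode), as in the mlx5 port headers, the ext/legacy split above reduces to a lookup keyed by a single set bit. A minimal stand-alone sketch with invented mode numbers:

#include <stdint.h>
#include <stdio.h>

#define PROT_MASK(mode) (1u << (mode))

/* Hypothetical extended-PTYS mode numbers, for illustration only. */
enum { MODE_25G = 4, MODE_50G_1X = 6 };

static int translate_ext(uint32_t oper, int *speed, int *width)
{
	switch (oper) {
	case PROT_MASK(MODE_25G):    *width = 1; *speed = 25; return 0;
	case PROT_MASK(MODE_50G_1X): *width = 1; *speed = 50; return 0;
	default:                     return -1;
	}
}

static int translate_legacy(uint32_t oper, int *speed, int *width)
{
	(void)oper; (void)speed; (void)width;
	return -1; /* legacy table elided */
}

/* ext would come from MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet). */
static int translate(uint32_t oper, int *speed, int *width, int ext)
{
	return ext ? translate_ext(oper, speed, width)
		   : translate_legacy(oper, speed, width);
}

int main(void)
{
	int speed, width;

	if (!translate(PROT_MASK(MODE_50G_1X), &speed, &width, 1))
		printf("%dG x%d\n", speed, width);
	return 0;
}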
+
static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
struct mlx5_core_dev *mdev;
struct net_device *ndev, *upper;
enum ib_mtu ndev_ib_mtu;
@@ -392,6 +491,7 @@
u16 qkey_viol_cntr;
u32 eth_prot_oper;
u8 mdev_port_num;
+ bool ext;
int err;
mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
@@ -407,17 +507,24 @@
/* Possible bad flows are checked before filling out props so in case
* of an error it will still be zeroed out.
+ * Use native port in case of reps
*/
- err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper,
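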
- mdev_port_num);
+ if (dev->is_rep)
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
+ 1);
+ else
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
+ mdev_port_num);
if (err)
goto out;
+ ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet);
+ eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
props->active_width = IB_WIDTH_4X;
props->active_speed = IB_SPEED_QDR;
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
- &props->active_width);
+ &props->active_width, ext);
props->port_cap_flags |= IB_PORT_CM_SUP;
props->ip_gids = true;
@@ -428,7 +535,7 @@
props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
props->pkey_tbl_len = 1;
props->state = IB_PORT_DOWN;
- props->phys_state = 3;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
props->qkey_viol_cntr = qkey_viol_cntr;
@@ -441,7 +548,7 @@
if (!ndev)
goto out;
- if (mlx5_lag_is_active(dev->mdev)) {
+ if (dev->lag_active) {
rcu_read_lock();
upper = netdev_master_upper_dev_get_rcu(ndev);
if (upper) {
@@ -454,7 +561,7 @@
if (netif_running(ndev) && netif_carrier_ok(ndev)) {
props->state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
}
ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
@@ -473,20 +580,17 @@
const struct ib_gid_attr *attr)
{
enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ u16 vlan_id = 0xffff;
u8 roce_version = 0;
u8 roce_l3_type = 0;
- bool vlan = false;
u8 mac[ETH_ALEN];
- u16 vlan_id = 0;
+ int ret;
if (gid) {
gid_type = attr->gid_type;
- ether_addr_copy(mac, attr->ndev->dev_addr);
-
- if (is_vlan_dev(attr->ndev)) {
- vlan = true;
- vlan_id = vlan_dev_vlan_id(attr->ndev);
- }
+ ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
+ if (ret)
+ return ret;
}
switch (gid_type) {
@@ -506,8 +610,9 @@
}
return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
- roce_l3_type, gid->raw, mac, vlan,
- vlan_id, port_num);
+ roce_l3_type, gid->raw, mac,
+ vlan_id < VLAN_CFI_MASK, vlan_id,
+ port_num);
}
static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
@@ -784,7 +889,7 @@
}
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
- props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+ props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
@@ -904,6 +1009,8 @@
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len =
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
+ props->max_pi_fast_reg_page_list_len =
+ props->max_fast_reg_page_list_len / 2;
get_atomic_caps_qp(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
@@ -915,11 +1022,11 @@
props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(mdev, pg))
- props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
- props->odp_caps = dev->odp_caps;
-#endif
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
+ props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
+ props->odp_caps = dev->odp_caps;
+ }
if (MLX5_CAP_GEN(mdev, cd))
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
@@ -939,15 +1046,19 @@
}
if (MLX5_CAP_GEN(mdev, tag_matching)) {
- props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
props->tm_caps.max_num_tags =
(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
- props->tm_caps.flags = IB_TM_CAP_RC;
props->tm_caps.max_ops =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
}
+ if (MLX5_CAP_GEN(mdev, tag_matching) &&
+ MLX5_CAP_GEN(mdev, rndv_offload_rc)) {
+ props->tm_caps.flags = IB_TM_CAP_RNDV_RC;
+ props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) {
props->cq_caps.max_cq_moderation_count =
MLX5_MAX_CQ_COUNT;
@@ -1014,6 +1125,11 @@
if (MLX5_CAP_GEN(mdev, cqe_128_always))
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
+ if (MLX5_CAP_GEN(mdev, qp_packet_based))
+ resp.flags |=
+ MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
+
+ resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
}
if (field_avail(typeof(resp), sw_parsing_caps,
@@ -1101,6 +1217,8 @@
if (active_width & MLX5_IB_WIDTH_1X)
*ib_width = IB_WIDTH_1X;
+ else if (active_width & MLX5_IB_WIDTH_2X)
+ *ib_width = IB_WIDTH_2X;
else if (active_width & MLX5_IB_WIDTH_4X)
*ib_width = IB_WIDTH_4X;
else if (active_width & MLX5_IB_WIDTH_8X)
@@ -1216,6 +1334,9 @@
props->subnet_timeout = rep->subnet_timeout;
props->init_type_reply = rep->init_type_reply;
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
+ props->port_cap_flags2 = rep->cap_mask2;
+
err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
if (err)
goto out;
@@ -1295,7 +1416,9 @@
{
int ret;
- /* Only link layer == ethernet is valid for representors */
+ /* Only link layer == ethernet is valid for representors
+ * and we always use port 1
+ */
ret = mlx5_query_port_roce(ibdev, port, props);
if (ret || !props)
return ret;
@@ -1564,14 +1687,57 @@
mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
}
-static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+ int err = 0;
+
+ mutex_lock(&dev->lb.mutex);
+ if (td)
+ dev->lb.user_td++;
+ if (qp)
+ dev->lb.qps++;
+
+ if (dev->lb.user_td == 2 ||
+ dev->lb.qps == 1) {
+ if (!dev->lb.enabled) {
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+ dev->lb.enabled = true;
+ }
+ }
+
+ mutex_unlock(&dev->lb.mutex);
+
+ return err;
+}
+
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+ mutex_lock(&dev->lb.mutex);
+ if (td)
+ dev->lb.user_td--;
+ if (qp)
+ dev->lb.qps--;
+
+ if (dev->lb.user_td == 1 &&
+ dev->lb.qps == 0) {
+ if (dev->lb.enabled) {
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
+ dev->lb.enabled = false;
+ }
+ }
+
+ mutex_unlock(&dev->lb.mutex);
+}
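
The thresholds above encode when self-loopback is actually required: it is unnecessary while at most one user transport domain exists and no QP depends on it, so enabling fires when user_td reaches 2 or qps reaches 1. A simplified user-space model of the counting (assuming monotonic add/remove; the driver's exact disable test is user_td == 1 && qps == 0):

#include <stdbool.h>
#include <stdio.h>

struct lb { int user_td; int qps; bool enabled; };

/* Loopback is needed once a second user TD exists or any
 * loopback-dependent QP exists. */
static void update(struct lb *lb)
{
	bool need = lb->user_td >= 2 || lb->qps >= 1;

	if (need != lb->enabled) {
		lb->enabled = need;
		printf("local loopback -> %s (user_td=%d qps=%d)\n",
		       need ? "on" : "off", lb->user_td, lb->qps);
	}
}

int main(void)
{
	struct lb lb = { 0, 0, false };

	lb.user_td++; update(&lb); /* one TD: loopback stays off  */
	lb.user_td++; update(&lb); /* second TD: turns on         */
	lb.qps++;     update(&lb); /* QP arrives: still on        */
	lb.user_td--; update(&lb); /* QP alone keeps it on        */
	lb.qps--;     update(&lb); /* back to one TD, no QPs: off */
	return 0;
}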
+
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
+ u16 uid)
{
int err;
if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
return 0;
- err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
+ err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid);
if (err)
return err;
@@ -1580,45 +1746,34 @@
!MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
return err;
- mutex_lock(&dev->lb_mutex);
- dev->user_td++;
-
- if (dev->user_td == 2)
- err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
-
- mutex_unlock(&dev->lb_mutex);
- return err;
+ return mlx5_ib_enable_lb(dev, true, false);
}
-static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
+ u16 uid)
{
if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
return;
- mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
+ mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid);
if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
(!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
!MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
return;
- mutex_lock(&dev->lb_mutex);
- dev->user_td--;
-
- if (dev->user_td < 2)
- mlx5_nic_vport_update_local_lb(dev->mdev, false);
-
- mutex_unlock(&dev->lb_mutex);
+ mlx5_ib_disable_lb(dev, true, false);
}
-static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = uctx->device;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_core_dev *mdev = dev->mdev;
- struct mlx5_ib_ucontext *context;
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
struct mlx5_bfreg_info *bfregi;
int ver;
int err;
@@ -1628,29 +1783,29 @@
bool lib_uar_4k;
if (!dev->ib_active)
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
else if (udata->inlen >= min_req_v2)
ver = 2;
else
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
if (err)
- return ERR_PTR(err);
+ return err;
if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
req.total_num_bfregs = ALIGN(req.total_num_bfregs,
MLX5_NON_FP_BFREGS_PER_UAR);
if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
@@ -1683,10 +1838,6 @@
/* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
}
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
-
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
bfregi = &context->bfregi;
@@ -1716,34 +1867,24 @@
if (err)
goto out_sys_pages;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
-#endif
-
- err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
- if (err)
- goto out_uars;
-
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- /* Block DEVX on Infiniband as of SELinux */
- if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
- err = -EPERM;
- goto out_td;
- }
-
- err = mlx5_ib_devx_create(dev, context);
- if (err)
- goto out_td;
+ err = mlx5_ib_devx_create(dev, true);
+ if (err < 0)
+ goto out_uars;
+ context->devx_uid = err;
}
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
+ context->devx_uid);
+ if (err)
+ goto out_devx;
+
if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
if (err)
goto out_mdev;
}
- INIT_LIST_HEAD(&context->vma_private_list);
- mutex_init(&context->vma_private_list_mutex);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@@ -1819,13 +1960,21 @@
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);
- return &context->ibucontext;
+ if (dev->lag_active) {
+ u8 port = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ atomic_set(&context->tx_port_affinity,
+ atomic_add_return(
+ 1, &dev->port[port].roce.tx_port_affinity));
+ }
+
+ return 0;
out_mdev:
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+out_devx:
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
- mlx5_ib_devx_destroy(dev, context);
-out_td:
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
out_uars:
deallocate_uars(dev, context);
@@ -1837,29 +1986,24 @@
kfree(bfregi->count);
out_ctx:
- kfree(context);
-
- return ERR_PTR(err);
+ return err;
}
-static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_bfreg_info *bfregi;
- if (context->devx_uid)
- mlx5_ib_devx_destroy(dev, context);
-
bfregi = &context->bfregi;
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
+
+ if (context->devx_uid)
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
kfree(bfregi->count);
- kfree(context);
-
- return 0;
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -1869,7 +2013,7 @@
fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
- return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
+ return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
}
static int get_command(unsigned long offset)
@@ -1893,94 +2037,9 @@
return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
}
-static void mlx5_ib_vma_open(struct vm_area_struct *area)
-{
- /* vma_open is called when a new VMA is created on top of our VMA. This
- * is done through either mremap flow or split_vma (usually due to
- * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
- * as this VMA is strongly hardware related. Therefore we set the
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
- * calling us again and trying to do incorrect actions. We assume that
- * the original VMA size is exactly a single page, and therefore all
- * "splitting" operation will not happen to it.
- */
- area->vm_ops = NULL;
-}
-
-static void mlx5_ib_vma_close(struct vm_area_struct *area)
-{
- struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
-
- /* It's guaranteed that all VMAs opened on a FD are closed before the
- * file itself is closed, therefore no sync is needed with the regular
- * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
- * However need a sync with accessing the vma as part of
- * mlx5_ib_disassociate_ucontext.
- * The close operation is usually called under mm->mmap_sem except when
- * process is exiting.
- * The exiting case is handled explicitly as part of
- * mlx5_ib_disassociate_ucontext.
- */
- mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
-
- /* setting the vma context pointer to null in the mlx5_ib driver's
- * private data, to protect a race condition in
- * mlx5_ib_disassociate_ucontext().
- */
- mlx5_ib_vma_priv_data->vma = NULL;
- mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
- list_del(&mlx5_ib_vma_priv_data->list);
- mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
- kfree(mlx5_ib_vma_priv_data);
-}
-
-static const struct vm_operations_struct mlx5_ib_vm_ops = {
- .open = mlx5_ib_vma_open,
- .close = mlx5_ib_vma_close
-};
-
-static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
- struct mlx5_ib_ucontext *ctx)
-{
- struct mlx5_ib_vma_private_data *vma_prv;
- struct list_head *vma_head = &ctx->vma_private_list;
-
- vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
- if (!vma_prv)
- return -ENOMEM;
-
- vma_prv->vma = vma;
- vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
- vma->vm_private_data = vma_prv;
- vma->vm_ops = &mlx5_ib_vm_ops;
-
- mutex_lock(&ctx->vma_private_list_mutex);
- list_add(&vma_prv->list, vma_head);
- mutex_unlock(&ctx->vma_private_list_mutex);
-
- return 0;
-}
static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- struct vm_area_struct *vma;
- struct mlx5_ib_vma_private_data *vma_private, *n;
- struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
-
- mutex_lock(&context->vma_private_list_mutex);
- list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
- list) {
- vma = vma_private->vma;
- zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
- /* context going to be destroyed, should
- * not access ops any more.
- */
- vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
- vma->vm_ops = NULL;
- list_del(&vma_private->list);
- kfree(vma_private);
- }
- mutex_unlock(&context->vma_private_list_mutex);
}
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -2003,28 +2062,22 @@
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
{
- phys_addr_t pfn;
- int err;
-
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ if ((vma->vm_end - vma->vm_start != PAGE_SIZE) ||
+ !(vma->vm_flags & VM_SHARED))
return -EINVAL;
if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1)
return -EOPNOTSUPP;
- if (vma->vm_flags & VM_WRITE)
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
return -EPERM;
+ vma->vm_flags &= ~VM_MAYWRITE;
- if (!dev->mdev->clock_info_page)
+ if (!dev->mdev->clock_info)
return -EOPNOTSUPP;
- pfn = page_to_pfn(dev->mdev->clock_info_page);
- err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
- vma->vm_page_prot);
- if (err)
- return err;
-
- return mlx5_ib_set_vma_data(vma, context);
+ return vm_insert_page(vma, vma->vm_start,
+ virt_to_page(dev->mdev->clock_info));
}
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
@@ -2114,21 +2167,15 @@
pfn = uar_index2pfn(dev, uar_index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
- vma->vm_page_prot = prot;
- err = io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot);
+ err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
+ prot);
if (err) {
mlx5_ib_err(dev,
- "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
+ "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
err, mmap_cmd2str(cmd));
- err = -EAGAIN;
goto err;
}
- err = mlx5_ib_set_vma_data(vma, context);
- if (err)
- goto err;
-
if (dyn_uar)
bfregi->sys_pages[idx] = uar_index;
return 0;
@@ -2153,24 +2200,17 @@
size_t map_size = vma->vm_end - vma->vm_start;
u32 npages = map_size >> PAGE_SHIFT;
phys_addr_t pfn;
- pgprot_t prot;
if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
page_idx + npages)
return -EINVAL;
- pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
+ pfn = ((dev->mdev->bar_addr +
MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
PAGE_SHIFT) +
page_idx;
- prot = pgprot_writecombine(vma->vm_page_prot);
- vma->vm_page_prot = prot;
-
- if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
- vma->vm_page_prot))
- return -EAGAIN;
-
- return mlx5_ib_set_vma_data(vma, mctx);
+ return rdma_user_mmap_io(context, vma, pfn, map_size,
+ pgprot_writecombine(vma->vm_page_prot));
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2197,19 +2237,18 @@
if (vma->vm_flags & VM_WRITE)
return -EPERM;
+ vma->vm_flags &= ~VM_MAYWRITE;
/* Don't expose to user-space information it shouldn't have */
if (PAGE_SIZE > 4096)
return -EOPNOTSUPP;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pfn = (dev->mdev->iseg_base +
offsetof(struct mlx5_init_seg, internal_timer_h)) >>
PAGE_SHIFT;
- if (io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
- break;
+ return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot));
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
@@ -2223,126 +2262,250 @@
return 0;
}
-struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs)
+static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
+ u32 type)
{
- u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
- struct mlx5_memic *memic = &to_mdev(ibdev)->memic;
- phys_addr_t memic_addr;
- struct mlx5_ib_dm *dm;
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ if (!MLX5_CAP_DEV_MEM(dev->mdev, memic))
+ return -EOPNOTSUPP;
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ if (!capable(CAP_SYS_RAWIO) ||
+ !capable(CAP_NET_RAW))
+ return -EPERM;
+
+ if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner)))
+ return -EOPNOTSUPP;
+ break;
+ }
+
+ return 0;
+}
+
+static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
+ struct mlx5_ib_dm *dm,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
u64 start_offset;
u32 page_idx;
int err;
- dm = kzalloc(sizeof(*dm), GFP_KERNEL);
- if (!dm)
- return ERR_PTR(-ENOMEM);
+ dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
- mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n",
- attr->length, act_size, attr->alignment);
-
- err = mlx5_cmd_alloc_memic(memic, &memic_addr,
- act_size, attr->alignment);
+ err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr,
+ dm->size, attr->alignment);
if (err)
- goto err_free;
+ return err;
- start_offset = memic_addr & ~PAGE_MASK;
- page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
- MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
+ page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) -
+ MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >>
PAGE_SHIFT;
err = uverbs_copy_to(attrs,
- MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
- &start_offset, sizeof(start_offset));
- if (err)
- goto err_dealloc;
-
- err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
&page_idx, sizeof(page_idx));
if (err)
goto err_dealloc;
- bitmap_set(to_mucontext(context)->dm_pages, page_idx,
- DIV_ROUND_UP(act_size, PAGE_SIZE));
+ start_offset = dm->dev_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ goto err_dealloc;
- dm->dev_addr = memic_addr;
+ bitmap_set(to_mucontext(ctx)->dm_pages, page_idx,
+ DIV_ROUND_UP(dm->size, PAGE_SIZE));
+
+ return 0;
+
+err_dealloc:
+ mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
+
+ return err;
+}
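
The page_idx/start_offset math above just positions the allocation inside the MEMIC BAR window. A worked example with invented addresses and 4K pages:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* All addresses are invented for illustration. */
	uint64_t bar_start   = 0xf0000000ULL; /* pci_resource_start(pdev, 0)  */
	uint64_t memic_start = 0x100000ULL;   /* memic_bar_start_addr cap     */
	uint64_t dev_addr    = 0xf0103040ULL; /* address returned by firmware */

	uint64_t page_idx     = (dev_addr - bar_start - memic_start) >> PAGE_SHIFT;
	uint64_t start_offset = dev_addr & ((1ULL << PAGE_SHIFT) - 1); /* ~PAGE_MASK */

	/* Prints page_idx=3 start_offset=0x40 */
	printf("page_idx=%llu start_offset=0x%llx\n",
	       (unsigned long long)page_idx, (unsigned long long)start_offset);
	return 0;
}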
+
+static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
+ struct mlx5_ib_dm *dm,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs,
+ int type)
+{
+ struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
+ u64 act_size;
+ int err;
+
+ /* Allocation size must be a multiple of the basic block size
+ * and a power of 2.
+ */
+ act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+ act_size = roundup_pow_of_two(act_size);
+
+ dm->size = act_size;
+ err = mlx5_dm_sw_icm_alloc(dev, type, act_size,
+ to_mucontext(ctx)->devx_uid, &dm->dev_addr,
+ &dm->icm_dm.obj_id);
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &dm->dev_addr, sizeof(dm->dev_addr));
+ if (err)
+ mlx5_dm_sw_icm_dealloc(dev, type, dm->size,
+ to_mucontext(ctx)->devx_uid, dm->dev_addr,
+ dm->icm_dm.obj_id);
+
+ return err;
+}
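
A worked example of the size normalization above, with user-space stand-ins for round_up() and roundup_pow_of_two() and an assumed 4K SW ICM block size:

#include <stdint.h>
#include <stdio.h>

/* User-space stand-ins for the kernel helpers used above. */
static uint64_t round_up_mult(uint64_t n, uint64_t mult)
{
	return ((n + mult - 1) / mult) * mult;
}

static uint64_t round_up_pow2(uint64_t n)
{
	uint64_t p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	uint64_t block  = 4096;  /* assumed MLX5_SW_ICM_BLOCK_SIZE(dev) */
	uint64_t length = 10000; /* requested attr->length              */

	uint64_t act = round_up_mult(length, block); /* 12288 */

	act = round_up_pow2(act);                    /* 16384 */
	printf("act_size=%llu\n", (unsigned long long)act);
	return 0;
}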
+
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dm *dm;
+ enum mlx5_ib_uapi_dm_type type;
+ int err;
+
+ err = uverbs_get_const_default(&type, attrs,
+ MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+ MLX5_IB_UAPI_DM_TYPE_MEMIC);
+ if (err)
+ return ERR_PTR(err);
+
+ mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
+ type, attr->length, attr->alignment);
+
+ err = check_dm_type_support(to_mdev(ibdev), type);
+ if (err)
+ return ERR_PTR(err);
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ dm->type = type;
+
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ err = handle_alloc_dm_memic(context, dm,
+ attr,
+ attrs);
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ err = handle_alloc_dm_sw_icm(context, dm,
+ attr, attrs,
+ MLX5_SW_ICM_TYPE_STEERING);
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ err = handle_alloc_dm_sw_icm(context, dm,
+ attr, attrs,
+ MLX5_SW_ICM_TYPE_HEADER_MODIFY);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ if (err)
+ goto err_free;
return &dm->ibdm;
-err_dealloc:
- mlx5_cmd_dealloc_memic(memic, memic_addr,
- act_size);
err_free:
kfree(dm);
return ERR_PTR(err);
}
-int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
+int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
{
- struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic;
+ struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
+ struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
struct mlx5_ib_dm *dm = to_mdm(ibdm);
- u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE);
u32 page_idx;
int ret;
- ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size);
- if (ret)
- return ret;
+ switch (dm->type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
+ if (ret)
+ return ret;
- page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
- MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
- PAGE_SHIFT;
- bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
- page_idx,
- DIV_ROUND_UP(act_size, PAGE_SIZE));
+ page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) -
+ MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >>
+ PAGE_SHIFT;
+ bitmap_clear(ctx->dm_pages, page_idx,
+ DIV_ROUND_UP(dm->size, PAGE_SIZE));
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
+ dm->size, ctx->devx_uid, dm->dev_addr,
+ dm->icm_dm.obj_id);
+ if (ret)
+ return ret;
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY,
+ dm->size, ctx->devx_uid, dm->dev_addr,
+ dm->icm_dm.obj_id);
+ if (ret)
+ return ret;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
kfree(dm);
return 0;
}
-static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
+ struct mlx5_ib_pd *pd = to_mpd(ibpd);
+ struct ib_device *ibdev = ibpd->device;
struct mlx5_ib_alloc_pd_resp resp;
- struct mlx5_ib_pd *pd;
int err;
+ u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
+ u16 uid = 0;
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
- pd = kmalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
+ uid = context ? context->devx_uid : 0;
+ MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+ MLX5_SET(alloc_pd_in, in, uid, uid);
+ err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in),
+ out, sizeof(out));
+ if (err)
+ return err;
- err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
- if (err) {
- kfree(pd);
- return ERR_PTR(err);
- }
-
- if (context) {
+ pd->pdn = MLX5_GET(alloc_pd_out, out, pd);
+ pd->uid = uid;
+ if (udata) {
resp.pdn = pd->pdn;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
- mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
- kfree(pd);
- return ERR_PTR(-EFAULT);
+ mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
+ return -EFAULT;
}
}
- return &pd->ibpd;
+ return 0;
}
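
alloc_pd now issues the firmware command directly so the DEVX uid can be stamped on it. A toy model of the inbox/outbox round trip, with plain structs standing in for the MLX5_SET/MLX5_GET bitfield macros (layouts invented for illustration):

#include <stdint.h>
#include <stdio.h>

/* Invented mailbox layouts; the driver packs these with MLX5_SET(). */
struct alloc_pd_in  { uint16_t opcode; uint16_t uid; };
struct alloc_pd_out { uint32_t pdn; };

#define CMD_OP_ALLOC_PD 0x800 /* plays the role of MLX5_CMD_OP_ALLOC_PD */

/* Pretend firmware: hands out PD numbers, records the owning uid. */
static int cmd_exec(const struct alloc_pd_in *in, struct alloc_pd_out *out)
{
	static uint32_t next_pdn = 1;

	if (in->opcode != CMD_OP_ALLOC_PD)
		return -1;
	out->pdn = next_pdn++;
	return 0;
}

int main(void)
{
	struct alloc_pd_in in   = { .opcode = CMD_OP_ALLOC_PD, .uid = 7 };
	struct alloc_pd_out out = { 0 };

	if (!cmd_exec(&in, &out))
		printf("pdn=%u allocated for uid=%u\n", out.pdn, in.uid);
	return 0;
}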
-static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
+static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
- mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
- kfree(mpd);
-
- return 0;
+ mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
enum {
@@ -2376,10 +2539,29 @@
return match_criteria_enable;
}
-static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
+static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
{
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+ u8 entry_mask;
+ u8 entry_val;
+ int err = 0;
+
+ if (!mask)
+ goto out;
+
+ entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
+ ip_protocol);
+ entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
+ ip_protocol);
+ if (!entry_mask) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+ goto out;
+ }
+ /* Don't override existing ip protocol */
+ if (mask != entry_mask || val != entry_val)
+ err = -EINVAL;
+out:
+ return err;
}
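
The first-writer-wins rule above can be exercised in isolation; a minimal user-space model (error value mirrors -EINVAL):

#include <stdio.h>

/* First writer sets ip_protocol; a later conflicting spec is rejected. */
static int set_proto_model(unsigned char *mask, unsigned char *val,
			   unsigned char m, unsigned char v)
{
	if (!m)
		return 0;            /* caller isn't matching on protocol */
	if (!*mask) {
		*mask = m;
		*val = v;            /* first writer wins                 */
		return 0;
	}
	return (m == *mask && v == *val) ? 0 : -22; /* -EINVAL */
}

int main(void)
{
	unsigned char mask = 0, val = 0;

	printf("%d\n", set_proto_model(&mask, &val, 0xff, 6));  /* TCP: 0   */
	printf("%d\n", set_proto_model(&mask, &val, 0xff, 6));  /* same: 0  */
	printf("%d\n", set_proto_model(&mask, &val, 0xff, 17)); /* UDP: -22 */
	return 0;
}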
static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
@@ -2445,30 +2627,65 @@
offsetof(typeof(filter), field) -\
sizeof(filter.field))
-static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_act *action)
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action)
{
- struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
switch (maction->ib_action.type) {
case IB_FLOW_ACTION_ESP:
+ if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
+ return -EINVAL;
/* Currently only AES_GCM keymat is supported by the driver */
action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ?
+ action->action |= is_egress ?
MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
return 0;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ action->modify_hdr =
+ maction->flow_action_raw.modify_hdr;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_DECAP) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
+ if (action->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ return -EINVAL;
+ action->action |=
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ action->pkt_reformat =
+ maction->flow_action_raw.pkt_reformat;
+ return 0;
+ }
+ /* fall through */
default:
return -EOPNOTSUPP;
}
}
-static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
- u32 *match_v, const union ib_flow_spec *ib_spec,
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_act *action, u32 prev_type)
{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
@@ -2583,8 +2800,10 @@
set_tos(headers_c, headers_v,
ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
- set_proto(headers_c, headers_v,
- ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv4.mask.proto,
+ ib_spec->ipv4.val.proto))
+ return -EINVAL;
break;
case IB_FLOW_SPEC_IPV6:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
@@ -2623,9 +2842,10 @@
ib_spec->ipv6.mask.traffic_class,
ib_spec->ipv6.val.traffic_class);
- set_proto(headers_c, headers_v,
- ib_spec->ipv6.mask.next_hdr,
- ib_spec->ipv6.val.next_hdr);
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv6.mask.next_hdr,
+ ib_spec->ipv6.val.next_hdr))
+ return -EINVAL;
set_flow_label(misc_params_c, misc_params_v,
ntohl(ib_spec->ipv6.mask.flow_label),
@@ -2646,10 +2866,8 @@
LAST_TCP_UDP_FIELD))
return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_TCP);
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
+ return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
@@ -2666,10 +2884,8 @@
LAST_TCP_UDP_FIELD))
return -EOPNOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_UDP);
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
+ return -EINVAL;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
@@ -2685,6 +2901,9 @@
if (ib_spec->gre.mask.c_ks_res0_ver)
return -EOPNOTSUPP;
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
+ return -EINVAL;
+
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
@@ -2696,11 +2915,11 @@
ntohs(ib_spec->gre.val.protocol));
memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- gre_key_h),
+ gre_key.nvgre.hi),
&ib_spec->gre.mask.key,
sizeof(ib_spec->gre.mask.key));
memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
- gre_key_h),
+ gre_key.nvgre.hi),
&ib_spec->gre.val.key,
sizeof(ib_spec->gre.val.key));
break;
@@ -2785,8 +3004,8 @@
if (ib_spec->flow_tag.tag_id >= BIT(24))
return -EINVAL;
- action->flow_tag = ib_spec->flow_tag.tag_id;
- action->has_flow_tag = true;
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
break;
case IB_FLOW_SPEC_ACTION_DROP:
if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
@@ -2795,7 +3014,8 @@
action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
break;
case IB_FLOW_SPEC_ACTION_HANDLE:
- ret = parse_flow_flow_action(ib_spec, flow_attr, action);
+ ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
+ flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
if (ret)
return ret;
break;
@@ -2876,10 +3096,11 @@
* rules would be supported, always return VALID_SPEC_NA.
*/
if (!is_crypto)
- return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
+ return VALID_SPEC_NA;
return is_crypto && is_ipsec &&
- (!egress || (!is_drop && !flow_act->has_flow_tag)) ?
+ (!egress || (!is_drop &&
+ !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
VALID_SPEC_VALID : VALID_SPEC_INVALID;
}
@@ -3019,14 +3240,15 @@
static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
struct mlx5_ib_flow_prio *prio,
int priority,
- int num_entries, int num_groups)
+ int num_entries, int num_groups,
+ u32 flags)
{
struct mlx5_flow_table *ft;
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
num_entries,
num_groups,
- 0, 0);
+ 0, flags);
if (IS_ERR(ft))
return ERR_CAST(ft);
@@ -3046,26 +3268,46 @@
int max_table_size;
int num_entries;
int num_groups;
+ bool esw_encap;
+ u32 flags = 0;
int priority;
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (ft_type == MLX5_IB_FT_TX)
- priority = 0;
- else if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
+ enum mlx5_flow_namespace_type fn_type;
+
+ if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
dont_trap);
- ns = mlx5_get_flow_namespace(dev->mdev,
- ft_type == MLX5_IB_FT_TX ?
- MLX5_FLOW_NAMESPACE_EGRESS :
- MLX5_FLOW_NAMESPACE_BYPASS);
+ if (ft_type == MLX5_IB_FT_RX) {
+ fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
+ prio = &dev->flow_db->prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ log_max_ft_size));
+ fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ prio = &dev->flow_db->egress_prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
@@ -3092,12 +3334,12 @@
if (!ns)
return ERR_PTR(-ENOTSUPP);
- if (num_entries > max_table_size)
- return ERR_PTR(-ENOMEM);
+ max_table_size = min_t(int, num_entries, max_table_size);
ft = prio->flow_table;
if (!ft)
- return _get_prio(ns, prio, priority, num_entries, num_groups);
+ return _get_prio(ns, prio, priority, max_table_size, num_groups,
+ flags);
return prio;
}
@@ -3241,6 +3483,37 @@
return ret;
}
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+}
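
A sketch of the policy this helper implements: prefer the eswitch's register metadata match when it is enabled, otherwise match the literal source vport. All field names and the metadata encoding below are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Invented match-buffer layout and metadata encoding. */
struct match_model {
	unsigned int reg_c_0, reg_c_0_mask;   /* eswitch metadata register */
	unsigned int src_port, src_port_mask; /* literal vport number      */
};

static void set_rule_source_port(struct match_model *m, unsigned int vport,
				 bool metadata_enabled)
{
	if (metadata_enabled) {
		m->reg_c_0      = 0x8000u | vport; /* invented encoding */
		m->reg_c_0_mask = ~0u;
	} else {
		m->src_port      = vport;
		m->src_port_mask = ~0u;
	}
}

int main(void)
{
	struct match_model m = {0};

	set_rule_source_port(&m, 3, true);
	printf("reg_c_0=0x%x/0x%x\n", m.reg_c_0, m.reg_c_0_mask);
	return 0;
}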
+
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
const struct ib_flow_attr *flow_attr,
@@ -3250,7 +3523,7 @@
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+ struct mlx5_flow_act flow_act = {};
struct mlx5_flow_spec *spec;
struct mlx5_flow_destination dest_arr[2] = {};
struct mlx5_flow_destination *rule_dst = dest_arr;
@@ -3264,6 +3537,9 @@
if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
+ if (dev->is_rep && is_egress)
+ return ERR_PTR(-EINVAL);
+
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
handler = kzalloc(sizeof(*handler), GFP_KERNEL);
if (!handler || !spec) {
@@ -3278,8 +3554,7 @@
}
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec->match_criteria,
- spec->match_value,
+ err = parse_flow_attr(dev->mdev, spec,
ib_flow, flow_attr, &flow_act,
prev_type);
if (err < 0)
@@ -3292,16 +3567,16 @@
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
- if (dev->rep) {
- void *misc;
+ if (dev->is_rep) {
+ struct mlx5_eswitch_rep *rep;
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port,
- dev->rep->vport);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
@@ -3313,15 +3588,18 @@
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ struct mlx5_ib_mcounters *mcounters;
+
err = flow_counters_set_data(flow_act.counters, ucmd);
if (err)
goto free;
+ mcounters = to_mcounters(flow_act.counters);
handler->ibcounters = flow_act.counters;
dest_arr[dest_num].type =
MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest_arr[dest_num].counter =
- to_mcounters(flow_act.counters)->hw_cntrs_hndl;
+ dest_arr[dest_num].counter_id =
+ mlx5_fc_id(mcounters->hw_cntrs_hndl);
dest_num++;
}
@@ -3339,11 +3617,11 @@
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
- if (flow_act.has_flow_tag &&
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- flow_act.flow_tag, flow_attr->type);
+ spec->flow_context.flow_tag, flow_attr->type);
err = -EINVAL;
goto free;
}
@@ -3651,34 +3929,78 @@
return ERR_PTR(err);
}
-static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev,
- int priority, bool mcast)
+static struct mlx5_ib_flow_prio *
+_get_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ bool mcast)
{
- int max_table_size;
struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio;
-
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- if (max_table_size < MLX5_FS_MAX_ENTRIES)
- return ERR_PTR(-ENOMEM);
+ struct mlx5_ib_flow_prio *prio = NULL;
+ int max_table_size = 0;
+ bool esw_encap;
+ u32 flags = 0;
+ int priority;
if (mcast)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
- priority = ib_prio_to_core_prio(priority, false);
+ priority = ib_prio_to_core_prio(fs_matcher->priority, false);
- ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS);
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
+ !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+ max_table_size = BIT(
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+ esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ priority = FDB_BYPASS_PATH;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ log_max_ft_size));
+ priority = fs_matcher->priority;
+ }
+
+ max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
if (!ns)
return ERR_PTR(-ENOTSUPP);
- prio = &dev->flow_db->prios[priority];
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+ prio = &dev->flow_db->prios[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+ prio = &dev->flow_db->egress_prios[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ prio = &dev->flow_db->fdb;
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+ prio = &dev->flow_db->rdma_rx[priority];
+
+ if (!prio)
+ return ERR_PTR(-EINVAL);
if (prio->flow_table)
return prio;
- return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
- MLX5_FS_MAX_TYPES);
+ return _get_prio(ns, prio, priority, max_table_size,
+ MLX5_FS_MAX_TYPES, flags);
}
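
Table sizing in _get_flow_table is a log2 capability expanded with BIT() and then clamped. A short worked example, assuming log_max_ft_size = 10 and a 32768-entry driver cap:

#include <stdio.h>

#define BIT(n) (1u << (n))
#define FS_MAX_ENTRIES 32768u /* assumed driver cap (MLX5_FS_MAX_ENTRIES) */

int main(void)
{
	unsigned int log_max_ft_size = 10;       /* assumed HCA capability */
	unsigned int max = BIT(log_max_ft_size); /* 1024 entries           */

	if (max > FS_MAX_ENTRIES)
		max = FS_MAX_ENTRIES;
	printf("flow table entries = %u\n", max);
	return 0;
}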
static struct mlx5_ib_flow_handler *
@@ -3686,10 +4008,12 @@
struct mlx5_ib_flow_prio *ft_prio,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
- void *cmd_in, int inlen)
+ struct mlx5_flow_context *flow_context,
+ struct mlx5_flow_act *flow_act,
+ void *cmd_in, int inlen,
+ int dst_num)
{
struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_flow_spec *spec;
struct mlx5_flow_table *ft = ft_prio->flow_table;
int err = 0;
@@ -3707,10 +4031,10 @@
memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
fs_matcher->mask_len);
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
handler->rule = mlx5_add_flow_rules(ft, spec,
- &flow_act, dst, 1);
+ flow_act, dst, dst_num);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
@@ -3772,13 +4096,16 @@
struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
+ struct mlx5_flow_act *flow_act,
+ u32 counter_id,
void *cmd_in, int inlen, int dest_id,
int dest_type)
{
struct mlx5_flow_destination *dst;
struct mlx5_ib_flow_prio *ft_prio;
- int priority = fs_matcher->priority;
struct mlx5_ib_flow_handler *handler;
+ int dst_num = 0;
bool mcast;
int err;
@@ -3788,29 +4115,43 @@
if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
if (!dst)
return ERR_PTR(-ENOMEM);
mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
mutex_lock(&dev->flow_db->lock);
- ft_prio = _get_flow_table(dev, priority, mcast);
+ ft_prio = _get_flow_table(dev, fs_matcher, mcast);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
}
if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
- dst->type = dest_type;
- dst->tir_num = dest_id;
+ dst[dst_num].type = dest_type;
+ dst[dst_num].tir_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+ dst[dst_num].ft_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
} else {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
- dst->ft_num = dest_id;
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
}
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in,
- inlen);
+ dst_num++;
+
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst[dst_num].counter_id = counter_id;
+ dst_num++;
+ }
+
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
+ flow_context, flow_act,
+ cmd_in, inlen, dst_num);
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
@@ -3988,6 +4329,9 @@
*/
mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
break;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ mlx5_ib_destroy_flow_action_raw(maction);
+ break;
default:
WARN_ON(true);
break;
@@ -4002,13 +4346,17 @@
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *mqp = to_mqp(ibqp);
int err;
+ u16 uid;
+
+ uid = ibqp->pd ?
+ to_mpd(ibqp->pd)->uid : 0;
if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
return -EOPNOTSUPP;
}
- err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
+ err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
@@ -4020,8 +4368,11 @@
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
int err;
+ u16 uid;
- err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
+ uid = ibqp->pd ?
+ to_mpd(ibqp->pd)->uid : 0;
+ err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
if (err)
mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
@@ -4042,61 +4393,68 @@
return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}
-static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t fw_pages_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
}
+static DEVICE_ATTR_RO(fw_pages);
-static ssize_t show_reg_pages(struct device *device,
+static ssize_t reg_pages_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
+static DEVICE_ATTR_RO(reg_pages);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
return sprintf(buf, "%x\n", dev->mdev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
dev->mdev->board_id);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
-static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
+static struct attribute *mlx5_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_fw_pages.attr,
+ &dev_attr_reg_pages.attr,
+ NULL,
+};
-static struct device_attribute *mlx5_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
- &dev_attr_fw_pages,
- &dev_attr_reg_pages,
+static const struct attribute_group mlx5_attr_group = {
+ .attrs = mlx5_class_attributes,
};
static void pkey_change_handler(struct work_struct *work)
@@ -4164,7 +4522,7 @@
* lock/unlock above locks Now need to arm all involved CQs.
*/
list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
- mcq->comp(mcq);
+ mcq->comp(mcq, NULL);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
@@ -4189,6 +4547,67 @@
mutex_unlock(&delay_drop->lock);
}
+static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+ struct ib_event *ibev)
+{
+ u8 port = (eqe->data.port.port >> 4) & 0xf;
+
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
+ if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
+ IB_LINK_LAYER_ETHERNET)
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
+ break;
+ default: /* do nothing */
+ return;
+ }
+}
+
+static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+ struct ib_event *ibev)
+{
+ u8 port = (eqe->data.port.port >> 4) & 0xf;
+
+ ibev->element.port_num = port;
+
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+ /* In RoCE, port up/down events are handled in
+ * mlx5_netdev_event().
+ */
+ if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
+ IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
+
+ ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_LID:
+ ibev->event = IB_EVENT_LID_CHANGE;
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+ ibev->event = IB_EVENT_PKEY_CHANGE;
+ schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+ ibev->event = IB_EVENT_GID_CHANGE;
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+ ibev->event = IB_EVENT_CLIENT_REREGISTER;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
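
Both helpers decode the affected port from the high nibble of the EQE's port byte. A quick worked example of that extraction, assuming the firmware layout implied above:

/* Example: eqe->data.port.port == 0x20
 *   (0x20 >> 4) & 0xf == 2   ->   1-based port 2, as the IB core expects
 */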
+
static void mlx5_ib_handle_event(struct work_struct *_work)
{
struct mlx5_ib_event_work *work =
@@ -4196,65 +4615,37 @@
struct mlx5_ib_dev *ibdev;
struct ib_event ibev;
bool fatal = false;
- u8 port = (u8)work->param;
- if (mlx5_core_is_mp_slave(work->dev)) {
- ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
+ if (work->is_slave) {
+ ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
if (!ibdev)
goto out;
} else {
- ibdev = work->context;
+ ibdev = work->dev;
}
switch (work->event) {
case MLX5_DEV_EVENT_SYS_ERROR:
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx5_ib_handle_internal_error(ibdev);
+ ibev.element.port_num = (u8)(unsigned long)work->param;
fatal = true;
break;
-
- case MLX5_DEV_EVENT_PORT_UP:
- case MLX5_DEV_EVENT_PORT_DOWN:
- case MLX5_DEV_EVENT_PORT_INITIALIZED:
- /* In RoCE, port up/down events are handled in
- * mlx5_netdev_event().
- */
- if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
- IB_LINK_LAYER_ETHERNET)
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ if (handle_port_change(ibdev, work->param, &ibev))
goto out;
-
- ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
break;
-
- case MLX5_DEV_EVENT_LID_CHANGE:
- ibev.event = IB_EVENT_LID_CHANGE;
- break;
-
- case MLX5_DEV_EVENT_PKEY_CHANGE:
- ibev.event = IB_EVENT_PKEY_CHANGE;
- schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
- break;
-
- case MLX5_DEV_EVENT_GUID_CHANGE:
- ibev.event = IB_EVENT_GID_CHANGE;
- break;
-
- case MLX5_DEV_EVENT_CLIENT_REREG:
- ibev.event = IB_EVENT_CLIENT_REREGISTER;
- break;
- case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
- schedule_work(&ibdev->delay_drop.delay_drop_work);
- goto out;
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ handle_general_event(ibdev, work->param, &ibev);
+ /* fall through */
default:
goto out;
}
- ibev.device = &ibdev->ib_dev;
- ibev.element.port_num = port;
+ ibev.device = &ibdev->ib_dev;
- if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
- mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
+ if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
+ mlx5_ib_warn(ibdev, "warning: event on port %d\n",
+ ibev.element.port_num);
goto out;
}
@@ -4267,22 +4658,43 @@
kfree(work);
}
-static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
- enum mlx5_dev_event event, unsigned long param)
+static int mlx5_ib_event(struct notifier_block *nb,
+ unsigned long event, void *param)
{
struct mlx5_ib_event_work *work;
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
- return;
+ return NOTIFY_DONE;
INIT_WORK(&work->work, mlx5_ib_handle_event);
- work->dev = dev;
+ work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events);
+ work->is_slave = false;
work->param = param;
- work->context = context;
work->event = event;
queue_work(mlx5_ib_event_wq, &work->work);
+
+ return NOTIFY_OK;
+}
+
+static int mlx5_ib_event_slave_port(struct notifier_block *nb,
+ unsigned long event, void *param)
+{
+ struct mlx5_ib_event_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return NOTIFY_DONE;
+
+ INIT_WORK(&work->work, mlx5_ib_handle_event);
+ work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events);
+ work->is_slave = true;
+ work->param = param;
+ work->event = event;
+ queue_work(mlx5_ib_event_wq, &work->work);
+
+ return NOTIFY_OK;
}
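
Both event entry points now hang off the mlx5 core's notifier chain instead of the old mlx5_interface->event callback (removed at the end of this patch). A hedged sketch of the registration flow; my_handler() and my_register() are illustrative names, not part of the patch:

static int my_handler(struct notifier_block *nb, unsigned long event,
                      void *data)
{
        /* 'event' carries the EQE type, 'data' the payload, e.g. a
         * struct mlx5_eqe * for MLX5_EVENT_TYPE_PORT_CHANGE. */
        return NOTIFY_OK;
}

static void my_register(struct mlx5_core_dev *mdev,
                        struct notifier_block *nb)
{
        nb->notifier_call = my_handler;
        mlx5_notifier_register(mdev, nb);
        /* paired with mlx5_notifier_unregister(mdev, nb) on teardown */
}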
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -4291,7 +4703,7 @@
int err;
int port;
- for (port = 1; port <= dev->num_ports; port++) {
+ for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) {
dev->mdev->port_caps[port - 1].has_smi = false;
if (MLX5_CAP_GEN(dev->mdev, port_type) ==
MLX5_CAP_PORT_TYPE_IB) {
@@ -4322,14 +4734,14 @@
mlx5_query_ext_port_caps(dev, port);
}
-static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
+static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
{
struct ib_device_attr *dprops = NULL;
struct ib_port_attr *pprops = NULL;
int err = -ENOMEM;
struct ib_udata uhw = {.inlen = 0, .outlen = 0};
- pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
+ pprops = kzalloc(sizeof(*pprops), GFP_KERNEL);
if (!pprops)
goto out;
@@ -4337,17 +4749,12 @@
if (!dprops)
goto out;
- err = set_has_smi_cap(dev);
- if (err)
- goto out;
-
err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
goto out;
}
- memset(pprops, 0, sizeof(*pprops));
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@ -4369,6 +4776,16 @@
return err;
}
+static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
+{
+ /* For representors use port 1, as this is the only native
+ * port
+ */
+ if (dev->is_rep)
+ return __get_port_caps(dev, 1);
+ return __get_port_caps(dev, port);
+}
+
static void destroy_umrc_res(struct mlx5_ib_dev *dev)
{
int err;
@@ -4378,7 +4795,7 @@
mlx5_ib_warn(dev, "mr cache cleanup failed\n");
if (dev->umrc.qp)
- mlx5_ib_destroy_qp(dev->umrc.qp);
+ mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
if (dev->umrc.cq)
ib_free_cq(dev->umrc.cq);
if (dev->umrc.pd)
@@ -4483,7 +4900,7 @@
return 0;
error_4:
- mlx5_ib_destroy_qp(qp);
+ mlx5_ib_destroy_qp(qp, NULL);
dev->umrc.qp = NULL;
error_3:
@@ -4516,36 +4933,42 @@
{
struct ib_srq_init_attr attr;
struct mlx5_ib_dev *dev;
+ struct ib_device *ibdev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
int port;
int ret = 0;
dev = container_of(devr, struct mlx5_ib_dev, devr);
+ ibdev = &dev->ib_dev;
mutex_init(&devr->mutex);
- devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
- if (IS_ERR(devr->p0)) {
- ret = PTR_ERR(devr->p0);
- goto error0;
- }
- devr->p0->device = &dev->ib_dev;
+ devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
+ if (!devr->p0)
+ return -ENOMEM;
+
+ devr->p0->device = ibdev;
devr->p0->uobject = NULL;
atomic_set(&devr->p0->usecnt, 0);
- devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
- if (IS_ERR(devr->c0)) {
- ret = PTR_ERR(devr->c0);
+ ret = mlx5_ib_alloc_pd(devr->p0, NULL);
+ if (ret)
+ goto error0;
+
+ devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq);
+ if (!devr->c0) {
+ ret = -ENOMEM;
goto error1;
}
- devr->c0->device = &dev->ib_dev;
- devr->c0->uobject = NULL;
- devr->c0->comp_handler = NULL;
- devr->c0->event_handler = NULL;
- devr->c0->cq_context = NULL;
+
+ devr->c0->device = &dev->ib_dev;
atomic_set(&devr->c0->usecnt, 0);
- devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
+ ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL);
+ if (ret)
+ goto err_create_cq;
+
+ devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
if (IS_ERR(devr->x0)) {
ret = PTR_ERR(devr->x0);
goto error2;
@@ -4556,7 +4979,7 @@
mutex_init(&devr->x0->tgt_qp_mutex);
INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
- devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
+ devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
if (IS_ERR(devr->x1)) {
ret = PTR_ERR(devr->x1);
goto error3;
@@ -4574,19 +4997,21 @@
attr.ext.cq = devr->c0;
attr.ext.xrc.xrcd = devr->x0;
- devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
- if (IS_ERR(devr->s0)) {
- ret = PTR_ERR(devr->s0);
+ devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
+ if (!devr->s0) {
+ ret = -ENOMEM;
goto error4;
}
+
devr->s0->device = &dev->ib_dev;
devr->s0->pd = devr->p0;
- devr->s0->uobject = NULL;
- devr->s0->event_handler = NULL;
- devr->s0->srq_context = NULL;
devr->s0->srq_type = IB_SRQT_XRC;
devr->s0->ext.xrc.xrcd = devr->x0;
devr->s0->ext.cq = devr->c0;
+ ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
+ if (ret)
+ goto err_create;
+
atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
atomic_inc(&devr->s0->ext.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
@@ -4596,18 +5021,21 @@
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_BASIC;
- devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
- if (IS_ERR(devr->s1)) {
- ret = PTR_ERR(devr->s1);
+ devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq);
+ if (!devr->s1) {
+ ret = -ENOMEM;
goto error5;
}
+
devr->s1->device = &dev->ib_dev;
devr->s1->pd = devr->p0;
- devr->s1->uobject = NULL;
- devr->s1->event_handler = NULL;
- devr->s1->srq_context = NULL;
devr->s1->srq_type = IB_SRQT_BASIC;
devr->s1->ext.cq = devr->c0;
+
+ ret = mlx5_ib_create_srq(devr->s1, &attr, NULL);
+ if (ret)
+ goto error6;
+
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s1->usecnt, 0);
@@ -4619,35 +5047,44 @@
return 0;
+error6:
+ kfree(devr->s1);
error5:
- mlx5_ib_destroy_srq(devr->s0);
+ mlx5_ib_destroy_srq(devr->s0, NULL);
+err_create:
+ kfree(devr->s0);
error4:
- mlx5_ib_dealloc_xrcd(devr->x1);
+ mlx5_ib_dealloc_xrcd(devr->x1, NULL);
error3:
- mlx5_ib_dealloc_xrcd(devr->x0);
+ mlx5_ib_dealloc_xrcd(devr->x0, NULL);
error2:
- mlx5_ib_destroy_cq(devr->c0);
+ mlx5_ib_destroy_cq(devr->c0, NULL);
+err_create_cq:
+ kfree(devr->c0);
error1:
- mlx5_ib_dealloc_pd(devr->p0);
+ mlx5_ib_dealloc_pd(devr->p0, NULL);
error0:
+ kfree(devr->p0);
return ret;
}
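
create_dev_resources() switches to the core's embedded-object convention: the driver allocates each uverbs object with rdma_zalloc_drv_obj() (sized by the INIT_RDMA_OBJ_SIZE() entries added further down), fills in the generic fields, and calls an int-returning init hook; that is why every new error label pairs the destroy call with a kfree(). A distilled sketch of the PD leg, under those assumptions:

static int example_make_pd(struct ib_device *ibdev, struct ib_pd **out)
{
        struct ib_pd *pd;
        int err;

        pd = rdma_zalloc_drv_obj(ibdev, ib_pd); /* driver-sized alloc */
        if (!pd)
                return -ENOMEM;

        pd->device = ibdev;
        err = mlx5_ib_alloc_pd(pd, NULL);       /* int return, no ERR_PTR */
        if (err) {
                kfree(pd);                      /* allocator owns it */
                return err;
        }

        *out = pd;
        return 0;
}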
static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
- struct mlx5_ib_dev *dev =
- container_of(devr, struct mlx5_ib_dev, devr);
int port;
- mlx5_ib_destroy_srq(devr->s1);
- mlx5_ib_destroy_srq(devr->s0);
- mlx5_ib_dealloc_xrcd(devr->x0);
- mlx5_ib_dealloc_xrcd(devr->x1);
- mlx5_ib_destroy_cq(devr->c0);
- mlx5_ib_dealloc_pd(devr->p0);
+ mlx5_ib_destroy_srq(devr->s1, NULL);
+ kfree(devr->s1);
+ mlx5_ib_destroy_srq(devr->s0, NULL);
+ kfree(devr->s0);
+ mlx5_ib_dealloc_xrcd(devr->x0, NULL);
+ mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_ib_destroy_cq(devr->c0, NULL);
+ kfree(devr->c0);
+ mlx5_ib_dealloc_pd(devr->p0, NULL);
+ kfree(devr->p0);
/* Make sure no change P_Key work items are still executing */
- for (port = 0; port < dev->num_ports; ++port)
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
cancel_work_sync(&devr->ports[port].pkey_change_work);
}
@@ -4750,7 +5187,7 @@
struct mlx5_flow_table *ft;
int err;
- if (!ns || !mlx5_lag_is_active(mdev))
+ if (!ns || !mlx5_lag_is_roce(mdev))
return 0;
err = mlx5_cmd_create_vport_lag(mdev);
@@ -4764,6 +5201,7 @@
}
dev->flow_db->lag_demux_ft = ft;
+ dev->lag_active = true;
return 0;
err_destroy_vport_lag:
@@ -4775,7 +5213,9 @@
{
struct mlx5_core_dev *mdev = dev->mdev;
- if (dev->flow_db->lag_demux_ft) {
+ if (dev->lag_active) {
+ dev->lag_active = false;
+
mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
dev->flow_db->lag_demux_ft = NULL;
@@ -4787,10 +5227,10 @@
{
int err;
- dev->roce[port_num].nb.notifier_call = mlx5_netdev_event;
- err = register_netdevice_notifier(&dev->roce[port_num].nb);
+ dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier(&dev->port[port_num].roce.nb);
if (err) {
- dev->roce[port_num].nb.notifier_call = NULL;
+ dev->port[port_num].roce.nb.notifier_call = NULL;
return err;
}
@@ -4799,9 +5239,9 @@
static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
{
- if (dev->roce[port_num].nb.notifier_call) {
- unregister_netdevice_notifier(&dev->roce[port_num].nb);
- dev->roce[port_num].nb.notifier_call = NULL;
+ if (dev->port[port_num].roce.nb.notifier_call) {
+ unregister_netdevice_notifier(&dev->port[port_num].roce.nb);
+ dev->port[port_num].roce.nb.notifier_call = NULL;
}
}
@@ -4893,11 +5333,21 @@
INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};
+static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
+{
+ return MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
+ MLX5_ESWITCH_OFFLOADS;
+}
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
+ int num_cnt_ports;
int i;
- for (i = 0; i < dev->num_ports; i++) {
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ for (i = 0; i < num_cnt_ports; i++) {
if (dev->port[i].cnts.set_id_valid)
mlx5_core_dealloc_q_counter(dev->mdev,
dev->port[i].cnts.set_id);
@@ -4999,10 +5449,15 @@
static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
+ int num_cnt_ports;
int err = 0;
int i;
+ bool is_shared;
- for (i = 0; i < dev->num_ports; i++) {
+ is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ for (i = 0; i < num_cnt_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
if (err)
goto err_alloc;
@@ -5010,8 +5465,10 @@
mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
dev->port[i].cnts.offsets);
- err = mlx5_core_alloc_q_counter(dev->mdev,
- &dev->port[i].cnts.set_id);
+ err = mlx5_cmd_alloc_q_counter(dev->mdev,
+ &dev->port[i].cnts.set_id,
+ is_shared ?
+ MLX5_SHARED_RESOURCE_UID : 0);
if (err) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
@@ -5020,7 +5477,6 @@
}
dev->port[i].cnts.set_id_valid = true;
}
-
return 0;
err_alloc:
@@ -5028,26 +5484,52 @@
return err;
}
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
+ &dev->port[port_num].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Returns counters id to use for device+port
+ * @dev: Pointer to mlx5 IB device
+ * @port_num: Zero-based port number
+ *
+ * mlx5_ib_get_counters_id() returns the counters set id to use for the
+ * given device and port combination, in both switchdev and
+ * non-switchdev modes of the parent device.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
+{
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+ return cnts->set_id;
+}
+
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
u8 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ const struct mlx5_ib_counters *cnts;
+ bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
- /* We support only per port stats */
- if (port_num == 0)
+ if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
return NULL;
- return rdma_alloc_hw_stats_struct(port->cnts.names,
- port->cnts.num_q_counters +
- port->cnts.num_cong_counters +
- port->cnts.num_ext_ppcnt_counters,
+ cnts = get_counters(dev, port_num - 1);
+
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
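
The reworked guard is terse, so for reference: in switchdev mode only the device-level query (port_num == 0) is served, from the shared port[0] counters, while in non-switchdev mode only per-port queries are valid. As a truth table:

/*
 * is_switchdev   port_num   result
 *     true          0       stats from the shared port[0] counters
 *     true         >0       NULL (no per-port stats in switchdev)
 *     false         0       NULL (no device-level stats)
 *     false        >0       per-port stats
 */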
static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
- struct mlx5_ib_port *port,
- struct rdma_hw_stats *stats)
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats,
+ u16 set_id)
{
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
@@ -5058,14 +5540,12 @@
if (!out)
return -ENOMEM;
- ret = mlx5_core_query_q_counter(mdev,
- port->cnts.set_id, 0,
- out, outlen);
+ ret = mlx5_core_query_q_counter(mdev, set_id, 0, out, outlen);
if (ret)
goto free;
- for (i = 0; i < port->cnts.num_q_counters; i++) {
- val = *(__be32 *)(out + port->cnts.offsets[i]);
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)(out + cnts->offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
@@ -5075,10 +5555,10 @@
}
static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_port *port,
- struct rdma_hw_stats *stats)
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
{
- int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+ int offset = cnts->num_q_counters + cnts->num_cong_counters;
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
int ret, i;
void *out;
@@ -5091,12 +5571,10 @@
if (ret)
goto free;
- for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
+ for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
stats->value[i + offset] =
be64_to_cpup((__be64 *)(out +
- port->cnts.offsets[i + offset]));
- }
-
+ cnts->offsets[i + offset]));
free:
kvfree(out);
return ret;
@@ -5107,7 +5585,7 @@
u8 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
struct mlx5_core_dev *mdev;
int ret, num_counters;
u8 mdev_port_num;
@@ -5115,17 +5593,17 @@
if (!stats)
return -EINVAL;
- num_counters = port->cnts.num_q_counters +
- port->cnts.num_cong_counters +
- port->cnts.num_ext_ppcnt_counters;
+ num_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
/* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
if (ret)
return ret;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
+ ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
if (ret)
return ret;
}
@@ -5142,10 +5620,10 @@
}
ret = mlx5_lag_query_cong_counters(dev->mdev,
stats->value +
- port->cnts.num_q_counters,
- port->cnts.num_cong_counters,
- port->cnts.offsets +
- port->cnts.num_q_counters);
+ cnts->num_q_counters,
+ cnts->num_cong_counters,
+ cnts->offsets +
+ cnts->num_q_counters);
mlx5_ib_put_native_port_mdev(dev, port_num);
if (ret)
@@ -5156,22 +5634,78 @@
return num_counters;
}
-static struct net_device*
-mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
- u8 port_num,
- enum rdma_netdev_t type,
- const char *name,
- unsigned char name_assign_type,
- void (*setup)(struct net_device *))
+static struct rdma_hw_stats *
+mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
- struct net_device *netdev;
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
+ /* Q counters are at the beginning of all counters */
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
+
+ return mlx5_ib_query_q_counters(dev->mdev, cnts,
+ counter->stats, counter->id);
+}
+
+static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ u16 cnt_set_id = 0;
+ int err;
+
+ if (!counter->id) {
+ err = mlx5_cmd_alloc_q_counter(dev->mdev,
+ &cnt_set_id,
+ MLX5_SHARED_RESOURCE_UID);
+ if (err)
+ return err;
+ counter->id = cnt_set_id;
+ }
+
+ err = mlx5_ib_qp_set_counter(qp, counter);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ mlx5_core_dealloc_q_counter(dev->mdev, cnt_set_id);
+ counter->id = 0;
+
+ return err;
+}
+
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
+{
+ return mlx5_ib_qp_set_counter(qp, NULL);
+}
+
+static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+
+ return mlx5_core_dealloc_q_counter(dev->mdev, counter->id);
+}
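
Taken together, the five callbacks above implement the rdma_counter hooks: the first bind lazily allocates a shared q counter whose set id becomes counter->id, every bound QP is pointed at that set id, and dealloc releases the firmware object. The lifecycle, distilled (error handling elided; not a verbatim call sequence):

/* first bind: allocate and remember the shared q counter */
mlx5_cmd_alloc_q_counter(mdev, &set_id, MLX5_SHARED_RESOURCE_UID);
counter->id = set_id;
mlx5_ib_qp_set_counter(qp, counter);    /* QP now reports into set_id */

/* unbind: detach the QP; the counter itself stays allocated */
mlx5_ib_qp_set_counter(qp, NULL);

/* dealloc: release the firmware counter behind counter->id */
mlx5_core_dealloc_q_counter(mdev, counter->id);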
+
+static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params)
+{
if (type != RDMA_NETDEV_IPOIB)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
- netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
- name, setup);
- return netdev;
+ return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
}
static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
@@ -5296,15 +5830,6 @@
mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
}
-static const struct cpumask *
-mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
- return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
-}
-
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
@@ -5314,6 +5839,8 @@
int err;
int i;
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
@@ -5321,9 +5848,13 @@
spin_unlock(&port->mp.mpi_lock);
return;
}
+
mpi->ibdev = NULL;
spin_unlock(&port->mp.mpi_lock);
+ if (mpi->mdev_events.notifier_call)
+ mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
+ mpi->mdev_events.notifier_call = NULL;
mlx5_remove_netdev_notifier(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
@@ -5356,16 +5887,17 @@
mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n",
port_num + 1);
- ibdev->roce[port_num].last_port_state = IB_PORT_DOWN;
+ ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN;
}
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
int err;
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
spin_lock(&ibdev->port[port_num].mp.mpi_lock);
if (ibdev->port[port_num].mp.mpi) {
mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
@@ -5376,6 +5908,7 @@
ibdev->port[port_num].mp.mpi = mpi;
mpi->ibdev = ibdev;
+ mpi->mdev_events.notifier_call = NULL;
spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
@@ -5393,9 +5926,10 @@
goto unbind;
}
- err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
- if (err)
- goto unbind;
+ mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
+ mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
+
+ mlx5_ib_init_cong_debugfs(ibdev, port_num);
return true;
@@ -5455,7 +5989,8 @@
}
if (bound) {
- dev_dbg(&mpi->mdev->pdev->dev, "removing port from unaffiliated list.\n");
+ dev_dbg(mpi->mdev->device,
+ "removing port from unaffiliated list.\n");
mlx5_ib_dbg(dev, "port %d bound\n", i + 1);
list_del(&mpi->list);
break;
@@ -5513,7 +6048,10 @@
UA_MANDATORY),
UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
UVERBS_ATTR_TYPE(u16),
- UA_MANDATORY));
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+ enum mlx5_ib_uapi_dm_type,
+ UA_OPTIONAL));
ADD_UVERBS_ATTRIBUTES_SIMPLE(
mlx5_ib_flow_action,
@@ -5522,30 +6060,17 @@
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
enum mlx5_ib_uapi_flow_action_flags));
-static int populate_specs_root(struct mlx5_ib_dev *dev)
-{
- const struct uverbs_object_tree_def **trees = dev->driver_trees;
- size_t num_trees = 0;
+static const struct uapi_definition mlx5_ib_defs[] = {
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+ UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
+#endif
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- trees[num_trees++] = &mlx5_ib_flow_action;
-
- if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
- trees[num_trees++] = &mlx5_ib_dm;
-
- if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
- MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
- trees[num_trees++] = mlx5_ib_get_devx_tree();
-
- num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
-
- WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
- trees[num_trees] = NULL;
- dev->ib_dev.driver_specs = trees;
-
- return 0;
-}
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_action),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ {}
+};
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
@@ -5617,35 +6142,40 @@
return &mcounters->ibcntrs;
}
-void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- cleanup_srcu_struct(&dev->mr_srcu);
-#endif
- kfree(dev->port);
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ srcu_barrier(&dev->mr_srcu);
+ cleanup_srcu_struct(&dev->mr_srcu);
+ }
+
+ WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
}
-int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- const char *name;
int err;
int i;
- dev->port = kcalloc(dev->num_ports, sizeof(*dev->port),
- GFP_KERNEL);
- if (!dev->port)
- return -ENOMEM;
-
for (i = 0; i < dev->num_ports; i++) {
spin_lock_init(&dev->port[i].mp.mpi_lock);
- rwlock_init(&dev->roce[i].netdev_lock);
+ rwlock_init(&dev->port[i].roce.netdev_lock);
+ dev->port[i].roce.dev = dev;
+ dev->port[i].roce.native_port_num = i + 1;
+ dev->port[i].roce.last_port_state = IB_PORT_DOWN;
}
+ mlx5_ib_internal_fill_odp_caps(dev);
+
err = mlx5_ib_init_multiport_master(dev);
if (err)
- goto err_free_port;
+ return err;
+
+ err = set_has_smi_cap(dev);
+ if (err)
+ return err;
if (!mlx5_core_mp_enabled(mdev)) {
for (i = 1; i <= dev->num_ports; i++) {
@@ -5662,40 +6192,30 @@
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- if (!mlx5_lag_is_active(mdev))
- name = "mlx5_%d";
- else
- name = "mlx5_bond_%d";
-
- strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
- dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->ib_dev.phys_port_cnt = dev->num_ports;
- dev->ib_dev.num_comp_vectors =
- dev->mdev->priv.eq_table.num_comp_vectors;
- dev->ib_dev.dev.parent = &mdev->pdev->dev;
+ dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
+ dev->ib_dev.dev.parent = mdev->device;
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
- spin_lock_init(&dev->memic.memic_lock);
- dev->memic.dev = mdev;
+ spin_lock_init(&dev->dm.lock);
+ dev->dm.dev = mdev;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- err = init_srcu_struct(&dev->mr_srcu);
- if (err)
- goto err_free_port;
-#endif
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ err = init_srcu_struct(&dev->mr_srcu);
+ if (err)
+ goto err_mp;
+ }
return 0;
+
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
-err_free_port:
- kfree(dev->port);
-
return -ENOMEM;
}
@@ -5711,31 +6231,116 @@
return 0;
}
-int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
-{
- struct mlx5_ib_dev *nic_dev;
-
- nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
-
- if (!nic_dev)
- return -EINVAL;
-
- dev->flow_db = nic_dev->flow_db;
-
- return 0;
-}
-
static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
{
kfree(dev->flow_db);
}
-int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+static const struct ib_device_ops mlx5_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MLX5,
+ .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION,
+
+ .add_gid = mlx5_ib_add_gid,
+ .alloc_mr = mlx5_ib_alloc_mr,
+ .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
+ .alloc_pd = mlx5_ib_alloc_pd,
+ .alloc_ucontext = mlx5_ib_alloc_ucontext,
+ .attach_mcast = mlx5_ib_mcg_attach,
+ .check_mr_status = mlx5_ib_check_mr_status,
+ .create_ah = mlx5_ib_create_ah,
+ .create_counters = mlx5_ib_create_counters,
+ .create_cq = mlx5_ib_create_cq,
+ .create_flow = mlx5_ib_create_flow,
+ .create_qp = mlx5_ib_create_qp,
+ .create_srq = mlx5_ib_create_srq,
+ .dealloc_pd = mlx5_ib_dealloc_pd,
+ .dealloc_ucontext = mlx5_ib_dealloc_ucontext,
+ .del_gid = mlx5_ib_del_gid,
+ .dereg_mr = mlx5_ib_dereg_mr,
+ .destroy_ah = mlx5_ib_destroy_ah,
+ .destroy_counters = mlx5_ib_destroy_counters,
+ .destroy_cq = mlx5_ib_destroy_cq,
+ .destroy_flow = mlx5_ib_destroy_flow,
+ .destroy_flow_action = mlx5_ib_destroy_flow_action,
+ .destroy_qp = mlx5_ib_destroy_qp,
+ .destroy_srq = mlx5_ib_destroy_srq,
+ .detach_mcast = mlx5_ib_mcg_detach,
+ .disassociate_ucontext = mlx5_ib_disassociate_ucontext,
+ .drain_rq = mlx5_ib_drain_rq,
+ .drain_sq = mlx5_ib_drain_sq,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = mlx5_ib_get_dma_mr,
+ .get_link_layer = mlx5_ib_port_link_layer,
+ .map_mr_sg = mlx5_ib_map_mr_sg,
+ .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
+ .mmap = mlx5_ib_mmap,
+ .modify_cq = mlx5_ib_modify_cq,
+ .modify_device = mlx5_ib_modify_device,
+ .modify_port = mlx5_ib_modify_port,
+ .modify_qp = mlx5_ib_modify_qp,
+ .modify_srq = mlx5_ib_modify_srq,
+ .poll_cq = mlx5_ib_poll_cq,
+ .post_recv = mlx5_ib_post_recv,
+ .post_send = mlx5_ib_post_send,
+ .post_srq_recv = mlx5_ib_post_srq_recv,
+ .process_mad = mlx5_ib_process_mad,
+ .query_ah = mlx5_ib_query_ah,
+ .query_device = mlx5_ib_query_device,
+ .query_gid = mlx5_ib_query_gid,
+ .query_pkey = mlx5_ib_query_pkey,
+ .query_qp = mlx5_ib_query_qp,
+ .query_srq = mlx5_ib_query_srq,
+ .read_counters = mlx5_ib_read_counters,
+ .reg_user_mr = mlx5_ib_reg_user_mr,
+ .req_notify_cq = mlx5_ib_arm_cq,
+ .rereg_user_mr = mlx5_ib_rereg_user_mr,
+ .resize_cq = mlx5_ib_resize_cq,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
+};
+
+static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
+ .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
+ .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
+ .rdma_netdev_get_params = mlx5_ib_rn_get_params,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
+ .get_vf_config = mlx5_ib_get_vf_config,
+ .get_vf_stats = mlx5_ib_get_vf_stats,
+ .set_vf_guid = mlx5_ib_set_vf_guid,
+ .set_vf_link_state = mlx5_ib_set_vf_link_state,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
+ .alloc_mw = mlx5_ib_alloc_mw,
+ .dealloc_mw = mlx5_ib_dealloc_mw,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
+ .alloc_xrcd = mlx5_ib_alloc_xrcd,
+ .dealloc_xrcd = mlx5_ib_dealloc_xrcd,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
+ .alloc_dm = mlx5_ib_alloc_dm,
+ .dealloc_dm = mlx5_ib_dealloc_dm,
+ .reg_dm_mr = mlx5_ib_reg_dm_mr,
+};
+
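
All the per-pointer dev->ib_dev.xxx assignments deleted in the next hunks are replaced by these const ib_device_ops tables: ib_set_device_ops() copies every non-NULL hook, so optional features simply layer an extra table on top, and the INIT_RDMA_OBJ_SIZE() entries record the container sizes consumed by rdma_zalloc_drv_obj() earlier. A sketch of the composition, with hypothetical names (handler bodies elided):

struct example_pd {
        struct ib_pd    ibpd;   /* core object embedded in driver struct */
        u32             pdn;    /* driver-private state */
};

static const struct ib_device_ops example_base_ops = {
        .query_device = example_query_device,
        INIT_RDMA_OBJ_SIZE(ib_pd, example_pd, ibpd),
};

static const struct ib_device_ops example_dm_ops = {
        .alloc_dm = example_alloc_dm,   /* only when the HCA supports DM */
};

static void example_set_ops(struct ib_device *ibdev, bool hw_has_dm)
{
        ib_set_device_ops(ibdev, &example_base_ops);
        if (hw_has_dm)                  /* later tables extend the base */
                ib_set_device_ops(ibdev, &example_dm_ops);
}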
+static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int err;
- dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -5768,103 +6373,46 @@
(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
- dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
- dev->ib_dev.query_gid = mlx5_ib_query_gid;
- dev->ib_dev.add_gid = mlx5_ib_add_gid;
- dev->ib_dev.del_gid = mlx5_ib_del_gid;
- dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
- dev->ib_dev.modify_device = mlx5_ib_modify_device;
- dev->ib_dev.modify_port = mlx5_ib_modify_port;
- dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
- dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
- dev->ib_dev.mmap = mlx5_ib_mmap;
- dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
- dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
- dev->ib_dev.create_ah = mlx5_ib_create_ah;
- dev->ib_dev.query_ah = mlx5_ib_query_ah;
- dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
- dev->ib_dev.create_srq = mlx5_ib_create_srq;
- dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
- dev->ib_dev.query_srq = mlx5_ib_query_srq;
- dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
- dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
- dev->ib_dev.create_qp = mlx5_ib_create_qp;
- dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
- dev->ib_dev.query_qp = mlx5_ib_query_qp;
- dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
- dev->ib_dev.drain_sq = mlx5_ib_drain_sq;
- dev->ib_dev.drain_rq = mlx5_ib_drain_rq;
- dev->ib_dev.post_send = mlx5_ib_post_send;
- dev->ib_dev.post_recv = mlx5_ib_post_recv;
- dev->ib_dev.create_cq = mlx5_ib_create_cq;
- dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
- dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
- dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
- dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
- dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
- dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
- dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
- dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
- dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
- dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
- dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
- dev->ib_dev.process_mad = mlx5_ib_process_mad;
- dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
- dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
- dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
- dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
- if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
- dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
+ IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
+ ib_set_device_ops(&dev->ib_dev,
+ &mlx5_ib_dev_ipoib_enhanced_ops);
- if (mlx5_core_is_pf(mdev)) {
- dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
- dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
- dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
- dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
- }
-
- dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
+ if (mlx5_core_is_pf(mdev))
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops);
dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
if (MLX5_CAP_GEN(mdev, imaicl)) {
- dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
- dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops);
}
if (MLX5_CAP_GEN(mdev, xrc)) {
- dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
- dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
}
- if (MLX5_CAP_DEV_MEM(mdev, memic)) {
- dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm;
- dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm;
- dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr;
- }
+ if (MLX5_CAP_DEV_MEM(mdev, memic) ||
+ MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
- dev->ib_dev.create_flow = mlx5_ib_create_flow;
- dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
- dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
- dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
- dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
- dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
- dev->ib_dev.create_counters = mlx5_ib_create_counters;
- dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
- dev->ib_dev.read_counters = mlx5_ib_read_counters;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+ dev->ib_dev.driver_def = mlx5_ib_defs;
err = init_node_data(dev);
if (err)
@@ -5873,44 +6421,47 @@
if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
(MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
- mutex_init(&dev->lb_mutex);
+ mutex_init(&dev->lb.mutex);
+
+ dev->ib_dev.use_cq_dim = true;
return 0;
}
+static const struct ib_device_ops mlx5_ib_dev_port_ops = {
+ .get_port_immutable = mlx5_port_immutable,
+ .query_port = mlx5_ib_query_port,
+};
+
static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
{
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
- dev->ib_dev.query_port = mlx5_ib_query_port;
-
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops);
return 0;
}
-int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
+ .get_port_immutable = mlx5_port_rep_immutable,
+ .query_port = mlx5_ib_rep_query_port,
+};
+
+static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
{
- dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
- dev->ib_dev.query_port = mlx5_ib_rep_query_port;
-
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops);
return 0;
}
+static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
+ .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table,
+ .create_wq = mlx5_ib_create_wq,
+ .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
+ .destroy_wq = mlx5_ib_destroy_wq,
+ .get_netdev = mlx5_ib_get_netdev,
+ .modify_wq = mlx5_ib_modify_wq,
+};
+
static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
{
u8 port_num;
- int i;
-
- for (i = 0; i < dev->num_ports; i++) {
- dev->roce[i].dev = dev;
- dev->roce[i].native_port_num = i + 1;
- dev->roce[i].last_port_state = IB_PORT_DOWN;
- }
-
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
@@ -5918,9 +6469,11 @@
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ /* Register only for native ports */
return mlx5_add_netdev_notifier(dev, port_num);
}
@@ -5931,7 +6484,7 @@
mlx5_remove_netdev_notifier(dev, port_num);
}
-int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
@@ -5947,7 +6500,7 @@
return err;
}
-void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_stage_common_roce_cleanup(dev);
}
@@ -5994,28 +6547,40 @@
}
}
-int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
{
return create_dev_resources(&dev->devr);
}
-void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_dev_resources(&dev->devr);
}
static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
{
- mlx5_ib_internal_fill_odp_caps(dev);
-
return mlx5_ib_odp_init_one(dev);
}
-int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_odp_cleanup_one(dev);
+}
+
+static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
+ .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
+};
+
+static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
- dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops);
return mlx5_ib_alloc_counters(dev);
}
@@ -6023,7 +6588,7 @@
return 0;
}
-void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@@ -6031,8 +6596,9 @@
static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
{
- return mlx5_ib_init_cong_debugfs(dev,
- mlx5_core_native_port_num(dev->mdev) - 1);
+ mlx5_ib_init_cong_debugfs(dev,
+ mlx5_core_native_port_num(dev->mdev) - 1);
+ return 0;
}
static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
@@ -6052,7 +6618,7 @@
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
}
-int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
@@ -6067,33 +6633,35 @@
return err;
}
-void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
- return populate_specs_root(dev);
+ const char *name;
+
+ rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
+ if (!mlx5_lag_is_roce(dev->mdev))
+ name = "mlx5_%d";
+ else
+ name = "mlx5_bond_%d";
+ return ib_register_device(&dev->ib_dev, name);
}
-int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
-{
- return ib_register_device(&dev->ib_dev, NULL);
-}
-
-void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
destroy_umrc_res(dev);
}
-void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
{
ib_unregister_device(&dev->ib_dev);
}
-int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
return create_umr_res(dev);
}
@@ -6110,31 +6678,36 @@
cancel_delay_drop(dev);
}
-int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
{
- int err;
- int i;
+ dev->mdev_events.notifier_call = mlx5_ib_event;
+ mlx5_notifier_register(dev->mdev, &dev->mdev_events);
+ return 0;
+}
- for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
- err = device_create_file(&dev->ib_dev.dev,
- mlx5_class_attributes[i]);
- if (err)
- return err;
+static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
+}
+
+static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
+{
+ int uid;
+
+ uid = mlx5_ib_devx_create(dev, false);
+ if (uid > 0) {
+ dev->devx_whitelist_uid = uid;
+ mlx5_ib_devx_init_event_table(dev);
}
return 0;
}
-
-static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
{
- mlx5_ib_register_vport_reps(dev);
-
- return 0;
-}
-
-static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_unregister_vport_reps(dev);
+ if (dev->devx_whitelist_uid) {
+ mlx5_ib_devx_cleanup_event_table(dev);
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
+ }
}
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
@@ -6148,7 +6721,8 @@
profile->stage[stage].cleanup(dev);
}
- ib_dealloc_device((struct ib_device *)dev);
+ kfree(dev->port);
+ ib_dealloc_device(&dev->ib_dev);
}
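
__mlx5_ib_remove() doubles as the error-unwind helper for the stage machinery: __mlx5_ib_add() calls it with the index of the stage that failed, so cleanups run in reverse over the stages that completed. A sketch of the caller side, assuming upstream's err_out label:

        for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
                if (profile->stage[i].init) {
                        err = profile->stage[i].init(dev);
                        if (err)
                                goto err_out;
                }
        }
        return dev;                             /* success path, elided */

err_out:
        __mlx5_ib_remove(dev, profile, i);      /* unwind stages [0, i) */
        return NULL;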
void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
@@ -6157,8 +6731,6 @@
int err;
int i;
- printk_once(KERN_INFO "%s", mlx5_version);
-
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
@@ -6194,12 +6766,18 @@
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
mlx5_ib_stage_odp_init,
- NULL),
+ mlx5_ib_stage_odp_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
@@ -6215,9 +6793,9 @@
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+ mlx5_ib_stage_devx_init,
+ mlx5_ib_stage_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -6227,12 +6805,9 @@
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
- NULL),
};
-static const struct mlx5_ib_profile nic_rep_profile = {
+const struct mlx5_ib_profile uplink_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
@@ -6248,9 +6823,15 @@
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
@@ -6263,21 +6844,15 @@
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+ mlx5_ib_stage_devx_init,
+ mlx5_ib_stage_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
- NULL),
- STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
- mlx5_ib_stage_rep_reg_init,
- mlx5_ib_stage_rep_reg_cleanup),
};
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
@@ -6313,7 +6888,8 @@
if (!bound) {
list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
- dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n");
+ dev_dbg(mdev->device,
+ "no suitable IB device found to bind to, added to unaffiliated list.\n");
}
mutex_unlock(&mlx5_ib_multiport_mutex);
@@ -6325,29 +6901,37 @@
enum rdma_link_layer ll;
struct mlx5_ib_dev *dev;
int port_type_cap;
+ int num_ports;
printk_once(KERN_INFO "%s", mlx5_version);
+ if (MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
+ if (!mlx5_core_mp_enabled(mdev))
+ mlx5_ib_register_vport_reps(mdev);
+ return mdev;
+ }
+
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
return mlx5_ib_add_slave_port(mdev);
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+ dev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!dev)
return NULL;
+ dev->port = kcalloc(num_ports, sizeof(*dev->port),
+ GFP_KERNEL);
+ if (!dev->port) {
+ ib_dealloc_device(&dev->ib_dev);
+ return NULL;
+ }
dev->mdev = mdev;
- dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
- MLX5_CAP_GEN(mdev, num_vhca_ports));
-
- if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
- dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
-
- return __mlx5_ib_add(dev, &nic_rep_profile);
- }
+ dev->num_ports = num_ports;
return __mlx5_ib_add(dev, &pf_profile);
}
@@ -6357,6 +6941,11 @@
struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_dev *dev;
+ if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) {
+ mlx5_ib_unregister_vport_reps(mdev);
+ return;
+ }
+
if (mlx5_core_is_mp_slave(mdev)) {
mpi = context;
mutex_lock(&mlx5_ib_multiport_mutex);
@@ -6364,6 +6953,7 @@
mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
list_del(&mpi->list);
mutex_unlock(&mlx5_ib_multiport_mutex);
+ kfree(mpi);
return;
}
@@ -6374,10 +6964,6 @@
static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
- .event = mlx5_ib_event,
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- .pfault = mlx5_ib_pfault,
-#endif
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};