IB/mlx4: Add multicast group (MCG) paravirtualization for SR-IOV
MCG paravirtualization support includes:
- Creating multicast groups by VFs, and keeping accounting of them
- Leaving multicast groups by VFs
- Updating SM only with real changes in the overall picture of MCGs status
- Creation of MGID=0 groups (let SM choose MGID)
Note that the MCG module maintains its own internal MCG object
reference counts. The reason for this is that the IB core is used to
track only the multicast groups joins generated by the PF it runs
over. The PF IB core layer is unaware of slaves, so it cannot be used
to keep track of MCG joins they generate.
Signed-off-by: Oren Duer <oren@mellanox.co.il>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index ba25806..29ed3b4 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -75,6 +75,14 @@
struct ib_mad mad;
} __packed;
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
+
+__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
+{
+ return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
+ cpu_to_be64(0xff00000000000000LL);
+}
+
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad)
@@ -209,8 +217,7 @@
pinfo->neighbormtu_mastersmsl & 0xf);
if (pinfo->clientrereg_resv_subnetto & 0x80)
- mlx4_ib_dispatch_event(dev, port_num,
- IB_EVENT_CLIENT_REREGISTER);
+ handle_client_rereg_event(dev, port_num);
if (prev_lid != lid)
mlx4_ib_dispatch_event(dev, port_num,
@@ -308,7 +315,17 @@
static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *sa_mad)
{
- return 0;
+ int ret = 0;
+
+ /* dispatch to different sa handlers */
+ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
+ case IB_SA_ATTR_MC_MEMBER_REC:
+ ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
+ break;
+ default:
+ break;
+ }
+ return ret;
}
int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
@@ -768,6 +785,16 @@
}
}
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
+{
+ /* re-configure the mcg's */
+ if (mlx4_is_master(dev->dev)) {
+ if (!dev->sriov.is_going_down)
+ mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
+ }
+ mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
+}
+
void handle_port_mgmt_change_event(struct work_struct *work)
{
struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
@@ -797,8 +824,7 @@
mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
- mlx4_ib_dispatch_event(dev, port,
- IB_EVENT_CLIENT_REREGISTER);
+ handle_client_rereg_event(dev, port);
break;
case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
@@ -868,7 +894,17 @@
static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
int slave, struct ib_sa_mad *sa_mad)
{
- return 0;
+ int ret = 0;
+
+ /* dispatch to different sa handlers */
+ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
+ case IB_SA_ATTR_MC_MEMBER_REC:
+ ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
+ break;
+ default:
+ break;
+ }
+ return ret;
}
static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
@@ -1590,6 +1626,7 @@
int ret = 0;
if (!do_init) {
+ clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
/* for master, destroy real sqp resources */
if (slave == mlx4_master_func_num(dev->dev))
destroy_pv_resources(dev, slave, port,
@@ -1643,10 +1680,16 @@
ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
if (ret) {
ret = -ENOMEM;
- goto err_wq;
+ goto err_mcg;
}
}
+ ret = mlx4_ib_mcg_port_init(ctx);
+ if (ret) {
+ pr_err("Failed initializing mcg para-virt (%d)\n", ret);
+ goto err_mcg;
+ }
+
snprintf(name, sizeof name, "mlx4_ibt%d", port);
ctx->wq = create_singlethread_workqueue(name);
if (!ctx->wq) {
@@ -1670,6 +1713,8 @@
ctx->wq = NULL;
err_wq:
+ mlx4_ib_mcg_port_cleanup(ctx, 1);
+err_mcg:
for (i = 0; i < dev->dev->caps.sqp_demux; i++)
free_pv_object(dev, i, port);
kfree(ctx->tun);
@@ -1705,6 +1750,7 @@
int i;
if (ctx) {
struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ mlx4_ib_mcg_port_cleanup(ctx, 1);
for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
if (!ctx->tun[i])
continue;