IB/core: Add "type 2" memory windows support

This patch enhances the IB core support for Memory Windows (MWs).

MWs allow an application to have better/flexible control over remote
access to memory.

Two types of MWs are supported, with the second type having two flavors:

    Type 1  - associated with PD only
    Type 2A - associated with QPN only
    Type 2B - associated with PD and QPN

Applications can allocate a MW once, and then repeatedly bind the MW
to different ranges in MRs that are associated to the same PD. Type 1
windows are bound through a verb, while type 2 windows are bound by
posting a work request.

The 32-bit memory key is composed of a 24-bit index and an 8-bit
key. The key is changed with each bind, thus allowing more control
over the peer's use of the memory key.

The changes introduced are the following:

* add memory window type enum and a corresponding parameter to ib_alloc_mw.
* type 2 memory window bind work request support.
* create a struct that contains the common part of the bind verb struct
  ibv_mw_bind and the bind work request into a single struct.
* add the ib_inc_rkey helper function to advance the tag part of an rkey.

Consumer interface details:

* new device capability flags IB_DEVICE_MEM_WINDOW_TYPE_2A and
  IB_DEVICE_MEM_WINDOW_TYPE_2B are added to indicate device support
  for these features.

  Devices can set either IB_DEVICE_MEM_WINDOW_TYPE_2A or
  IB_DEVICE_MEM_WINDOW_TYPE_2B if it supports type 2A or type 2B
  memory windows. It can set neither to indicate it doesn't support
  type 2 windows at all.

* modify existing provides and consumers code to the new param of
  ib_alloc_mw and the ib_mw_bind_info structure

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 30f199e..a8fdd33 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1099,18 +1099,19 @@
 
 /* Memory windows */
 
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
 {
 	struct ib_mw *mw;
 
 	if (!pd->device->alloc_mw)
 		return ERR_PTR(-ENOSYS);
 
-	mw = pd->device->alloc_mw(pd);
+	mw = pd->device->alloc_mw(pd, type);
 	if (!IS_ERR(mw)) {
 		mw->device  = pd->device;
 		mw->pd      = pd;
 		mw->uobject = NULL;
+		mw->type    = type;
 		atomic_inc(&pd->usecnt);
 	}
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 0bdf09a..074d5c2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -738,7 +738,7 @@
 	return ibmr;
 }
 
-static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
+static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
 {
 	struct iwch_dev *rhp;
 	struct iwch_pd *php;
@@ -747,6 +747,9 @@
 	u32 stag = 0;
 	int ret;
 
+	if (type != IB_MW_TYPE_1)
+		return ERR_PTR(-EINVAL);
+
 	php = to_iwch_pd(pd);
 	rhp = php->rhp;
 	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 6de8463..e5649e8 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -567,18 +567,19 @@
 	if (mw_bind->send_flags & IB_SEND_SIGNALED)
 		t3_wr_flags = T3_COMPLETION_FLAG;
 
-	sgl.addr = mw_bind->addr;
-	sgl.lkey = mw_bind->mr->lkey;
-	sgl.length = mw_bind->length;
+	sgl.addr = mw_bind->bind_info.addr;
+	sgl.lkey = mw_bind->bind_info.mr->lkey;
+	sgl.length = mw_bind->bind_info.length;
 	wqe->bind.reserved = 0;
 	wqe->bind.type = TPT_VATO;
 
 	/* TBD: check perms */
-	wqe->bind.perms = iwch_ib_to_tpt_bind_access(mw_bind->mw_access_flags);
-	wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
+	wqe->bind.perms = iwch_ib_to_tpt_bind_access(
+		mw_bind->bind_info.mw_access_flags);
+	wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
 	wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
-	wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
-	wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
+	wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
+	wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
 	err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
 	if (err) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 9c1644f..c8bd239 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -866,7 +866,7 @@
 					int page_list_len);
 struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
 int c4iw_dealloc_mw(struct ib_mw *mw);
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd);
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
 					   u64 length, u64 virt, int acc,
 					   struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index afd8179..903a92d 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -650,7 +650,7 @@
 	return ERR_PTR(err);
 }
 
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd)
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;
@@ -659,6 +659,9 @@
 	u32 stag = 0;
 	int ret;
 
+	if (type != IB_MW_TYPE_1)
+		return ERR_PTR(-EINVAL);
+
 	php = to_c4iw_pd(pd);
 	rhp = php->rhp;
 	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index 8f7f282..22f79af 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -95,7 +95,7 @@
 
 int ehca_dereg_mr(struct ib_mr *mr);
 
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
 
 int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
 		 struct ib_mw_bind *mw_bind);
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 8784486..bcfb0c1 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -688,7 +688,7 @@
 
 /*----------------------------------------------------------------------*/
 
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
 {
 	struct ib_mw *ib_mw;
 	u64 h_ret;
@@ -698,6 +698,9 @@
 		container_of(pd->device, struct ehca_shca, ib_device);
 	struct ehca_mw_hipzout_parms hipzout;
 
+	if (type != IB_MW_TYPE_1)
+		return ERR_PTR(-EINVAL);
+
 	e_mw = ehca_mw_new();
 	if (!e_mw) {
 		ib_mw = ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 07e4fba..8f67fe2 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -55,7 +55,8 @@
 /**
  * nes_alloc_mw
  */
-static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
+static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type)
+{
 	struct nes_pd *nespd = to_nespd(ibpd);
 	struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
 	struct nes_device *nesdev = nesvnic->nesdev;
@@ -71,6 +72,9 @@
 	u32 driver_key = 0;
 	u8 stag_key = 0;
 
+	if (type != IB_MW_TYPE_1)
+		return ERR_PTR(-EINVAL);
+
 	get_random_bytes(&next_stag_index, sizeof(next_stag_index));
 	stag_key = (u8)next_stag_index;
 
@@ -244,20 +248,19 @@
 	if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
 		wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
 
-	if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) {
+	if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
 		wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
-	}
-	if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) {
+	if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
 		wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
-	}
 
 	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
-	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey);
+	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
+			    ibmw_bind->bind_info.mr->lkey);
 	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
 	set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
-			ibmw_bind->length);
+			ibmw_bind->bind_info.length);
 	wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
-	u64temp = (u64)ibmw_bind->addr;
+	u64temp = (u64)ibmw_bind->bind_info.addr;
 	set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
 
 	head++;