mlx4_en: Fix partial rings feature

If RX buffer allocation fails part-way, the actual ring size is rounded down
to the nearest power of 2 and the descriptors above that size are freed.
The CQ and SRQ are allocated with the actual size and the size mask is updated.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c
index a276125..21786ad 100644
--- a/drivers/net/mlx4/en_cq.c
+++ b/drivers/net/mlx4/en_cq.c
@@ -89,6 +89,9 @@
 	*cq->mcq.arm_db    = 0;
 	memset(cq->buf, 0, cq->buf_size);
 
+	if (!cq->is_tx)
+		cq->size = priv->rx_ring[cq->ring].actual_size;
+
 	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
 			    cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
 	if (err)
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index be487fa..0cd185a 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -556,7 +556,6 @@
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_cq *cq;
 	struct mlx4_en_tx_ring *tx_ring;
-	struct mlx4_en_rx_ring *rx_ring;
 	int rx_index = 0;
 	int tx_index = 0;
 	int err = 0;
@@ -572,10 +571,15 @@
 	dev->mtu = min(dev->mtu, priv->max_mtu);
 	mlx4_en_calc_rx_buf(dev);
 	mlx4_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_skb_size);
+
 	/* Configure rx cq's and rings */
+	err = mlx4_en_activate_rx_rings(priv);
+	if (err) {
+		mlx4_err(mdev, "Failed to activate RX rings\n");
+		return err;
+	}
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		cq = &priv->rx_cq[i];
-		rx_ring = &priv->rx_ring[i];
 
 		err = mlx4_en_activate_cq(priv, cq);
 		if (err) {
@@ -591,20 +595,14 @@
 			goto cq_err;
 		}
 		mlx4_en_arm_cq(priv, cq);
-
+		priv->rx_ring[i].cqn = cq->mcq.cqn;
 		++rx_index;
 	}
 
-	err = mlx4_en_activate_rx_rings(priv);
-	if (err) {
-		mlx4_err(mdev, "Failed to activate RX rings\n");
-		goto cq_err;
-	}
-
 	err = mlx4_en_config_rss_steer(priv);
 	if (err) {
 		mlx4_err(mdev, "Failed configuring rss steering\n");
-		goto rx_err;
+		goto cq_err;
 	}
 
 	/* Configure tx cq's and rings */
@@ -691,12 +689,11 @@
 	}
 
 	mlx4_en_release_rss_steer(priv);
-rx_err:
-	for (i = 0; i < priv->rx_ring_num; i++)
-		mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
 cq_err:
 	while (rx_index--)
 		mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]);
+	for (i = 0; i < priv->rx_ring_num; i++)
+		mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
 
 	return err; /* need to close devices */
 }
diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c
index 9ee873e..6bfab6e 100644
--- a/drivers/net/mlx4/en_rx.c
+++ b/drivers/net/mlx4/en_rx.c
@@ -202,12 +202,35 @@
 	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
 }
 
+static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
+				 struct mlx4_en_rx_ring *ring,
+				 int index)	/* index: descriptor slot within @ring */
+{	/* Unmap and release every fragment page attached to one RX descriptor. */
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct skb_frag_struct *skb_frags;
+	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
+	dma_addr_t dma;
+	int nr;	/* fragment counter, 0 .. priv->num_frags - 1 */
+
+	skb_frags = ring->rx_info + (index << priv->log_rx_info);	/* frag info for this slot */
+	for (nr = 0; nr < priv->num_frags; nr++) {
+		mlx4_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
+		dma = be64_to_cpu(rx_desc->data[nr].addr);	/* address is stored big-endian */
+
+		mlx4_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma);
+		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
+				 PCI_DMA_FROMDEVICE);
+		put_page(skb_frags[nr].page);	/* release the page backing this fragment */
+	}
+}
+
 static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rx_ring *ring;
 	int ring_ind;
 	int buf_ind;
+	int new_size;
 
 	for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
 		for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
@@ -220,18 +243,30 @@
 						       "enough rx buffers\n");
 					return -ENOMEM;
 				} else {
-					if (netif_msg_rx_err(priv))
-						mlx4_warn(mdev,
-							  "Only %d buffers allocated\n",
-							  ring->actual_size);
-					goto out;
+					new_size = rounddown_pow_of_two(ring->actual_size);
+					mlx4_warn(mdev, "Only %d buffers allocated "
+							"reducing ring size to %d",
+						  ring->actual_size, new_size);
+					goto reduce_rings;
 				}
 			}
 			ring->actual_size++;
 			ring->prod++;
 		}
 	}
-out:
+	return 0;
+
+reduce_rings:
+	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
+		ring = &priv->rx_ring[ring_ind];
+		while (ring->actual_size > new_size) {
+			ring->actual_size--;
+			ring->prod--;
+			mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
+		}
+		ring->size_mask = ring->actual_size - 1;
+	}
+
 	return 0;
 }
 
@@ -255,7 +290,7 @@
 		++num;
 		++ring->prod;
 	}
-	if ((u32) (ring->prod - ring->cons) == ring->size)
+	if ((u32) (ring->prod - ring->cons) == ring->actual_size)
 		ring->full = 1;
 
 	return num;
@@ -264,33 +299,17 @@
 static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
 				struct mlx4_en_rx_ring *ring)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
-	struct skb_frag_struct *skb_frags;
-	struct mlx4_en_rx_desc *rx_desc;
-	dma_addr_t dma;
 	int index;
-	int nr;
 
 	mlx4_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
 			ring->cons, ring->prod);
 
 	/* Unmap and free Rx buffers */
-	BUG_ON((u32) (ring->prod - ring->cons) > ring->size);
+	BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size);
 	while (ring->cons != ring->prod) {
 		index = ring->cons & ring->size_mask;
-		rx_desc = ring->buf + (index << ring->log_stride);
-		skb_frags = ring->rx_info + (index << priv->log_rx_info);
 		mlx4_dbg(DRV, priv, "Processing descriptor:%d\n", index);
-
-		for (nr = 0; nr < priv->num_frags; nr++) {
-			mlx4_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
-			dma = be64_to_cpu(rx_desc->data[nr].addr);
-
-			mlx4_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma);
-			pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
-					 PCI_DMA_FROMDEVICE);
-			put_page(skb_frags[nr].page);
-		}
+		mlx4_en_free_rx_desc(priv, ring, index);
 		++ring->cons;
 	}
 }
@@ -454,7 +473,7 @@
 		mlx4_en_update_rx_prod_db(ring);
 
 		/* Configure SRQ representing the ring */
-		ring->srq.max    = ring->size;
+		ring->srq.max    = ring->actual_size;
 		ring->srq.max_gs = max_gs;
 		ring->srq.wqe_shift = ilog2(ring->stride);