bnx2x: Missing memory barriers

While working on IA64, it became clear that the following memory
barriers are missing

Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index cc6ffba..f0b2e73 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -1357,11 +1357,23 @@
 	rx_prods.cqe_prod = rx_comp_prod;
 	rx_prods.sge_prod = rx_sge_prod;
 
+	/*
+	 * Make sure that the BD and SGE data is updated before updating the
+	 * producers since FW might read the BD/SGE right after the producer
+	 * is updated.
+	 * This is only applicable for weak-ordered memory model archs such
+	 * as IA-64. The following barrier is also mandatory since FW will
+	 * assumes BDs must have buffers.
+	 */
+	wmb();
+
 	for (i = 0; i < sizeof(struct tstorm_eth_rx_producers)/4; i++)
 		REG_WR(bp, BAR_TSTRORM_INTMEM +
 		       TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)) + i*4,
 		       ((u32 *)&rx_prods)[i]);
 
+	mmiowb(); /* keep prod updates ordered */
+
 	DP(NETIF_MSG_RX_STATUS,
 	   "Wrote: bd_prod %u  cqe_prod %u  sge_prod %u\n",
 	   bd_prod, rx_comp_prod, rx_sge_prod);
@@ -1582,7 +1594,6 @@
 	/* Update producers */
 	bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
 			     fp->rx_sge_prod);
-	mmiowb(); /* keep prod updates ordered */
 
 	fp->rx_pkt += rx_pkt;
 	fp->rx_calls++;
@@ -8729,6 +8740,8 @@
 	tx_bd->general_data = ((UNICAST_ADDRESS <<
 				ETH_TX_BD_ETH_ADDR_TYPE_SHIFT) | 1);
 
+	wmb();
+
 	fp->hw_tx_prods->bds_prod =
 		cpu_to_le16(le16_to_cpu(fp->hw_tx_prods->bds_prod) + 1);
 	mb(); /* FW restriction: must not reorder writing nbd and packets */
@@ -8780,7 +8793,6 @@
 	/* Update producers */
 	bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
 			     fp->rx_sge_prod);
-	mmiowb(); /* keep prod updates ordered */
 
 test_loopback_exit:
 	bp->link_params.loopback_mode = LOOPBACK_NONE;
@@ -9707,6 +9719,15 @@
 
 	DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d  bd %u\n", nbd, bd_prod);
 
+	/*
+	 * Make sure that the BD data is updated before updating the producer
+	 * since FW might read the BD right after the producer is updated.
+	 * This is only applicable for weak-ordered memory model archs such
+	 * as IA-64. The following barrier is also mandatory since FW will
+	 * assumes packets must have BDs.
+	 */
+	wmb();
+
 	fp->hw_tx_prods->bds_prod =
 		cpu_to_le16(le16_to_cpu(fp->hw_tx_prods->bds_prod) + nbd);
 	mb(); /* FW restriction: must not reorder writing nbd and packets */
@@ -9720,6 +9741,9 @@
 	dev->trans_start = jiffies;
 
 	if (unlikely(bnx2x_tx_avail(fp) < MAX_SKB_FRAGS + 3)) {
+		/* We want bnx2x_tx_int to "see" the updated tx_bd_prod
+		   if we put Tx into XOFF state. */
+		smp_mb();
 		netif_stop_queue(dev);
 		bp->eth_stats.driver_xoff++;
 		if (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3)