spi_qsd: Ensure the DMA callback is called before completion.

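There may be some latency before the DMA IRQ fires in hardware.
Hence, in the case of write transfers, completion is issued only after
both the QUP interrupt handler and the DMA callback have run: each one
increments the tx_irq_called counter, the first to arrive returns
without completing, and the second one issues the completion.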

Change-Id: I4962c5c04c3a45dbf6e98165d2815adda657a9fb
Signed-off-by: Kiran Gunda <kgunda@codeaurora.org>
diff --git a/drivers/spi/spi_qsd.c b/drivers/spi/spi_qsd.c
index 2fc95af..152bbb4 100644
--- a/drivers/spi/spi_qsd.c
+++ b/drivers/spi/spi_qsd.c
@@ -392,6 +392,7 @@
 	u32 num_transfers;
 
 	atomic_set(&dd->rx_irq_called, 0);
+	atomic_set(&dd->tx_irq_called, 0);
 	if (dd->write_len && !dd->read_len) {
 		/* WR-WR transfer */
 		bytes_sent = dd->cur_msg_len - dd->tx_bytes_remaining;
@@ -712,6 +713,8 @@
 		    readl_relaxed(dd->base + SPI_OPERATIONAL) &
 		    SPI_OP_MAX_OUTPUT_DONE_FLAG) {
 			msm_spi_ack_transfer(dd);
+			if (atomic_inc_return(&dd->tx_irq_called) == 1)
+				return IRQ_HANDLED;
 			msm_spi_complete(dd);
 			return IRQ_HANDLED;
 		}
@@ -1586,9 +1589,12 @@
 	}
 	/* restore original context */
 	dd = container_of(cmd, struct msm_spi, tx_hdr);
-	if (result & DMOV_RSLT_DONE)
+	if (result & DMOV_RSLT_DONE) {
 		dd->stat_dmov_tx++;
-	else {
+		if ((atomic_inc_return(&dd->tx_irq_called) == 1))
+			return;
+		complete(&dd->transfer_complete);
+	} else {
 		/* Error or flush */
 		if (result & DMOV_RSLT_ERROR) {
 			dev_err(dd->dev, "DMA error (0x%08x)\n", result);
diff --git a/drivers/spi/spi_qsd.h b/drivers/spi/spi_qsd.h
index a434bbb..223fce6 100644
--- a/drivers/spi/spi_qsd.h
+++ b/drivers/spi/spi_qsd.h
@@ -285,6 +285,7 @@
 	int                      output_block_size;
 	int                      burst_size;
 	atomic_t                 rx_irq_called;
+	atomic_t                 tx_irq_called;
 	/* Used to pad messages unaligned to block size */
 	u8                       *tx_padding;
 	dma_addr_t               tx_padding_dma;