mmc: msm_sdcc: Fix scatterlist processing in PIO mode

sg_miter_next() does not place byte alignment restrictions on
sgl lengths. However, SDCC FIFO must be accessed in multiples
of 4 bytes, requiring rounding which can cause data corruption.

For example, an sgl of length 481 is decribed below:
	struct scatterlist sgl;
	sgl.page_link = 0xXXXX_XXXX;
	sgl.length    = 481;
	sgl.offset    = 3821;

First call to sg_miter_next():
	Sgl length returned = (PAGE_SIZE - 3821) = 275 bytes
	Driver rounds this length to multiple of 4 = 276 bytes

Next call to sg_miter_next():
	Sgl length returned = (481 - 275) = 206 bytes
	Driver rounds this length to 208 bytes

On a write, the extra byte written to the FIFO during the first
276 byte chunk is assumed valid. The next call writes 208 bytes,
but the last 3 bytes (276 + 208 - 481) including 1 valid byte,
are ignored, as the controller uses the MCI_DATA_LENGTH register
to figure out that it only needs to write a total of 481 bytes.

To fix the data corruption in cases as above, a 4 byte bounce
buffer is used realign buffer access to the FIFO, such that 4 byte
multiples are written until the last buffer chunk.

Note that in a simple case where all 481 bytes lie within a page,
the driver rounds the length to 484, but the MCI_DATA_LENGTH
register enusres only 481 bytes are actually written.

Change-Id: I164bae2df4857017b35857e465d753b9dc9edf6a
Signed-off-by: Oluwafemi Adeyemi <aadeyemi@codeaurora.org>
diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c
index 0d2b11d..ad4dfe6 100644
--- a/drivers/mmc/host/msm_sdcc.c
+++ b/drivers/mmc/host/msm_sdcc.c
@@ -134,6 +134,7 @@
 		      u32 c);
 static inline void msmsdcc_delay(struct msmsdcc_host *host);
 static void msmsdcc_dump_sdcc_state(struct msmsdcc_host *host);
+static void msmsdcc_sg_start(struct msmsdcc_host *host);
 
 static inline unsigned short msmsdcc_get_nr_sg(struct msmsdcc_host *host)
 {
@@ -1076,21 +1077,15 @@
 
 	/* Is data transfer in PIO mode required? */
 	if (!(datactrl & MCI_DPSM_DMAENABLE)) {
-		unsigned int sg_miter_flags = SG_MITER_ATOMIC;
-
 		if (data->flags & MMC_DATA_READ) {
-			sg_miter_flags |= SG_MITER_TO_SG;
 			pio_irqmask = MCI_RXFIFOHALFFULLMASK;
 			if (host->curr.xfer_remain < MCI_FIFOSIZE)
 				pio_irqmask |= MCI_RXDATAAVLBLMASK;
-		} else {
-			sg_miter_flags |= SG_MITER_FROM_SG;
+		} else
 			pio_irqmask = MCI_TXFIFOHALFEMPTYMASK |
 					MCI_TXFIFOEMPTYMASK;
-		}
 
-		sg_miter_start(&host->sg_miter, data->sg, data->sg_len,
-				sg_miter_flags);
+		msmsdcc_sg_start(host);
 	}
 
 	if (data->flags & MMC_DATA_READ)
@@ -1250,6 +1245,144 @@
 	return ptr - buffer;
 }
 
+/*
+ * Copy up to a word (4 bytes) between a scatterlist
+ * and a temporary bounce buffer when the word lies across
+ * two pages. The temporary buffer can then be read to/
+ * written from the FIFO once.
+ */
+static void _msmsdcc_sg_consume_word(struct msmsdcc_host *host)
+{
+	struct msmsdcc_pio_data *pio = &host->pio;
+	unsigned int bytes_avail;
+
+	if (host->curr.data->flags & MMC_DATA_READ)
+		memcpy(pio->sg_miter.addr, pio->bounce_buf,
+		       pio->bounce_buf_len);
+	else
+		memcpy(pio->bounce_buf, pio->sg_miter.addr,
+		       pio->bounce_buf_len);
+
+	while (pio->bounce_buf_len != 4) {
+		if (!sg_miter_next(&pio->sg_miter))
+			break;
+		bytes_avail = min_t(unsigned int, pio->sg_miter.length,
+			4 - pio->bounce_buf_len);
+		if (host->curr.data->flags & MMC_DATA_READ)
+			memcpy(pio->sg_miter.addr,
+			       &pio->bounce_buf[pio->bounce_buf_len],
+			       bytes_avail);
+		else
+			memcpy(&pio->bounce_buf[pio->bounce_buf_len],
+			       pio->sg_miter.addr, bytes_avail);
+
+		pio->sg_miter.consumed = bytes_avail;
+		pio->bounce_buf_len += bytes_avail;
+	}
+}
+
+/*
+ * Use sg_miter_next to return as many 4-byte aligned
+ * chunks as possible, using a temporary 4 byte buffer
+ * for alignment if necessary
+ */
+static int msmsdcc_sg_next(struct msmsdcc_host *host, char **buf, int *len)
+{
+	struct msmsdcc_pio_data *pio = &host->pio;
+	unsigned int length, rlength;
+	char *buffer;
+
+	if (!sg_miter_next(&pio->sg_miter))
+		return 0;
+
+	buffer = pio->sg_miter.addr;
+	length = pio->sg_miter.length;
+
+	if (length < host->curr.xfer_remain) {
+		rlength = round_down(length, 4);
+		if (rlength) {
+			/*
+			 * We have a 4-byte aligned chunk.
+			 * The rounding will be reflected by
+			 * a call to msmsdcc_sg_consumed
+			 */
+			length = rlength;
+			goto sg_next_end;
+		}
+		/*
+		 * We have a length less than 4 bytes. Check to
+		 * see if more buffer is available, and combine
+		 * to make 4 bytes if possible.
+		 */
+		pio->bounce_buf_len = length;
+		memset(pio->bounce_buf, 0, 4);
+
+		/*
+		 * On a read, get 4 bytes from FIFO, and distribute
+		 * (4-bouce_buf_len) bytes into consecutive
+		 * sgl buffers when msmsdcc_sg_consumed is called
+		 */
+		if (host->curr.data->flags & MMC_DATA_READ) {
+			buffer = pio->bounce_buf;
+			length = 4;
+			goto sg_next_end;
+		} else {
+			_msmsdcc_sg_consume_word(host);
+			buffer = pio->bounce_buf;
+			length = pio->bounce_buf_len;
+		}
+	}
+
+sg_next_end:
+	*buf = buffer;
+	*len = length;
+	return 1;
+}
+
+/*
+ * Update sg_miter.consumed based on how many bytes were
+ * consumed. If the bounce buffer was used to read from FIFO,
+ * redistribute into sgls.
+ */
+static void msmsdcc_sg_consumed(struct msmsdcc_host *host,
+				unsigned int length)
+{
+	struct msmsdcc_pio_data *pio = &host->pio;
+
+	if (host->curr.data->flags & MMC_DATA_READ) {
+		if (length > pio->sg_miter.consumed)
+			/*
+			 * consumed 4 bytes, but sgl
+			 * describes < 4 bytes
+			 */
+			_msmsdcc_sg_consume_word(host);
+		else
+			pio->sg_miter.consumed = length;
+	} else
+		if (length < pio->sg_miter.consumed)
+			pio->sg_miter.consumed = length;
+}
+
+static void msmsdcc_sg_start(struct msmsdcc_host *host)
+{
+	unsigned int sg_miter_flags = SG_MITER_ATOMIC;
+
+	host->pio.bounce_buf_len = 0;
+
+	if (host->curr.data->flags & MMC_DATA_READ)
+		sg_miter_flags |= SG_MITER_TO_SG;
+	else
+		sg_miter_flags |= SG_MITER_FROM_SG;
+
+	sg_miter_start(&host->pio.sg_miter, host->curr.data->sg,
+		       host->curr.data->sg_len, sg_miter_flags);
+}
+
+static void msmsdcc_sg_stop(struct msmsdcc_host *host)
+{
+	sg_miter_stop(&host->pio.sg_miter);
+}
+
 static irqreturn_t
 msmsdcc_pio_irq(int irq, void *dev_id)
 {
@@ -1257,6 +1390,8 @@
 	void __iomem		*base = host->base;
 	uint32_t		status;
 	unsigned long flags;
+	unsigned int remain;
+	char *buffer;
 
 	spin_lock(&host->lock);
 
@@ -1267,45 +1402,43 @@
 		spin_unlock(&host->lock);
 		return IRQ_NONE;
 	}
-
 #if IRQ_DEBUG
 	msmsdcc_print_status(host, "irq1-r", status);
 #endif
 	local_irq_save(flags);
 
-	while (sg_miter_next(&host->sg_miter)) {
-
-		unsigned int remain, len;
-		char *buffer;
+	do {
+		unsigned int len;
 
 		if (!(status & (MCI_TXFIFOHALFEMPTY | MCI_TXFIFOEMPTY
 				| MCI_RXDATAAVLBL)))
 			break;
 
-		buffer = host->sg_miter.addr;
-		remain = host->sg_miter.length;
+		if (!msmsdcc_sg_next(host, &buffer, &remain))
+			break;
 
 		len = 0;
 		if (status & MCI_RXACTIVE)
 			len = msmsdcc_pio_read(host, buffer, remain);
 		if (status & MCI_TXACTIVE)
 			len = msmsdcc_pio_write(host, buffer, remain);
+
 		/* len might have aligned to 32bits above */
 		if (len > remain)
 			len = remain;
 
-		host->sg_miter.consumed = len;
 		host->curr.xfer_remain -= len;
 		host->curr.data_xfered += len;
 		remain -= len;
+		msmsdcc_sg_consumed(host, len);
 
 		if (remain) /* Done with this page? */
 			break; /* Nope */
 
 		status = readl_relaxed(base + MMCISTATUS);
-	}
+	} while (1);
 
-	sg_miter_stop(&host->sg_miter);
+	msmsdcc_sg_stop(host);
 	local_irq_restore(flags);
 
 	if (status & MCI_RXACTIVE && host->curr.xfer_remain < MCI_FIFOSIZE) {
diff --git a/drivers/mmc/host/msm_sdcc.h b/drivers/mmc/host/msm_sdcc.h
index 319d721..8a728f2 100644
--- a/drivers/mmc/host/msm_sdcc.h
+++ b/drivers/mmc/host/msm_sdcc.h
@@ -281,9 +281,10 @@
 };
 
 struct msmsdcc_pio_data {
-	struct scatterlist	*sg;
-	unsigned int		sg_len;
-	unsigned int		sg_off;
+	struct sg_mapping_iter		sg_miter;
+	char				bounce_buf[4];
+	/* valid bytes in bounce_buf */
+	int				bounce_buf_len;
 };
 
 struct msmsdcc_curr_req {
@@ -361,7 +362,7 @@
 	struct msmsdcc_sps_data sps;
 	bool			is_dma_mode;
 	bool			is_sps_mode;
-	struct sg_mapping_iter sg_miter;
+	struct msmsdcc_pio_data	pio;
 
 #ifdef CONFIG_HAS_EARLYSUSPEND
 	struct early_suspend early_suspend;