msm: kgsl: Move the adreno WPTR poke logic into hang detection

Sometimes a write to CP_WPTR fails to latch on A20x GPUs. In the past
we worked around this by rewriting WPTR numerous times in the various
loops that wait for GPU activity.  Consolidate these disparate calls
by moving the poke into the hang detect function.  If the core is idle
but RPTR != WPTR, then rewrite WPTR and report no hang so that the
caller goes back through its wait loop.

Note that the poke is now only done on A2xx cores, and because it
lives in the hang detect path it is only done when fast hang
detection is enabled.

Change-Id: Ic0dedbad474c7884c2467fe050d8afe673588add
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 72108f0..a202660 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -1809,12 +1809,6 @@
 	return status;
 }
 
-static inline void adreno_poke(struct kgsl_device *device)
-{
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	adreno_regwrite(device, REG_CP_RB_WPTR, adreno_dev->ringbuffer.wptr);
-}
-
 static int adreno_ringbuffer_drain(struct kgsl_device *device,
 	unsigned int *regs)
 {
@@ -1835,12 +1829,8 @@
 
 	wait = jiffies + msecs_to_jiffies(100);
 
-	adreno_poke(device);
-
 	do {
 		if (time_after(jiffies, wait)) {
-			adreno_poke(device);
-
 			/* Check to see if the core is hung */
 			if (adreno_hang_detect(device, regs))
 				return -ETIMEDOUT;
@@ -2256,8 +2246,24 @@
 	if (!adreno_dev->fast_hang_detect)
 		return 0;
 
-	if (is_adreno_rbbm_status_idle(device))
+	if (is_adreno_rbbm_status_idle(device)) {
+
+		/*
+		 * On A20X if the RPTR != WPTR and the device is idle, then
+		 * the last write to WPTR probably failed to latch so write it
+		 * again
+		 */
+
+		if (adreno_is_a2xx(adreno_dev)) {
+			unsigned int rptr;
+			adreno_regread(device, REG_CP_RB_RPTR, &rptr);
+			if (rptr != adreno_dev->ringbuffer.wptr)
+				adreno_regwrite(device, REG_CP_RB_WPTR,
+					adreno_dev->ringbuffer.wptr);
+		}
+
 		return 0;
+	}
 
 	for (i = 0; i < hang_detect_regs_count; i++) {
 
@@ -2363,7 +2369,7 @@
 			status = 0;
 			goto done;
 		}
-		adreno_poke(device);
+
 		io_cnt = (io_cnt + 1) % 100;
 		if (io_cnt <
 		    pwr->pwrlevels[pwr->active_pwrlevel].io_fraction)