msm: kgsl: Update required timestamps during command submission

Update all the required timestamps when submitting commands to the
GPU ringbuffer for execution. The global timestamp was not being
updated in all the required cases. Now both the global timestamp and
the per context timestamp are updated if the following conditions are met:
	1. A valid non-global context is passed
	2. The context passed supports per context timestamps
	3. The command submission is not issued internally by kgsl
If any of the three conditions above is not met, then only the global
timestamp is updated, as the sketch below illustrates.
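
For reference, the timestamp selection boils down to the following
sketch (simplified from the diff below, not a verbatim excerpt):

	/* Pick the memstore slot whose timestamp issueibcmds reports */
	if (context && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) &&
	    !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
		context_id = context->id;
	else
		context_id = KGSL_MEMSTORE_GLOBAL;

	/*
	 * The sop/eop timestamps are then always written for context_id;
	 * when context_id refers to a real context, the global eop
	 * timestamp (rb->global_ts) is written in addition.
	 */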

Change-Id: Ib4fe6fbd2ac57fbc5306377f09072f9e4c09d442
Signed-off-by: Shubhraprakash Das <sadas@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 91609ef..25878a1 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -535,18 +535,21 @@
 	unsigned int total_sizedwords = sizedwords;
 	unsigned int i;
 	unsigned int rcmd_gpu;
-	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
+	unsigned int context_id;
 	unsigned int gpuaddr = rb->device->memstore.gpuaddr;
 	unsigned int timestamp;
 
 	/*
 	 * if the context was not created with per context timestamp
 	 * support, we must use the global timestamp since issueibcmds
-	 * will be returning that one.
+	 * will be returning that one. Internal command submissions
+	 * (KGSL_CMD_FLAGS_INTERNAL_ISSUE) also use the global timestamp.
 	 */
-	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS &&
-			!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
+	if (context && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) &&
+		!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
 		context_id = context->id;
+	else
+		context_id = KGSL_MEMSTORE_GLOBAL;
 
 	/* reserve space to temporarily turn off protected mode
 	*  error checking if needed
@@ -554,33 +557,30 @@
 	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
 	/* 2 dwords to store the start of command sequence */
 	total_sizedwords += 2;
-
 	/* internal ib command identifier for the ringbuffer */
 	total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0;
 
 	/* Add CP_COND_EXEC commands to generate CP_INTERRUPT */
 	total_sizedwords += context ? 13 : 0;
 
+	if (context && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) &&
+		(flags & (KGSL_CMD_FLAGS_INTERNAL_ISSUE |
+			KGSL_CMD_FLAGS_GET_INT)))
+		total_sizedwords += 2;
+
 	if (adreno_is_a3xx(adreno_dev))
 		total_sizedwords += 7;
 
 	if (adreno_is_a2xx(adreno_dev))
 		total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */
 
-	total_sizedwords += 2; /* scratchpad ts for fault tolerance */
+	total_sizedwords += 2; /* scratchpad ts for recovery */
+	total_sizedwords += 3; /* sop timestamp */
+	total_sizedwords += 4; /* eop timestamp */
 
-	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS &&
-			!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
-		total_sizedwords += 3; /* sop timestamp */
-		total_sizedwords += 4; /* eop timestamp */
+	if (context_id != KGSL_MEMSTORE_GLOBAL)
 		total_sizedwords += 3; /* global timestamp without cache
 					* flush for non-zero context */
-	} else {
-		total_sizedwords += 4; /* global timestamp for fault tolerance*/
-	}
-
-	if (flags & KGSL_CMD_FLAGS_EOF)
-		total_sizedwords += 2;
 
 	ringcmds = adreno_ringbuffer_allocspace(rb, context, total_sizedwords);
 	if (!ringcmds)
@@ -605,6 +605,16 @@
 	else
 		timestamp = rb->global_ts;
 
+	/* scratchpad ts for recovery */
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->global_ts);
+
+	/* start-of-pipeline timestamp */
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_MEM_WRITE, 2));
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+		KGSL_MEMSTORE_OFFSET(context_id, soptimestamp)));
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
+
 	if (flags & KGSL_CMD_FLAGS_PMODE) {
 		/* disable protected mode error checking */
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
@@ -634,14 +644,10 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
 	}
 
-	/* scratchpad ts for fault tolerance */
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->global_ts);
-
 	if (adreno_is_a3xx(adreno_dev)) {
 		/*
-		 * FLush HLSQ lazy updates to make sure there are no
-		 * rsources pending for indirect loads after the timestamp
+		 * Flush HLSQ lazy updates to make sure there are no
+		 * resources pending for indirect loads after the timestamp
 		 */
 
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
@@ -652,36 +658,23 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
 	}
 
-	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS
-			&& !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
-		/* start-of-pipeline timestamp */
+	/*
+	 * end-of-pipeline timestamp. If per context timestamps are not
+	 * enabled, then context_id will be KGSL_MEMSTORE_GLOBAL and the
+	 * eop timestamp is simply written to the global memstore slot.
+	 */
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3));
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+		KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
+
+	if (context_id != KGSL_MEMSTORE_GLOBAL) {
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
 			cp_type3_packet(CP_MEM_WRITE, 2));
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
-			KGSL_MEMSTORE_OFFSET(context_id, soptimestamp)));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
-
-		/* end-of-pipeline timestamp */
-		GSL_RB_WRITE(ringcmds, rcmd_gpu,
-			cp_type3_packet(CP_EVENT_WRITE, 3));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
-			KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
-
-		GSL_RB_WRITE(ringcmds, rcmd_gpu,
-			cp_type3_packet(CP_MEM_WRITE, 2));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
-			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-				eoptimestamp)));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->global_ts);
-	} else {
-		GSL_RB_WRITE(ringcmds, rcmd_gpu,
-			cp_type3_packet(CP_EVENT_WRITE, 3));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
-			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-						eoptimestamp)));
+		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+			eoptimestamp)));
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->global_ts);
 	}
 	if (context) {
@@ -720,6 +713,19 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
 	}
 
+	/*
+	 * If per context timestamps are enabled and any of the kgsl
+	 * internal commands want an INT to be generated, trigger the INT.
+	 */
+	if (context && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) &&
+		(flags & (KGSL_CMD_FLAGS_INTERNAL_ISSUE |
+			KGSL_CMD_FLAGS_GET_INT))) {
+		GSL_RB_WRITE(ringcmds, rcmd_gpu,
+			cp_type3_packet(CP_INTERRUPT, 1));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu,
+			CP_INT_CNTL__RB_INT_MASK);
+	}
+
 	if (adreno_is_a3xx(adreno_dev)) {
 		/* Dummy set-constant to trigger context rollover */
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,