msm: kgsl: Add support to use timestamps from userspace for an issue

Use timestamp from userspace instead of incrementing context's last
issued timestamp. This allows userspace to defer submissions to KGSL.

Change-Id: I2a104f97420f73dac3bbd23464e92e6ea9d92869
Signed-off-by: Vijay Krishnamoorthy <adivarah@codeaurora.org>
Signed-off-by: Rajeev Kulkarni <krajeev@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index ea863ee..33013f5 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -2282,12 +2282,15 @@
 	static uint io_cnt;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct adreno_context *adreno_ctx = context->devctxt;
 	int retries = 0;
 	unsigned int ts_issued;
 	unsigned int context_id = _get_context_id(context);
 	unsigned int time_elapsed = 0;
 	unsigned int prev_reg_val[hang_detect_regs_count];
 	unsigned int wait;
+	unsigned int retry_ts_cmp = 0;
+	unsigned int retry_ts_cmp_msecs = KGSL_SYNCOBJ_SERVER_TIMEOUT;
 
 	memset(prev_reg_val, 0, sizeof(prev_reg_val));
 
@@ -2297,12 +2300,20 @@
 	if (msecs == KGSL_TIMEOUT_DEFAULT)
 		msecs = adreno_dev->wait_timeout;
 
+	/*
+	 * With user generated ts, if this check fails perform this check
+	 * again after 'retry_ts_cmp_msecs' milliseconds.
+	 */
 	if (timestamp_cmp(timestamp, ts_issued) > 0) {
-		KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, "
-			"last issued ts <%d:0x%x>\n",
-			context_id, timestamp, context_id, ts_issued);
-		status = -EINVAL;
-		goto done;
+		if (!(adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS)) {
+			KGSL_DRV_ERR(device,
+				"Cannot wait for invalid ts <%d:0x%x>, "
+				"last issued ts <%d:0x%x>\n",
+				context_id, timestamp, context_id, ts_issued);
+			status = -EINVAL;
+			goto done;
+		} else
+			retry_ts_cmp = 1;
 	}
 
 	/*
@@ -2372,7 +2383,22 @@
 		time_elapsed += wait;
 		wait = KGSL_TIMEOUT_PART;
 
-		retries++;
+		if (!retry_ts_cmp)
+			retries++;
+		else if (time_elapsed >= retry_ts_cmp_msecs) {
+			ts_issued =
+				adreno_dev->ringbuffer.timestamp[context_id];
+			if (timestamp_cmp(timestamp, ts_issued) > 0) {
+				KGSL_DRV_ERR(device,
+				"Cannot wait for user-generated ts <%d:0x%x>, "
+				"not submitted within server timeout period. "
+				"last issued ts <%d:0x%x>\n",
+				context_id, timestamp, context_id, ts_issued);
+				status = -EINVAL;
+				goto done;
+			}
+			retry_ts_cmp = 0;
+		}
 
 	} while (!msecs || time_elapsed < msecs);
 
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index b227134..95d995d 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -33,7 +33,7 @@
 /* Flags to control command packet settings */
 #define KGSL_CMD_FLAGS_NONE             0x00000000
 #define KGSL_CMD_FLAGS_PMODE		0x00000001
-#define KGSL_CMD_FLAGS_DUMMY_INTR_CMD	0x00000002
+#define KGSL_CMD_FLAGS_INTERNAL_ISSUE	0x00000002
 
 /* Command identifiers */
 #define KGSL_CONTEXT_TO_MEM_IDENTIFIER	0x2EADBEEF
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index f30854d..4dda7ba 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -169,6 +169,14 @@
 	if (flags & KGSL_CONTEXT_PER_CONTEXT_TS)
 		drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS;
 
+	if (flags & KGSL_CONTEXT_USER_GENERATED_TS) {
+		if (!(flags & KGSL_CONTEXT_PER_CONTEXT_TS)) {
+			ret = -EINVAL;
+			goto err;
+		}
+		drawctxt->flags |= CTXT_FLAGS_USER_GENERATED_TS;
+	}
+
 	ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
 	if (ret)
 		goto err;
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index 034d6e9..58e4791 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -48,6 +48,8 @@
 #define CTXT_FLAGS_GPU_HANG_RECOVERED	BIT(12)
 /* Context is being destroyed so dont save it */
 #define CTXT_FLAGS_BEING_DESTROYED	BIT(13)
+/* User mode generated timestamps enabled */
+#define CTXT_FLAGS_USER_GENERATED_TS    BIT(14)
 
 struct kgsl_device;
 struct adreno_device;
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 6632d8f..9d0d0ae 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -527,11 +527,10 @@
 adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
 				struct adreno_context *context,
 				unsigned int flags, unsigned int *cmds,
-				int sizedwords)
+				int sizedwords, uint32_t timestamp)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
 	unsigned int *ringcmds;
-	unsigned int timestamp;
 	unsigned int total_sizedwords = sizedwords;
 	unsigned int i;
 	unsigned int rcmd_gpu;
@@ -546,21 +545,38 @@
 	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS)
 		context_id = context->id;
 
+	if ((context->flags & CTXT_FLAGS_USER_GENERATED_TS) &&
+			(!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))) {
+		if (timestamp_cmp(rb->timestamp[context_id],
+						timestamp) >= 0) {
+			KGSL_DRV_ERR(rb->device,
+				"Invalid user generated ts <%d:0x%x>, "
+				"less than last issued ts <%d:0x%x>\n",
+				context_id, timestamp, context_id,
+				rb->timestamp[context_id]);
+			return -ERANGE;
+		}
+	}
+
 	/* reserve space to temporarily turn off protected mode
 	*  error checking if needed
 	*/
 	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
 	/* 2 dwords to store the start of command sequence */
 	total_sizedwords += 2;
-
-	/* Add CP_COND_EXEC commands to generate CP_INTERRUPT */
-	total_sizedwords += context ? 7 : 0;
+	/*
+	 * Add CP_COND_EXEC commands to generate CP_INTERRUPT only
+	 * for submissions from userspace.
+	 */
+	total_sizedwords += (context &&
+			!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) ? 7 : 0;
 
 	if (adreno_is_a3xx(adreno_dev))
 		total_sizedwords += 7;
 
 	total_sizedwords += 2; /* scratchpad ts for recovery */
-	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
+	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS &&
+			!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
 		total_sizedwords += 3; /* sop timestamp */
 		total_sizedwords += 4; /* eop timestamp */
 		total_sizedwords += 3; /* global timestamp without cache
@@ -606,10 +622,13 @@
 	/* always increment the global timestamp. once. */
 	rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;
 
-	if (context && !(flags & KGSL_CMD_FLAGS_DUMMY_INTR_CMD)) {
+	/* Do not update context's timestamp for internal submissions */
+	if (context && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
 		if (context_id == KGSL_MEMSTORE_GLOBAL)
 			rb->timestamp[context->id] =
 				rb->timestamp[KGSL_MEMSTORE_GLOBAL];
+		else if (context->flags & CTXT_FLAGS_USER_GENERATED_TS)
+			rb->timestamp[context_id] = timestamp;
 		else
 			rb->timestamp[context_id]++;
 	}
@@ -633,7 +652,8 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
 	}
 
-	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
+	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS
+			&& !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
 		/* start-of-pipeline timestamp */
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
 			cp_type3_packet(CP_MEM_WRITE, 2));
@@ -661,11 +681,13 @@
 			cp_type3_packet(CP_EVENT_WRITE, 3));
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
-			KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[context_id]);
+			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+						eoptimestamp)));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu,
+				rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
 	}
 
-	if (context) {
+	if (context && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
 		/* Conditional execution based on memory values */
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
 			cp_type3_packet(CP_COND_EXEC, 4));
@@ -717,8 +739,8 @@
 		device->state & KGSL_STATE_HUNG)
 		return;
 
-	adreno_ringbuffer_addcmds(rb, a_ctxt, KGSL_CMD_FLAGS_DUMMY_INTR_CMD,
-			cmds, sizedwords);
+	adreno_ringbuffer_addcmds(rb, a_ctxt, KGSL_CMD_FLAGS_INTERNAL_ISSUE,
+					cmds, sizedwords, 0);
 }
 
 unsigned int
@@ -734,7 +756,11 @@
 	if (device->state & KGSL_STATE_HUNG)
 		return kgsl_readtimestamp(device, KGSL_MEMSTORE_GLOBAL,
 					KGSL_TIMESTAMP_RETIRED);
-	return adreno_ringbuffer_addcmds(rb, drawctxt, flags, cmds, sizedwords);
+
+	flags |= KGSL_CMD_FLAGS_INTERNAL_ISSUE;
+
+	return adreno_ringbuffer_addcmds(rb, drawctxt, flags, cmds,
+							sizedwords, 0);
 }
 
 static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr,
@@ -1008,8 +1034,9 @@
 	adreno_drawctxt_switch(adreno_dev, drawctxt, flags);
 
 	*timestamp = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
-					drawctxt, 0,
-					&link[0], (cmds - link));
+					drawctxt,
+					0,
+					&link[0], (cmds - link), *timestamp);
 
 	KGSL_CMD_INFO(device, "ctxt %d g %08x numibs %d ts %d\n",
 		context->id, (unsigned int)ibdesc, numibs, *timestamp);
diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h
index 271e7b3..ad7308b 100644
--- a/include/linux/msm_kgsl.h
+++ b/include/linux/msm_kgsl.h
@@ -2,7 +2,7 @@
 #define _MSM_KGSL_H
 
 #define KGSL_VERSION_MAJOR        3
-#define KGSL_VERSION_MINOR        13
+#define KGSL_VERSION_MINOR        14
 
 /*context flags */
 #define KGSL_CONTEXT_SAVE_GMEM		0x00000001
@@ -12,6 +12,7 @@
 #define KGSL_CONTEXT_PREAMBLE		0x00000010
 #define KGSL_CONTEXT_TRASH_STATE	0x00000020
 #define KGSL_CONTEXT_PER_CONTEXT_TS	0x00000040
+#define KGSL_CONTEXT_USER_GENERATED_TS	0x00000080
 
 #define KGSL_CONTEXT_INVALID 0xffffffff
 
@@ -73,6 +74,9 @@
 #define KGSL_CLK_MEM_IFACE 0x00000010
 #define KGSL_CLK_AXI	0x00000020
 
+/* Server Side Sync Timeout in milliseconds */
+#define KGSL_SYNCOBJ_SERVER_TIMEOUT 2000
+
 /*
  * Reset status values for context
  */