msm: kgsl: Add per context timestamp

Add new ioctls for per-context timestamps.
Timestamp functions (read/write/wait) are now context
specific rather than only using the global timestamp.
Per-context timestamps are a requirement for priority
based queueing.

Change-Id: I5fdfb816480241b9552ecf90ed1bb22db3a652b5
Signed-off-by: Carter Cooper <ccooper@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index fb672e4..ba39fe1 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -614,13 +614,13 @@
 	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
 	unsigned int timestamp;
 	unsigned int num_rb_contents;
-	unsigned int bad_context;
 	unsigned int reftimestamp;
 	unsigned int enable_ts;
 	unsigned int soptimestamp;
 	unsigned int eoptimestamp;
-	struct adreno_context *drawctxt;
+	unsigned int context_id;
 	struct kgsl_context *context;
+	struct adreno_context *adreno_context;
 	int next = 0;
 
 	KGSL_DRV_ERR(device, "Starting recovery from 3D GPU hang....\n");
@@ -636,22 +636,35 @@
 	ret = adreno_ringbuffer_extract(rb, rb_buffer, &num_rb_contents);
 	if (ret)
 		goto done;
-	timestamp = rb->timestamp;
-	KGSL_DRV_ERR(device, "Last issued timestamp: %x\n", timestamp);
-	kgsl_sharedmem_readl(&device->memstore, &bad_context,
-				KGSL_DEVICE_MEMSTORE_OFFSET(current_context));
+	kgsl_sharedmem_readl(&device->memstore, &context_id,
+				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+					current_context));
+	context = idr_find(&device->context_idr, context_id);
+	if (context == NULL) {
+		KGSL_DRV_ERR(device, "Last context unknown id:%d\n",
+				context_id);
+		context_id = KGSL_MEMSTORE_GLOBAL;
+	}
+
+	timestamp = rb->timestamp[KGSL_MEMSTORE_GLOBAL];
+	KGSL_DRV_ERR(device, "Last issued global timestamp: %x\n", timestamp);
+
 	kgsl_sharedmem_readl(&device->memstore, &reftimestamp,
-				KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts));
+				KGSL_MEMSTORE_OFFSET(context_id,
+					ref_wait_ts));
 	kgsl_sharedmem_readl(&device->memstore, &enable_ts,
-				KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable));
+				KGSL_MEMSTORE_OFFSET(context_id,
+					ts_cmp_enable));
 	kgsl_sharedmem_readl(&device->memstore, &soptimestamp,
-				KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp));
+				KGSL_MEMSTORE_OFFSET(context_id,
+					soptimestamp));
 	kgsl_sharedmem_readl(&device->memstore, &eoptimestamp,
-				KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp));
+				KGSL_MEMSTORE_OFFSET(context_id,
+					eoptimestamp));
 	/* Make sure memory is synchronized before restarting the GPU */
 	mb();
 	KGSL_CTXT_ERR(device,
-		"Context that caused a GPU hang: %x\n", bad_context);
+		"Context id that caused a GPU hang: %d\n", context_id);
 	/* restart device */
 	ret = adreno_stop(device);
 	if (ret)
@@ -662,20 +675,20 @@
 	KGSL_DRV_ERR(device, "Device has been restarted after hang\n");
 	/* Restore timestamp states */
 	kgsl_sharedmem_writel(&device->memstore,
-			KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp),
+			KGSL_MEMSTORE_OFFSET(context_id, soptimestamp),
 			soptimestamp);
 	kgsl_sharedmem_writel(&device->memstore,
-			KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp),
+			KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp),
 			eoptimestamp);
 	kgsl_sharedmem_writel(&device->memstore,
-			KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp),
+			KGSL_MEMSTORE_OFFSET(context_id, soptimestamp),
 			soptimestamp);
 	if (num_rb_contents) {
 		kgsl_sharedmem_writel(&device->memstore,
-			KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts),
+			KGSL_MEMSTORE_OFFSET(context_id, ref_wait_ts),
 			reftimestamp);
 		kgsl_sharedmem_writel(&device->memstore,
-			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable),
+			KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable),
 			enable_ts);
 	}
 	/* Make sure all writes are posted before the GPU reads them */
@@ -683,12 +696,12 @@
 	/* Mark the invalid context so no more commands are accepted from
 	 * that context */
 
-	drawctxt = (struct adreno_context *) bad_context;
+	adreno_context = context->devctxt;
 
 	KGSL_CTXT_ERR(device,
-		"Context that caused a GPU hang: %x\n", bad_context);
+		"Context that caused a GPU hang: %d\n", adreno_context->id);
 
-	drawctxt->flags |= CTXT_FLAGS_GPU_HANG;
+	adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
 
 	/*
 	 * Set the reset status of all contexts to
@@ -698,7 +711,7 @@
 	while ((context = idr_get_next(&device->context_idr, &next))) {
 		if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT !=
 			context->reset_status) {
-			if (context->devctxt != drawctxt)
+			if (context->id != context_id)
 				context->reset_status =
 				KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT;
 			else
@@ -710,7 +723,7 @@
 
 	/* Restore valid commands in ringbuffer */
 	adreno_ringbuffer_restore(rb, rb_buffer, num_rb_contents);
-	rb->timestamp = timestamp;
+	rb->timestamp[KGSL_MEMSTORE_GLOBAL] = timestamp;
 done:
 	vfree(rb_buffer);
 	return ret;
@@ -811,7 +824,8 @@
 				shadowprop.size = device->memstore.size;
 				/* GSL needs this to be set, even if it
 				   appears to be meaningless */
-				shadowprop.flags = KGSL_FLAGS_INITIALIZED;
+				shadowprop.flags = KGSL_FLAGS_INITIALIZED |
+					KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS;
 			}
 			if (copy_to_user(value, &shadowprop,
 				sizeof(shadowprop))) {
@@ -1079,38 +1093,58 @@
 	__raw_writel(value, reg);
 }
 
+static unsigned int _get_context_id(struct kgsl_context *k_ctxt)
+{
+	struct adreno_context *drawctxt;
+
+	/* No context supplied: fall back to KGSL_MEMSTORE_GLOBAL. */
+	if (k_ctxt == NULL)
+		return KGSL_MEMSTORE_GLOBAL;
+
+	/*
+	 * Contexts created without per context timestamp support use the
+	 * global id too, since issueibcmds will be returning that one.
+	 */
+	drawctxt = k_ctxt->devctxt;
+	if (drawctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS)
+		return drawctxt->id;
+	return KGSL_MEMSTORE_GLOBAL;
+}
+
 static int kgsl_check_interrupt_timestamp(struct kgsl_device *device,
-					unsigned int timestamp)
+		struct kgsl_context *context, unsigned int timestamp)
 {
 	int status;
 	unsigned int ref_ts, enableflag;
+	unsigned int context_id = _get_context_id(context);
 
-	status = kgsl_check_timestamp(device, timestamp);
+	status = kgsl_check_timestamp(device, context, timestamp);
 	if (!status) {
 		mutex_lock(&device->mutex);
 		kgsl_sharedmem_readl(&device->memstore, &enableflag,
-			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable));
+			KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable));
 		mb();
 
 		if (enableflag) {
 			kgsl_sharedmem_readl(&device->memstore, &ref_ts,
-				KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts));
+				KGSL_MEMSTORE_OFFSET(context_id,
+					ref_wait_ts));
 			mb();
 			if (timestamp_cmp(ref_ts, timestamp) >= 0) {
 				kgsl_sharedmem_writel(&device->memstore,
-				KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts),
-				timestamp);
+				KGSL_MEMSTORE_OFFSET(context_id,
+					ref_wait_ts), timestamp);
 				wmb();
 			}
 		} else {
 			unsigned int cmds[2];
 			kgsl_sharedmem_writel(&device->memstore,
-				KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts),
-				timestamp);
+				KGSL_MEMSTORE_OFFSET(context_id,
+					ref_wait_ts), timestamp);
 			enableflag = 1;
 			kgsl_sharedmem_writel(&device->memstore,
-				KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable),
-				enableflag);
+				KGSL_MEMSTORE_OFFSET(context_id,
+					ts_cmp_enable), enableflag);
 			wmb();
 			/* submit a dummy packet so that even if all
 			* commands upto timestamp get executed we will still
@@ -1144,6 +1178,7 @@
 
 /* MUST be called with the device mutex held */
 static int adreno_waittimestamp(struct kgsl_device *device,
+				struct kgsl_context *context,
 				unsigned int timestamp,
 				unsigned int msecs)
 {
@@ -1155,15 +1190,19 @@
 	int retries;
 	unsigned int msecs_first;
 	unsigned int msecs_part;
+	unsigned int ts_issued;
+	unsigned int context_id = _get_context_id(context);
+
+	ts_issued = adreno_dev->ringbuffer.timestamp[context_id];
 
 	/* Don't wait forever, set a max value for now */
 	if (msecs == -1)
 		msecs = adreno_dev->wait_timeout;
 
-	if (timestamp_cmp(timestamp, adreno_dev->ringbuffer.timestamp) > 0) {
-		KGSL_DRV_ERR(device, "Cannot wait for invalid ts: %x, "
-			"rb->timestamp: %x\n",
-			timestamp, adreno_dev->ringbuffer.timestamp);
+	if (timestamp_cmp(timestamp, ts_issued) > 0) {
+		KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, "
+			"last issued ts <%d:0x%x>\n",
+			context_id, timestamp, context_id, ts_issued);
 		status = -EINVAL;
 		goto done;
 	}
@@ -1175,7 +1214,7 @@
 	msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100;
 	msecs_part = (msecs - msecs_first + 3) / 4;
 	for (retries = 0; retries < 5; retries++) {
-		if (kgsl_check_timestamp(device, timestamp)) {
+		if (kgsl_check_timestamp(device, context, timestamp)) {
 			/* if the timestamp happens while we're not
 			 * waiting, there's a chance that an interrupt
 			 * will not be generated and thus the timestamp
@@ -1197,7 +1236,7 @@
 		status = kgsl_wait_event_interruptible_timeout(
 				device->wait_queue,
 				kgsl_check_interrupt_timestamp(device,
-					timestamp),
+					context, timestamp),
 				msecs_to_jiffies(retries ?
 					msecs_part : msecs_first), io);
 		mutex_lock(&device->mutex);
@@ -1214,9 +1253,10 @@
 	}
 	status = -ETIMEDOUT;
 	KGSL_DRV_ERR(device,
-		     "Device hang detected while waiting for timestamp: %x,"
-		      "last submitted(rb->timestamp): %x, wptr: %x\n",
-		      timestamp, adreno_dev->ringbuffer.timestamp,
+		     "Device hang detected while waiting for timestamp: "
+		     "<%d:0x%x>, last submitted timestamp: <%d:0x%x>, "
+		     "wptr: 0x%x\n",
+		      context_id, timestamp, context_id, ts_issued,
 		      adreno_dev->ringbuffer.wptr);
 	if (!adreno_dump_and_recover(device)) {
 		/* wait for idle after recovery as the
@@ -1230,15 +1270,17 @@
 }
 
 static unsigned int adreno_readtimestamp(struct kgsl_device *device,
-			     enum kgsl_timestamp_type type)
+		struct kgsl_context *context, enum kgsl_timestamp_type type)
 {
 	unsigned int timestamp = 0;
+	unsigned int context_id = _get_context_id(context);
 
 	if (type == KGSL_TIMESTAMP_CONSUMED)
 		adreno_regread(device, REG_CP_TIMESTAMP, &timestamp);
 	else if (type == KGSL_TIMESTAMP_RETIRED)
 		kgsl_sharedmem_readl(&device->memstore, &timestamp,
-				 KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp));
+				 KGSL_MEMSTORE_OFFSET(context_id,
+					 eoptimestamp));
 	rmb();
 
 	return timestamp;
diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c
index d31c2ce..18d0e83 100644
--- a/drivers/gpu/msm/adreno_a2xx.c
+++ b/drivers/gpu/msm/adreno_a2xx.c
@@ -1521,8 +1521,8 @@
 	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
 	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
 	cmds[3] = device->memstore.gpuaddr +
-		KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
-	cmds[4] = (unsigned int) context;
+		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
+	cmds[4] = context->id;
 	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
 	kgsl_mmu_setstate(device, context->pagetable);
 
@@ -1648,11 +1648,18 @@
 
 	if (status & CP_INT_CNTL__RB_INT_MASK) {
 		/* signal intr completion event */
-		unsigned int enableflag = 0;
-		kgsl_sharedmem_writel(&rb->device->memstore,
-			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable),
-			enableflag);
-		wmb();
+		unsigned int context_id;
+		kgsl_sharedmem_readl(&device->memstore,
+				&context_id,
+				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+					current_context));
+		if (context_id < KGSL_MEMSTORE_MAX) {
+			kgsl_sharedmem_writel(&rb->device->memstore,
+					KGSL_MEMSTORE_OFFSET(context_id,
+						ts_cmp_enable), 0);
+			device->last_expired_ctxt_id = context_id;
+			wmb();
+		}
 		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
 	}
 
@@ -1865,7 +1872,6 @@
 static void a2xx_start(struct adreno_device *adreno_dev)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
-	int init_reftimestamp = 0x7fffffff;
 
 	/*
 	 * We need to make sure all blocks are powered up and clocked
@@ -1917,12 +1923,6 @@
 	else
 		adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0x80);
 
-	kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size);
-
-	kgsl_sharedmem_writel(&device->memstore,
-			      KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts),
-			      init_reftimestamp);
-
 	adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000);
 
 	/* Make sure interrupts are disabled */
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index 2761b60..73cc46f 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -2267,8 +2267,8 @@
 	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
 	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
 	cmds[3] = device->memstore.gpuaddr +
-	    KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
-	cmds[4] = (unsigned int)context;
+		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
+	cmds[4] = context->id;
 	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
 	kgsl_mmu_setstate(device, context->pagetable);
 
@@ -2411,9 +2411,17 @@
 	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
 
 	if (irq == A3XX_INT_CP_RB_INT) {
-		kgsl_sharedmem_writel(&rb->device->memstore,
-			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0);
-		wmb();
+		unsigned int context_id;
+		kgsl_sharedmem_readl(&adreno_dev->dev.memstore,
+				&context_id,
+				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+					current_context));
+		if (context_id < KGSL_MEMSTORE_MAX) {
+			kgsl_sharedmem_writel(&rb->device->memstore,
+					KGSL_MEMSTORE_OFFSET(context_id,
+						ts_cmp_enable), 0);
+			wmb();
+		}
 		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
 	}
 
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index aeb89b3..d773521 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -17,6 +17,8 @@
 #include "kgsl_sharedmem.h"
 #include "adreno.h"
 
+#define KGSL_INIT_REFTIMESTAMP		0x7FFFFFFF
+
 /* quad for copying GMEM to context shadow */
 #define QUAD_LEN 12
 #define QUAD_RESTORE_LEN 14
@@ -154,6 +156,7 @@
 
 	drawctxt->pagetable = pagetable;
 	drawctxt->bin_base_offset = 0;
+	drawctxt->id = context->id;
 
 	if (flags & KGSL_CONTEXT_PREAMBLE)
 		drawctxt->flags |= CTXT_FLAGS_PREAMBLE;
@@ -161,10 +164,17 @@
 	if (flags & KGSL_CONTEXT_NO_GMEM_ALLOC)
 		drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC;
 
+	if (flags & KGSL_CONTEXT_PER_CONTEXT_TS)
+		drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS;
+
 	ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
 	if (ret)
 		goto err;
 
+	kgsl_sharedmem_writel(&device->memstore,
+			KGSL_MEMSTORE_OFFSET(drawctxt->id, ref_wait_ts),
+			KGSL_INIT_REFTIMESTAMP);
+
 	context->devctxt = drawctxt;
 	return 0;
 err:
@@ -187,11 +197,12 @@
 			  struct kgsl_context *context)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_context *drawctxt = context->devctxt;
+	struct adreno_context *drawctxt;
 
-	if (drawctxt == NULL)
+	if (context == NULL)
 		return;
 
+	drawctxt = context->devctxt;
 	/* deactivate context */
 	if (adreno_dev->drawctxt_active == drawctxt) {
 		/* no need to save GMEM or shader, the context is
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index 9a7ae3f..3eb1aba 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -40,6 +40,10 @@
 #define CTXT_FLAGS_GPU_HANG		0x00008000
 /* Specifies there is no need to save GMEM */
 #define CTXT_FLAGS_NOGMEMALLOC          0x00010000
+/* Trash state for context */
+#define CTXT_FLAGS_TRASHSTATE		0x00020000
+/* per context timestamps enabled */
+#define CTXT_FLAGS_PER_CONTEXT_TS	0x00040000
 
 struct kgsl_device;
 struct adreno_device;
@@ -72,6 +76,7 @@
 };
 
 struct adreno_context {
+	unsigned int id;
 	uint32_t flags;
 	struct kgsl_pagetable *pagetable;
 	struct kgsl_memdesc gpustate;
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index d97659c..6bdf284 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -14,6 +14,7 @@
 #include <linux/vmalloc.h>
 
 #include "kgsl.h"
+#include "kgsl_sharedmem.h"
 
 #include "adreno.h"
 #include "adreno_pm4types.h"
@@ -690,7 +691,9 @@
 	const uint32_t *rb_vaddr;
 	int num_item = 0;
 	int read_idx, write_idx;
-	unsigned int ts_processed;
+	unsigned int ts_processed = 0xdeaddead;
+	struct kgsl_context *context;
+	unsigned int context_id;
 
 	static struct ib_list ib_list;
 
@@ -716,9 +719,18 @@
 	kgsl_regread(device, REG_CP_IB2_BASE, &cp_ib2_base);
 	kgsl_regread(device, REG_CP_IB2_BUFSZ, &cp_ib2_bufsz);
 
-	ts_processed = device->ftbl->readtimestamp(device,
-		KGSL_TIMESTAMP_RETIRED);
-	KGSL_LOG_DUMP(device, "TIMESTM RTRD: %08X\n", ts_processed);
+	kgsl_sharedmem_readl(&device->memstore,
+			(unsigned int *) &context_id,
+			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+				current_context));
+	context = idr_find(&device->context_idr, context_id);
+	if (context) {
+		ts_processed = device->ftbl->readtimestamp(device, context,
+				KGSL_TIMESTAMP_RETIRED);
+		KGSL_LOG_DUMP(device, "CTXT: %d  TIMESTM RTRD: %08X\n",
+				context->id, ts_processed);
+	} else
+		KGSL_LOG_DUMP(device, "BAD CTXT: %d\n", context_id);
 
 	num_item = adreno_ringbuffer_count(&adreno_dev->ringbuffer,
 						cp_rb_rptr);
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 66dd56b..be0fc1d 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -236,7 +236,7 @@
 		return 0;
 
 	if (init_ram) {
-		rb->timestamp = 0;
+		rb->timestamp[KGSL_MEMSTORE_GLOBAL] = 0;
 		GSL_RB_INIT_TIMESTAMP(rb);
 	}
 
@@ -321,18 +321,13 @@
 	}
 
 	/* setup scratch/timestamp */
-	adreno_regwrite(device, REG_SCRATCH_ADDR,
-			     device->memstore.gpuaddr +
-			     KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp));
+	adreno_regwrite(device, REG_SCRATCH_ADDR, device->memstore.gpuaddr +
+			     KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+				     soptimestamp));
 
 	adreno_regwrite(device, REG_SCRATCH_UMSK,
 			     GSL_RB_MEMPTRS_SCRATCH_MASK);
 
-	/* update the eoptimestamp field with the last retired timestamp */
-	kgsl_sharedmem_writel(&device->memstore,
-			     KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp),
-			     rb->timestamp);
-
 	/* load the CP ucode */
 
 	status = adreno_ringbuffer_load_pm4_ucode(device);
@@ -431,15 +426,28 @@
 
 static uint32_t
 adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
+				struct adreno_context *context,
 				unsigned int flags, unsigned int *cmds,
 				int sizedwords)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
 	unsigned int *ringcmds;
 	unsigned int timestamp;
-	unsigned int total_sizedwords = sizedwords + 6;
+	unsigned int total_sizedwords = sizedwords;
 	unsigned int i;
 	unsigned int rcmd_gpu;
+	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
+	unsigned int gpuaddr = rb->device->memstore.gpuaddr;
+
+	if (context != NULL) {
+		/*
+		 * if the context was not created with per context timestamp
+		 * support, we must use the global timestamp since issueibcmds
+		 * will be returning that one.
+		 */
+		if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)
+			context_id = context->id;
+	}
 
 	/* reserve space to temporarily turn off protected mode
 	*  error checking if needed
@@ -451,6 +459,13 @@
 	if (adreno_is_a3xx(adreno_dev))
 		total_sizedwords += 7;
 
+	total_sizedwords += 2; /* scratchpad ts for recovery */
+	if (context) {
+		total_sizedwords += 3; /* sop timestamp */
+		total_sizedwords += 4; /* eop timestamp */
+	}
+	total_sizedwords += 4; /* global timestamp for recovery*/
+
 	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
 	rcmd_gpu = rb->buffer_desc.gpuaddr
 		+ sizeof(uint)*(rb->wptr-total_sizedwords);
@@ -478,12 +493,20 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
 	}
 
-	rb->timestamp++;
-	timestamp = rb->timestamp;
+	/* always increment the global timestamp. once. */
+	rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;
+	if (context) {
+		if (context_id == KGSL_MEMSTORE_GLOBAL)
+			rb->timestamp[context_id] =
+				rb->timestamp[KGSL_MEMSTORE_GLOBAL];
+		else
+			rb->timestamp[context_id]++;
+	}
+	timestamp = rb->timestamp[context_id];
 
-	/* start-of-pipeline and end-of-pipeline timestamps */
+	/* scratchpad ts for recovery */
 	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp);
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
 
 	if (adreno_is_a3xx(adreno_dev)) {
 		/*
@@ -499,22 +522,41 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
 	}
 
+	if (context) {
+		/* start-of-pipeline timestamp */
+		GSL_RB_WRITE(ringcmds, rcmd_gpu,
+			cp_type3_packet(CP_MEM_WRITE, 2));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp)));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
+
+		/* end-of-pipeline timestamp */
+		GSL_RB_WRITE(ringcmds, rcmd_gpu,
+			cp_type3_packet(CP_EVENT_WRITE, 3));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp)));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
+	}
+
 	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3));
 	GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
-	GSL_RB_WRITE(ringcmds, rcmd_gpu,
-		     (rb->device->memstore.gpuaddr +
-		      KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)));
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp);
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+		      KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+			      eoptimestamp)));
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
 
 	if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
 		/* Conditional execution based on memory values */
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
 			cp_type3_packet(CP_COND_EXEC, 4));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr +
-			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)) >> 2);
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr +
-			KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)) >> 2);
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp);
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+			KGSL_MEMSTORE_OFFSET(
+				context_id, ts_cmp_enable)) >> 2);
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+			KGSL_MEMSTORE_OFFSET(
+				context_id, ref_wait_ts)) >> 2);
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
 		/* # of conditional command DWORDs */
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
@@ -533,7 +575,6 @@
 
 	adreno_ringbuffer_submit(rb);
 
-	/* return timestamp of issued coREG_ands */
 	return timestamp;
 }
 
@@ -548,7 +589,7 @@
 
 	if (device->state & KGSL_STATE_HUNG)
 		return;
-	adreno_ringbuffer_addcmds(rb, flags, cmds, sizedwords);
+	adreno_ringbuffer_addcmds(rb, NULL, flags, cmds, sizedwords);
 }
 
 static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr,
@@ -769,8 +810,8 @@
 
 	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) {
 		KGSL_CTXT_WARN(device, "Context %p caused a gpu hang.."
-			" will not accept commands for this context\n",
-			drawctxt);
+			" will not accept commands for context %d\n",
+			drawctxt, drawctxt->id);
 		return -EDEADLK;
 	}
 
@@ -822,6 +863,7 @@
 	adreno_drawctxt_switch(adreno_dev, drawctxt, flags);
 
 	*timestamp = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
+					drawctxt,
 					KGSL_CMD_FLAGS_NOT_KERNEL_CMD,
 					&link[0], (cmds - link));
 
@@ -855,12 +897,26 @@
 	unsigned int val2;
 	unsigned int val3;
 	unsigned int copy_rb_contents = 0;
-	unsigned int cur_context;
+	struct kgsl_context *context;
+	unsigned int context_id;
 
 	GSL_RB_GET_READPTR(rb, &rb->rptr);
 
-	retired_timestamp = device->ftbl->readtimestamp(device,
-		KGSL_TIMESTAMP_RETIRED);
+	/* current_context is the context that is presently active in the
+	 * GPU, i.e the context in which the hang is caused */
+	kgsl_sharedmem_readl(&device->memstore, &context_id,
+		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+		current_context));
+	KGSL_DRV_ERR(device, "Last context id: %d\n", context_id);
+	context = idr_find(&device->context_idr, context_id);
+	if (context == NULL) {
+		KGSL_DRV_ERR(device,
+			"GPU recovery from hang not possible because last"
+			" context id is invalid.\n");
+		return -EINVAL;
+	}
+	retired_timestamp = device->ftbl->readtimestamp(device, context,
+				KGSL_TIMESTAMP_RETIRED);
 	KGSL_DRV_ERR(device, "GPU successfully executed till ts: %x\n",
 			retired_timestamp);
 	/*
@@ -894,7 +950,8 @@
 				(val1 == cp_type3_packet(CP_EVENT_WRITE, 3)
 				&& val2 == CACHE_FLUSH_TS &&
 				val3 == (rb->device->memstore.gpuaddr +
-				KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)))) {
+				KGSL_MEMSTORE_OFFSET(context_id,
+					eoptimestamp)))) {
 				rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
 							rb->buffer_desc.size);
 				KGSL_DRV_ERR(device,
@@ -940,10 +997,6 @@
 		return -EINVAL;
 	}
 
-	/* current_context is the context that is presently active in the
-	 * GPU, i.e the context in which the hang is caused */
-	kgsl_sharedmem_readl(&device->memstore, &cur_context,
-		KGSL_DEVICE_MEMSTORE_OFFSET(current_context));
 	while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) {
 		kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr);
 		rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
@@ -958,7 +1011,8 @@
 			rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
 							rb->buffer_desc.size);
 			BUG_ON(val1 != (device->memstore.gpuaddr +
-				KGSL_DEVICE_MEMSTORE_OFFSET(current_context)));
+				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+					current_context)));
 			kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr);
 			rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
 							rb->buffer_desc.size);
@@ -970,7 +1024,7 @@
 			 * and leave.
 			 */
 
-			if ((copy_rb_contents == 0) && (value == cur_context)) {
+			if ((copy_rb_contents == 0) && (value == context_id)) {
 				KGSL_DRV_ERR(device, "GPU recovery could not "
 					"find the previous context\n");
 				return -EINVAL;
@@ -986,7 +1040,7 @@
 			/* if context switches to a context that did not cause
 			 * hang then start saving the rb contents as those
 			 * commands can be executed */
-			if (value != cur_context) {
+			if (value != context_id) {
 				copy_rb_contents = 1;
 				temp_rb_buffer[temp_idx++] = cp_nop_packet(1);
 				temp_rb_buffer[temp_idx++] =
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index d0110b9..7c93b3b 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -54,7 +54,8 @@
 
 	unsigned int wptr; /* write pointer offset in dwords from baseaddr */
 	unsigned int rptr; /* read pointer offset in dwords from baseaddr */
-	uint32_t timestamp;
+
+	unsigned int timestamp[KGSL_MEMSTORE_MAX];
 };
 
 
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 45bcf69..693aa74 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -59,22 +59,30 @@
  * @returns - 0 on success or error code on failure
  */
 
-static int kgsl_add_event(struct kgsl_device *device, u32 ts,
-	void (*cb)(struct kgsl_device *, void *, u32), void *priv,
+static int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts,
+	void (*cb)(struct kgsl_device *, void *, u32, u32), void *priv,
 	struct kgsl_device_private *owner)
 {
 	struct kgsl_event *event;
 	struct list_head *n;
-	unsigned int cur = device->ftbl->readtimestamp(device,
-		KGSL_TIMESTAMP_RETIRED);
+	unsigned int cur_ts;
+	struct kgsl_context *context = NULL;
 
 	if (cb == NULL)
 		return -EINVAL;
 
+	if (id != KGSL_MEMSTORE_GLOBAL) {
+		context = idr_find(&device->context_idr, id);
+		if (context == NULL)
+			return -EINVAL;
+	}
+	cur_ts = device->ftbl->readtimestamp(device, context,
+				KGSL_TIMESTAMP_RETIRED);
+
 	/* Check to see if the requested timestamp has already fired */
 
-	if (timestamp_cmp(cur, ts) >= 0) {
-		cb(device, priv, cur);
+	if (timestamp_cmp(cur_ts, ts) >= 0) {
+		cb(device, priv, id, cur_ts);
 		return 0;
 	}
 
@@ -82,17 +90,24 @@
 	if (event == NULL)
 		return -ENOMEM;
 
+	event->context = context;
 	event->timestamp = ts;
 	event->priv = priv;
 	event->func = cb;
 	event->owner = owner;
 
-	/* Add the event in order to the list */
+	/*
+	 * Add the event in order to the list.  Order is by context id
+	 * first and then by timestamp for that context.
+	 */
 
 	for (n = device->events.next ; n != &device->events; n = n->next) {
 		struct kgsl_event *e =
 			list_entry(n, struct kgsl_event, list);
 
+		if (e->context != context)
+			continue;
+
 		if (timestamp_cmp(e->timestamp, ts) > 0) {
 			list_add(&event->list, n->prev);
 			break;
@@ -116,12 +131,16 @@
 	struct kgsl_device_private *owner)
 {
 	struct kgsl_event *event, *event_tmp;
-	unsigned int cur = device->ftbl->readtimestamp(device,
-		KGSL_TIMESTAMP_RETIRED);
+	unsigned int id, cur;
 
 	list_for_each_entry_safe(event, event_tmp, &device->events, list) {
 		if (event->owner != owner)
 			continue;
+
+		cur = device->ftbl->readtimestamp(device, event->context,
+				KGSL_TIMESTAMP_RETIRED);
+
+		id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL;
 		/*
 		 * "cancel" the events by calling their callback.
 		 * Currently, events are used for lock and memory
@@ -129,7 +148,7 @@
 		 * thing to do is release or free.
 		 */
 		if (event->func)
-			event->func(device, event->priv, cur);
+			event->func(device, event->priv, id, cur);
 
 		list_del(&event->list);
 		kfree(event);
@@ -286,8 +305,8 @@
 			return NULL;
 		}
 
-		ret = idr_get_new(&dev_priv->device->context_idr,
-				  context, &id);
+		ret = idr_get_new_above(&dev_priv->device->context_idr,
+				  context, 1, &id);
 
 		if (ret != -EAGAIN)
 			break;
@@ -298,6 +317,16 @@
 		return NULL;
 	}
 
+	/* MAX - 1, there is one memdesc in memstore for device info */
+	if (id >= KGSL_MEMSTORE_MAX) {
+		KGSL_DRV_ERR(dev_priv->device, "cannot have more than %d "
+				"ctxts due to memstore limitation\n",
+				KGSL_MEMSTORE_MAX);
+		idr_remove(&dev_priv->device->context_idr, id);
+		kfree(context);
+		return NULL;
+	}
+
 	context->id = id;
 	context->dev_priv = dev_priv;
 
@@ -328,25 +357,28 @@
 		ts_expired_ws);
 	struct kgsl_event *event, *event_tmp;
 	uint32_t ts_processed;
+	unsigned int id;
 
 	mutex_lock(&device->mutex);
 
-	/* get current EOP timestamp */
-	ts_processed = device->ftbl->readtimestamp(device,
-		KGSL_TIMESTAMP_RETIRED);
-
 	/* Process expired events */
 	list_for_each_entry_safe(event, event_tmp, &device->events, list) {
+		ts_processed = device->ftbl->readtimestamp(device,
+				event->context, KGSL_TIMESTAMP_RETIRED);
 		if (timestamp_cmp(ts_processed, event->timestamp) < 0)
-			break;
+			continue;
+
+		id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL;
 
 		if (event->func)
-			event->func(device, event->priv, ts_processed);
+			event->func(device, event->priv, id, ts_processed);
 
 		list_del(&event->list);
 		kfree(event);
 	}
 
+	device->last_expired_ctxt_id = KGSL_CONTEXT_INVALID;
+
 	mutex_unlock(&device->mutex);
 }
 
@@ -421,11 +453,15 @@
 }
 EXPORT_SYMBOL(kgsl_unregister_ts_notifier);
 
-int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp)
+int kgsl_check_timestamp(struct kgsl_device *device,
+	struct kgsl_context *context, unsigned int timestamp)
 {
 	unsigned int ts_processed;
+	unsigned int global;
 
-	ts_processed = device->ftbl->readtimestamp(device,
+	ts_processed = device->ftbl->readtimestamp(device, context,
+		KGSL_TIMESTAMP_RETIRED);
+	global = device->ftbl->readtimestamp(device, NULL,
 		KGSL_TIMESTAMP_RETIRED);
 
 	return (timestamp_cmp(ts_processed, timestamp) >= 0);
@@ -761,6 +797,9 @@
 	kgsl_check_suspended(device);
 
 	if (device->open_count == 0) {
+		kgsl_sharedmem_set(&device->memstore, 0, 0,
+				device->memstore.size);
+
 		result = device->ftbl->start(device, true);
 
 		if (result) {
@@ -910,23 +949,23 @@
 	return result;
 }
 
-static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private
-						*dev_priv, unsigned int cmd,
-						void *data)
+static long _device_waittimestamp(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context,
+		unsigned int timestamp,
+		unsigned int timeout)
 {
 	int result = 0;
-	struct kgsl_device_waittimestamp *param = data;
 
-	/* Set the active count so that suspend doesn't do the
-	   wrong thing */
+	/* Set the active count so that suspend doesn't do the wrong thing */
 
 	dev_priv->device->active_cnt++;
 
-	trace_kgsl_waittimestamp_entry(dev_priv->device, param);
+	trace_kgsl_waittimestamp_entry(dev_priv->device,
+			context ? context->id : KGSL_MEMSTORE_GLOBAL,
+			timestamp, timeout);
 
 	result = dev_priv->device->ftbl->waittimestamp(dev_priv->device,
-					param->timestamp,
-					param->timeout);
+					context, timestamp, timeout);
 
 	trace_kgsl_waittimestamp_exit(dev_priv->device, result);
 
@@ -939,6 +978,34 @@
 	return result;
 }
 
+/* Global wait: the context argument is a pointer, so pass NULL, not an id. */
+static long kgsl_ioctl_device_waittimestamp(
+	struct kgsl_device_private *dev_priv, unsigned int cmd, void *data)
+{
+	struct kgsl_device_waittimestamp *param = data;
+
+	return _device_waittimestamp(dev_priv, NULL,
+			param->timestamp, param->timeout);
+}
+
+/* Per-context wait: look up context_id, then wait on that context. */
+static long kgsl_ioctl_device_waittimestamp_ctxtid(
+	struct kgsl_device_private *dev_priv, unsigned int cmd, void *data)
+{
+	struct kgsl_device_waittimestamp_ctxtid *args = data;
+	struct kgsl_context *ctxt = kgsl_find_context(dev_priv,
+						args->context_id);
+
+	if (!ctxt) {
+		KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n",
+			args->context_id);
+		return -EINVAL;
+	}
+
+	return _device_waittimestamp(dev_priv, ctxt, args->timestamp,
+			args->timeout);
+}
+
 static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv,
 				      unsigned int cmd, void *data)
 {
@@ -951,7 +1018,7 @@
 	if (context == NULL) {
 		result = -EINVAL;
 		KGSL_DRV_ERR(dev_priv->device,
-			"invalid drawctxt drawctxt_id %d\n",
+			"invalid context_id %d\n",
 			param->drawctxt_id);
 		goto done;
 	}
@@ -1019,60 +1086,117 @@
 	return result;
 }
 
+/* Read a timestamp of the given type for context and emit a trace event. */
+static long _cmdstream_readtimestamp(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, unsigned int type,
+		unsigned int *timestamp)
+{
+	struct kgsl_device *device = dev_priv->device;
+	unsigned int id = context ? context->id : KGSL_MEMSTORE_GLOBAL;
+	unsigned int ts = device->ftbl->readtimestamp(device, context, type);
+
+	*timestamp = ts;
+	trace_kgsl_readtimestamp(device, id, type, ts);
+	return 0;
+}
+
 static long kgsl_ioctl_cmdstream_readtimestamp(struct kgsl_device_private
 						*dev_priv, unsigned int cmd,
 						void *data)
 {
 	struct kgsl_cmdstream_readtimestamp *param = data;
 
-	param->timestamp =
-		dev_priv->device->ftbl->readtimestamp(dev_priv->device,
-		param->type);
+	return _cmdstream_readtimestamp(dev_priv, NULL,
+			param->type, &param->timestamp);
+}
 
-	trace_kgsl_readtimestamp(dev_priv->device, param);
+static long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private
+						*dev_priv, unsigned int cmd,
+						void *data)
+{
+	struct kgsl_cmdstream_readtimestamp_ctxtid *param = data;
+	struct kgsl_context *context;
 
-	return 0;
+	context = kgsl_find_context(dev_priv, param->context_id);
+	if (context == NULL) {
+		KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n",
+			param->context_id);
+		return -EINVAL;
+	}
+
+	return _cmdstream_readtimestamp(dev_priv, context,
+			param->type, &param->timestamp);
 }
 
 static void kgsl_freemem_event_cb(struct kgsl_device *device,
-	void *priv, u32 timestamp)
+	void *priv, u32 id, u32 timestamp)
 {
 	struct kgsl_mem_entry *entry = priv;
 	spin_lock(&entry->priv->mem_lock);
 	rb_erase(&entry->node, &entry->priv->mem_rb);
 	spin_unlock(&entry->priv->mem_lock);
-	trace_kgsl_mem_timestamp_free(entry, timestamp);
+	trace_kgsl_mem_timestamp_free(entry, id, timestamp);
 	kgsl_mem_entry_detach_process(entry);
 }
 
+/* Queue kgsl_freemem_event_cb for the mem entry at gpuaddr on timestamp. */
+static long _cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv,
+		unsigned int gpuaddr, struct kgsl_context *context,
+		unsigned int timestamp, unsigned int type)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_mem_entry *entry;
+	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
+	unsigned int retired;
+
+	if (context)
+		context_id = context->id;
+
+	/* The entry lookup is done under the process mem_lock. */
+	spin_lock(&dev_priv->process_priv->mem_lock);
+	entry = kgsl_sharedmem_find(dev_priv->process_priv, gpuaddr);
+	spin_unlock(&dev_priv->process_priv->mem_lock);
+
+	if (entry == NULL) {
+		KGSL_DRV_ERR(device, "invalid gpuaddr %08x\n", gpuaddr);
+		return -EINVAL;
+	}
+
+	/* The current retired timestamp is sampled only for the trace. */
+	retired = device->ftbl->readtimestamp(device, context,
+					KGSL_TIMESTAMP_RETIRED);
+	trace_kgsl_mem_timestamp_queue(entry, context_id, retired);
+	return kgsl_add_event(device, context_id, timestamp,
+			kgsl_freemem_event_cb, entry, dev_priv);
+}
+
 static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private
 						    *dev_priv, unsigned int cmd,
 						    void *data)
 {
-	int result = 0;
 	struct kgsl_cmdstream_freememontimestamp *param = data;
-	struct kgsl_mem_entry *entry = NULL;
-	struct kgsl_device *device = dev_priv->device;
-	unsigned int cur;
 
-	spin_lock(&dev_priv->process_priv->mem_lock);
-	entry = kgsl_sharedmem_find(dev_priv->process_priv, param->gpuaddr);
-	spin_unlock(&dev_priv->process_priv->mem_lock);
+	return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr,
+			NULL, param->timestamp, param->type);
+}
 
-	if (entry) {
-		cur = device->ftbl->readtimestamp(device,
-						KGSL_TIMESTAMP_RETIRED);
+static long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid(
+						struct kgsl_device_private
+						*dev_priv, unsigned int cmd,
+						void *data)
+{
+	struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data;
+	struct kgsl_context *context;
 
-		trace_kgsl_mem_timestamp_queue(entry, cur);
-		result = kgsl_add_event(dev_priv->device, param->timestamp,
-					kgsl_freemem_event_cb, entry, dev_priv);
-	} else {
+	context = kgsl_find_context(dev_priv, param->context_id);
+	if (context == NULL) {
 		KGSL_DRV_ERR(dev_priv->device,
-			"invalid gpuaddr %08x\n", param->gpuaddr);
-		result = -EINVAL;
+			"invalid drawctxt context_id %d\n", param->context_id);
+		return -EINVAL;
 	}
 
-	return result;
+	return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr,
+			context, param->timestamp, param->type);
 }
 
 static long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv,
@@ -1095,7 +1219,6 @@
 			context, param->flags);
 
 	param->drawctxt_id = context->id;
-
 done:
 	if (result && context)
 		kgsl_destroy_context(dev_priv, context);
@@ -1799,13 +1922,14 @@
  * kgsl_genlock_event_cb - Event callback for a genlock timestamp event
  * @device - The KGSL device that expired the timestamp
  * @priv - private data for the event
+ * @context_id - the context id that goes with the timestamp
  * @timestamp - the timestamp that triggered the event
  *
  * Release a genlock lock following the expiration of a timestamp
  */
 
 static void kgsl_genlock_event_cb(struct kgsl_device *device,
-	void *priv, u32 timestamp)
+	void *priv, u32 context_id, u32 timestamp)
 {
 	struct kgsl_genlock_event_priv *ev = priv;
 	int ret;
@@ -1833,7 +1957,7 @@
  */
 
 static int kgsl_add_genlock_event(struct kgsl_device *device,
-	u32 timestamp, void __user *data, int len,
+	u32 context_id, u32 timestamp, void __user *data, int len,
 	struct kgsl_device_private *owner)
 {
 	struct kgsl_genlock_event_priv *event;
@@ -1859,8 +1983,8 @@
 		return ret;
 	}
 
-	ret = kgsl_add_event(device, timestamp, kgsl_genlock_event_cb, event,
-			     owner);
+	ret = kgsl_add_event(device, context_id, timestamp,
+			kgsl_genlock_event_cb, event, owner);
 	if (ret)
 		kfree(event);
 
@@ -1868,7 +1992,7 @@
 }
 #else
 static long kgsl_add_genlock_event(struct kgsl_device *device,
-	u32 timestamp, void __user *data, int len,
+	u32 context_id, u32 timestamp, void __user *data, int len,
 	struct kgsl_device_private *owner)
 {
 	return -EINVAL;
@@ -1892,8 +2016,8 @@
 	switch (param->type) {
 	case KGSL_TIMESTAMP_EVENT_GENLOCK:
 		ret = kgsl_add_genlock_event(dev_priv->device,
-			param->timestamp, param->priv, param->len,
-			dev_priv);
+			param->context_id, param->timestamp, param->priv,
+			param->len, dev_priv);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1917,12 +2041,18 @@
 			kgsl_ioctl_device_getproperty, 1),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP,
 			kgsl_ioctl_device_waittimestamp, 1),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID,
+			kgsl_ioctl_device_waittimestamp_ctxtid, 1),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS,
 			kgsl_ioctl_rb_issueibcmds, 1),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP,
 			kgsl_ioctl_cmdstream_readtimestamp, 1),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID,
+			kgsl_ioctl_cmdstream_readtimestamp_ctxtid, 1),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP,
 			kgsl_ioctl_cmdstream_freememontimestamp, 1),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID,
+			kgsl_ioctl_cmdstream_freememontimestamp_ctxtid, 1),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE,
 			kgsl_ioctl_drawctxt_create, 1),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY,
@@ -2252,13 +2382,13 @@
 
 	INIT_LIST_HEAD(&device->events);
 
+	device->last_expired_ctxt_id = KGSL_CONTEXT_INVALID;
+
 	ret = kgsl_mmu_init(device);
 	if (ret != 0)
 		goto err_dest_work_q;
 
-	ret = kgsl_allocate_contiguous(&device->memstore,
-		sizeof(struct kgsl_devmemstore));
-
+	ret = kgsl_allocate_contiguous(&device->memstore, KGSL_MEMSTORE_SIZE);
 	if (ret != 0)
 		goto err_close_mmu;
 
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index 87bba25..fcd8448 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -25,6 +25,14 @@
 
 #define KGSL_NAME "kgsl"
 
+/* The number of memstore arrays limits the number of contexts allowed.
+ * If more contexts are needed, raise the PAGE_SIZE multiplier below.
+ */
+#define KGSL_MEMSTORE_SIZE	((int)(PAGE_SIZE * 2))
+#define KGSL_MEMSTORE_GLOBAL	(0)	/* id used when no context is given */
+#define KGSL_MEMSTORE_MAX	(KGSL_MEMSTORE_SIZE / \
+		sizeof(struct kgsl_devmemstore) - 1)
+
 /*cache coherency ops */
 #define DRM_KGSL_GEM_CACHE_OP_TO_DEV	0x0001
 #define DRM_KGSL_GEM_CACHE_OP_FROM_DEV	0x0002
@@ -143,6 +151,7 @@
 	void *priv_data;
 	struct rb_node node;
 	uint32_t free_timestamp;
+	unsigned int context_id;
 	/* back pointer to private structure under whose context this
 	* allocation is made */
 	struct kgsl_process_private *priv;
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index feaf652..334b29f 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -76,9 +76,10 @@
 		enum kgsl_property_type type, void *value,
 		unsigned int sizebytes);
 	int (*waittimestamp) (struct kgsl_device *device,
-		unsigned int timestamp, unsigned int msecs);
+		struct kgsl_context *context, unsigned int timestamp,
+		unsigned int msecs);
 	unsigned int (*readtimestamp) (struct kgsl_device *device,
-		enum kgsl_timestamp_type type);
+		struct kgsl_context *context, enum kgsl_timestamp_type type);
 	int (*issueibcmds) (struct kgsl_device_private *dev_priv,
 		struct kgsl_context *context, struct kgsl_ibdesc *ibdesc,
 		unsigned int sizedwords, uint32_t *timestamp,
@@ -126,8 +127,9 @@
 };
 
 struct kgsl_event {
+	struct kgsl_context *context;
 	uint32_t timestamp;
-	void (*func)(struct kgsl_device *, void *, u32);
+	void (*func)(struct kgsl_device *, void *, u32, u32);
 	void *priv;
 	struct list_head list;
 	struct kgsl_device_private *owner;
@@ -159,6 +161,7 @@
 	uint32_t state;
 	uint32_t requested_state;
 
+	unsigned int last_expired_ctxt_id;
 	unsigned int active_cnt;
 	struct completion suspend_gate;
 
@@ -333,7 +336,8 @@
 	return  (ctxt && ctxt->dev_priv == dev_priv) ? ctxt : NULL;
 }
 
-int kgsl_check_timestamp(struct kgsl_device *device, unsigned int timestamp);
+int kgsl_check_timestamp(struct kgsl_device *device,
+		struct kgsl_context *context, unsigned int timestamp);
 
 int kgsl_register_ts_notifier(struct kgsl_device *device,
 			      struct notifier_block *nb);
diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c
index d43b29b..33f4b95 100644
--- a/drivers/gpu/msm/kgsl_drm.c
+++ b/drivers/gpu/msm/kgsl_drm.c
@@ -1398,7 +1398,7 @@
 	}
 
 	device = kgsl_get_device(ts_device);
-	ts_done = kgsl_check_timestamp(device, args->timestamp);
+	ts_done = kgsl_check_timestamp(device, NULL, args->timestamp);
 
 	mutex_lock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
index cfcb2ea..c24576d 100644
--- a/drivers/gpu/msm/kgsl_snapshot.c
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -64,7 +64,7 @@
 
 	header->timestamp_queued = -1;
 	header->timestamp_retired = device->ftbl->readtimestamp(device,
-		KGSL_TIMESTAMP_RETIRED);
+		context, KGSL_TIMESTAMP_RETIRED);
 
 	_ctxtptr += sizeof(struct kgsl_snapshot_linux_context);
 
diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h
index 22bc576..84d7f94 100644
--- a/drivers/gpu/msm/kgsl_trace.h
+++ b/drivers/gpu/msm/kgsl_trace.h
@@ -76,25 +76,30 @@
 TRACE_EVENT(kgsl_readtimestamp,
 
 	TP_PROTO(struct kgsl_device *device,
-			struct kgsl_cmdstream_readtimestamp *cmd),
+			unsigned int context_id,
+			unsigned int type,
+			unsigned int timestamp),
 
-	TP_ARGS(device, cmd),
+	TP_ARGS(device, context_id, type, timestamp),
 
 	TP_STRUCT__entry(
 		__string(device_name, device->name)
+		__field(unsigned int, context_id)
 		__field(unsigned int, type)
 		__field(unsigned int, timestamp)
 	),
 
 	TP_fast_assign(
 		__assign_str(device_name, device->name);
-		__entry->type = cmd->type;
-		__entry->timestamp = cmd->timestamp;
+		__entry->context_id = context_id;
+		__entry->type = type;
+		__entry->timestamp = timestamp;
 	),
 
 	TP_printk(
-		"d_name=%s type=%u timestamp=%u",
+		"d_name=%s context_id=%u type=%u timestamp=%u",
 		__get_str(device_name),
+		__entry->context_id,
 		__entry->type,
 		__entry->timestamp
 	)
@@ -106,25 +111,30 @@
 TRACE_EVENT(kgsl_waittimestamp_entry,
 
 	TP_PROTO(struct kgsl_device *device,
-			struct kgsl_device_waittimestamp *cmd),
+			unsigned int context_id,
+			unsigned int timestamp,
+			unsigned int timeout),
 
-	TP_ARGS(device, cmd),
+	TP_ARGS(device, context_id, timestamp, timeout),
 
 	TP_STRUCT__entry(
 		__string(device_name, device->name)
+		__field(unsigned int, context_id)
 		__field(unsigned int, timestamp)
 		__field(unsigned int, timeout)
 	),
 
 	TP_fast_assign(
 		__assign_str(device_name, device->name);
-		__entry->timestamp = cmd->timestamp;
-		__entry->timeout = cmd->timeout;
+		__entry->context_id = context_id;
+		__entry->timestamp = timestamp;
+		__entry->timeout = timeout;
 	),
 
 	TP_printk(
-		"d_name=%s timestamp=%u timeout=%u",
+		"d_name=%s context_id=%u timestamp=%u timeout=%u",
 		__get_str(device_name),
+		__entry->context_id,
 		__entry->timestamp,
 		__entry->timeout
 	)
@@ -333,9 +343,10 @@
 
 DECLARE_EVENT_CLASS(kgsl_mem_timestamp_template,
 
-	TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned int curr_ts),
+	TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned int id,
+		unsigned int curr_ts),
 
-	TP_ARGS(mem_entry, curr_ts),
+	TP_ARGS(mem_entry, id, curr_ts),
 
 	TP_STRUCT__entry(
 		__field(unsigned int, gpuaddr)
@@ -349,7 +360,7 @@
 	TP_fast_assign(
 		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
 		__entry->size = mem_entry->memdesc.size;
-		__entry->drawctxt_id = 1337;
+		__entry->drawctxt_id = id;
 		__entry->type = mem_entry->memtype;
 		__entry->curr_ts = curr_ts;
 		__entry->free_ts = mem_entry->free_timestamp;
@@ -363,13 +374,15 @@
 );
 
 DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_queue,
-	TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned int curr_ts),
-	TP_ARGS(mem_entry, curr_ts)
+	TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned int id,
+		unsigned int curr_ts),
+	TP_ARGS(mem_entry, id, curr_ts)
 );
 
 DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_free,
-	TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned int curr_ts),
-	TP_ARGS(mem_entry, curr_ts)
+	TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned int id,
+		unsigned int curr_ts),
+	TP_ARGS(mem_entry, id, curr_ts)
 );
 
 
diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c
index 41f4435..bc5c960 100644
--- a/drivers/gpu/msm/z180.c
+++ b/drivers/gpu/msm/z180.c
@@ -101,6 +101,7 @@
 static int z180_start(struct kgsl_device *device, unsigned int init_ram);
 static int z180_stop(struct kgsl_device *device);
 static int z180_wait(struct kgsl_device *device,
+				struct kgsl_context *context,
 				unsigned int timestamp,
 				unsigned int msecs);
 static void z180_regread(struct kgsl_device *device,
@@ -385,8 +386,8 @@
 
 	if (timestamp_cmp(z180_dev->current_timestamp,
 		z180_dev->timestamp) > 0)
-		status = z180_wait(device, z180_dev->current_timestamp,
-					timeout);
+		status = z180_wait(device, NULL,
+				z180_dev->current_timestamp, timeout);
 
 	if (status)
 		KGSL_DRV_ERR(device, "z180_waittimestamp() timed out\n");
@@ -821,14 +822,16 @@
 }
 
 static unsigned int z180_readtimestamp(struct kgsl_device *device,
-			     enum kgsl_timestamp_type type)
+		struct kgsl_context *context, enum kgsl_timestamp_type type)
 {
 	struct z180_device *z180_dev = Z180_DEVICE(device);
+	(void)context;
 	/* get current EOP timestamp */
 	return z180_dev->timestamp;
 }
 
 static int z180_waittimestamp(struct kgsl_device *device,
+				struct kgsl_context *context,
 				unsigned int timestamp,
 				unsigned int msecs)
 {
@@ -839,13 +842,14 @@
 		msecs = 10 * MSEC_PER_SEC;
 
 	mutex_unlock(&device->mutex);
-	status = z180_wait(device, timestamp, msecs);
+	status = z180_wait(device, context, timestamp, msecs);
 	mutex_lock(&device->mutex);
 
 	return status;
 }
 
 static int z180_wait(struct kgsl_device *device,
+				struct kgsl_context *context,
 				unsigned int timestamp,
 				unsigned int msecs)
 {
@@ -854,7 +858,7 @@
 
 	timeout = wait_io_event_interruptible_timeout(
 			device->wait_queue,
-			kgsl_check_timestamp(device, timestamp),
+			kgsl_check_timestamp(device, context, timestamp),
 			msecs_to_jiffies(msecs));
 
 	if (timeout > 0)