gpu: msm2: Sync to upstream

* Sync with changes done in the Google MSM kernel:
  - replace the dispatcher pause/resume state machine with the
    cmdbatch_gate completion and an ADRENO_DISPATCHER_POWER bit
  - fold the per-target ctxt_save/ctxt_restore/ctxt_draw_workaround
    hooks into per-context adreno_context_ops backed by a generic
    adreno_context_restore()
  - refcount cmdbatch sync events so asynchronous callbacks can
    signal safely after kgsl_cmdbatch_destroy()
  - take the GPU active count at the call sites that touch hardware
    instead of in the generic ioctl path

Change-Id: I38b3d4b41a986dc16097b838afc770e372ddfc7d
diff --git a/drivers/gpu/msm2/adreno.c b/drivers/gpu/msm2/adreno.c
index 72d7410..9c9b761 100644
--- a/drivers/gpu/msm2/adreno.c
+++ b/drivers/gpu/msm2/adreno.c
@@ -1743,7 +1743,7 @@
 		goto error_rb_stop;
 
 	/* Start the dispatcher */
-	adreno_dispatcher_start(adreno_dev);
+	adreno_dispatcher_start(device);
 
 	device->reset_counter++;
 
@@ -2411,14 +2411,13 @@
  * adreno_drain() - Drain the dispatch queue
  * @device: Pointer to the KGSL device structure for the GPU
  *
- * Tell the dispatcher to pause - this has the effect of draining the inflight
- * command batches
+ * Drain the dispatcher of existing command batches.  Closing the cmdbatch
+ * gate blocks new submissions until adreno_dispatcher_start() reopens it.
  */
 static int adreno_drain(struct kgsl_device *device)
 {
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	INIT_COMPLETION(device->cmdbatch_gate);
 
-	adreno_dispatcher_pause(adreno_dev);
 	return 0;
 }
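
For reference, adreno_drain(), adreno_dispatcher_start() and the ringbuffer
submit path now cooperate through a simple completion gate.  A condensed
sketch of the protocol, assuming only the stock completion API of this
kernel generation (INIT_COMPLETION() is the old spelling of what later
became reinit_completion()):

	/* Sketch: cmdbatch gate lifecycle (condensed, not literal code) */
	struct completion gate;

	init_completion(&gate);		/* device init: gate starts closed */
	complete_all(&gate);		/* adreno_dispatcher_start(): open gate */
	wait_for_completion(&gate);	/* submit path: passes while gate open */
	INIT_COMPLETION(gate);		/* adreno_drain(): close gate again */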
 
@@ -2720,8 +2719,17 @@
 	}
 	case IOCTL_KGSL_PERFCOUNTER_GET: {
 		struct kgsl_perfcounter_get *get = data;
+		/*
+		 * adreno_perfcounter_get() is called by kernel clients
+		 * during start(), so it is not safe to take an active
+		 * count inside that function; take it here instead.
+		 */
+		result = kgsl_active_count_get(device);
+		if (result)
+			break;
 		result = adreno_perfcounter_get(adreno_dev, get->groupid,
 			get->countable, &get->offset, PERFCOUNTER_FLAG_NONE);
+		kgsl_active_count_put(device);
 		break;
 	}
 	case IOCTL_KGSL_PERFCOUNTER_PUT: {
@@ -2739,8 +2747,12 @@
 	}
 	case IOCTL_KGSL_PERFCOUNTER_READ: {
 		struct kgsl_perfcounter_read *read = data;
+		result = kgsl_active_count_get(device);
+		if (result)
+			break;
 		result = adreno_perfcounter_read_group(adreno_dev,
 			read->reads, read->count);
+		kgsl_active_count_put(device);
 		break;
 	}
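
Both perfcounter cases above now follow the same bracket: take the active
count before touching hardware, release it afterwards.  As a hedged sketch,
the pattern could be factored into a helper; kgsl_with_active_count() is
hypothetical and not part of this driver:

	static int kgsl_with_active_count(struct kgsl_device *device,
			int (*fn)(struct kgsl_device *device, void *data),
			void *data)
	{
		int ret;

		/* Keep the GPU powered while fn() touches registers */
		ret = kgsl_active_count_get(device);
		if (ret)
			return ret;

		ret = fn(device, data);
		kgsl_active_count_put(device);

		return ret;
	}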
 	default:
@@ -2817,13 +2829,6 @@
 	return (0x0003 << 16) | ((int) adreno_dev->gpurev);
 }
 
-static void adreno_resume(struct kgsl_device *device)
-{
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-
-	adreno_dispatcher_resume(adreno_dev);
-}
-
 static const struct kgsl_functable adreno_functable = {
 	/* Mandatory functions */
 	.regread = adreno_regread,
@@ -2855,7 +2860,7 @@
 	.setproperty = adreno_setproperty,
 	.postmortem_dump = adreno_dump,
 	.drawctxt_sched = adreno_drawctxt_sched,
-	.resume = adreno_resume,
+	.resume = adreno_dispatcher_start,
 };
 
 static struct platform_driver adreno_platform_driver = {
diff --git a/drivers/gpu/msm2/adreno.h b/drivers/gpu/msm2/adreno.h
index 881e39c..54503f2 100644
--- a/drivers/gpu/msm2/adreno.h
+++ b/drivers/gpu/msm2/adreno.h
@@ -126,7 +126,7 @@
  */
 struct adreno_dispatcher {
 	struct mutex mutex;
-	unsigned int state;
+	unsigned long priv;
 	struct timer_list timer;
 	struct timer_list fault_timer;
 	unsigned int inflight;
@@ -140,6 +140,10 @@
 	struct kobject kobj;
 };
 
+enum adreno_dispatcher_flags {
+	ADRENO_DISPATCHER_POWER = 0,
+};
+
 struct adreno_gpudev;
 
 struct adreno_device {
@@ -355,10 +359,6 @@
 
 	/* GPU specific function hooks */
 	int (*ctxt_create)(struct adreno_device *, struct adreno_context *);
-	int (*ctxt_save)(struct adreno_device *, struct adreno_context *);
-	int (*ctxt_restore)(struct adreno_device *, struct adreno_context *);
-	int (*ctxt_draw_workaround)(struct adreno_device *,
-					struct adreno_context *);
 	irqreturn_t (*irq_handler)(struct adreno_device *);
 	void (*irq_control)(struct adreno_device *, int);
 	unsigned int (*irq_pending)(struct adreno_device *);
@@ -414,6 +414,14 @@
 	{ BIT(KGSL_FT_DISABLE), "disable" }, \
 	{ BIT(KGSL_FT_TEMP_DISABLE), "temp" }
 
+#define ADRENO_FT_TYPES \
+	{ BIT(KGSL_FT_OFF), "off" }, \
+	{ BIT(KGSL_FT_REPLAY), "replay" }, \
+	{ BIT(KGSL_FT_SKIPIB), "skipib" }, \
+	{ BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \
+	{ BIT(KGSL_FT_DISABLE), "disable" }, \
+	{ BIT(KGSL_FT_TEMP_DISABLE), "temp" }
+
 extern struct adreno_gpudev adreno_a2xx_gpudev;
 extern struct adreno_gpudev adreno_a3xx_gpudev;
 extern struct adreno_gpudev adreno_a4xx_gpudev;
@@ -477,7 +485,7 @@
 void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain,
 		int hang);
 
-void adreno_dispatcher_start(struct adreno_device *adreno_dev);
+void adreno_dispatcher_start(struct kgsl_device *device);
 int adreno_dispatcher_init(struct adreno_device *adreno_dev);
 void adreno_dispatcher_close(struct adreno_device *adreno_dev);
 int adreno_dispatcher_idle(struct adreno_device *adreno_dev,
@@ -491,7 +499,6 @@
 
 void adreno_dispatcher_schedule(struct kgsl_device *device);
 void adreno_dispatcher_pause(struct adreno_device *adreno_dev);
-void adreno_dispatcher_resume(struct adreno_device *adreno_dev);
 void adreno_dispatcher_queue_context(struct kgsl_device *device,
 	struct adreno_context *drawctxt);
 int adreno_reset(struct kgsl_device *device);
diff --git a/drivers/gpu/msm2/adreno_a2xx.c b/drivers/gpu/msm2/adreno_a2xx.c
index 1f4544f..a5987af 100644
--- a/drivers/gpu/msm2/adreno_a2xx.c
+++ b/drivers/gpu/msm2/adreno_a2xx.c
@@ -1398,11 +1398,56 @@
 	return 0;
 }
 
+static void a2xx_drawctxt_detach(struct adreno_context *drawctxt)
+{
+	kgsl_sharedmem_free(&drawctxt->gpustate);
+	kgsl_sharedmem_free(&drawctxt->context_gmem_shadow.gmemshadow);
+}
+
+static int a2xx_drawctxt_save(struct adreno_device *adreno_dev,
+			struct adreno_context *context);
+
+static int a2xx_drawctxt_restore(struct adreno_device *adreno_dev,
+			struct adreno_context *context);
+
+static int a2xx_drawctxt_draw_workaround(struct adreno_device *adreno_dev,
+					struct adreno_context *context);
+
+static const struct adreno_context_ops a225_preamble_ctx_ops = {
+	.restore = adreno_context_restore,
+	.draw_workaround = a2xx_drawctxt_draw_workaround,
+};
+
+static const struct adreno_context_ops a225_legacy_ctx_ops = {
+	.save = a2xx_drawctxt_save,
+	.restore = a2xx_drawctxt_restore,
+	.draw_workaround = a2xx_drawctxt_draw_workaround,
+	.detach = a2xx_drawctxt_detach,
+};
+
+static const struct adreno_context_ops a2xx_legacy_ctx_ops = {
+	.save = a2xx_drawctxt_save,
+	.restore = a2xx_drawctxt_restore,
+	.detach = a2xx_drawctxt_detach,
+};
+
+
 static int a2xx_drawctxt_create(struct adreno_device *adreno_dev,
 	struct adreno_context *drawctxt)
 {
 	int ret;
 
+	if (drawctxt->flags & CTXT_FLAGS_PREAMBLE
+	   && drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC) {
+		drawctxt->ops = (adreno_is_a225(adreno_dev))
+			?  &a225_preamble_ctx_ops : &adreno_preamble_ctx_ops;
+
+		return 0;
+	}
+
+	drawctxt->ops = (adreno_is_a225(adreno_dev))
+			?  &a225_legacy_ctx_ops : &a2xx_legacy_ctx_ops;
+
 	/*
 	 * Allocate memory for the GPU state and the context commands.
-	 * Despite the name, this is much more then just storage for
+	 * Despite the name, this is much more than just storage for
@@ -1510,12 +1555,6 @@
 	struct kgsl_device *device = &adreno_dev->dev;
 	int ret;
 
-	if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTROYED))
-		return 0;
-
-	if (context->state == ADRENO_CONTEXT_STATE_INVALID)
-		return 0;
-
 	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->reg_save[1],
@@ -1597,40 +1636,14 @@
 			struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
-	unsigned int cmds[5];
-	int ret = 0;
+	int ret;
 
-	if (context == NULL) {
-		/* No context - set the default pagetable and thats it */
-		unsigned int id;
-		/*
-		 * If there isn't a current context, the kgsl_mmu_setstate
-		 * will use the CPU path so we don't need to give
-		 * it a valid context id.
-		 */
-		id = (adreno_dev->drawctxt_active != NULL)
-			? adreno_dev->drawctxt_active->base.id
-			: KGSL_CONTEXT_INVALID;
-		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
-				  id);
-		return 0;
-	}
-
-	cmds[0] = cp_nop_packet(1);
-	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
-	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
-	cmds[3] = device->memstore.gpuaddr +
-		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
-	cmds[4] = context->base.id;
-	ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
-					cmds, 5);
+	ret = adreno_context_restore(adreno_dev, context);
 	if (ret)
 		return ret;
 
-	kgsl_mmu_setstate(&device->mmu, context->base.proc_priv->pagetable,
-			context->base.id);
-
-	/* restore gmem.
+	/*
+	 *  restore gmem.
 	 *  (note: changes shader. shader must not already be restored.)
 	 */
 	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
@@ -1689,6 +1702,7 @@
 	}
 
 	if (adreno_is_a20x(adreno_dev)) {
+		unsigned int cmds[2];
 		cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1);
 		cmds[1] = context->bin_base_offset;
 		ret = adreno_ringbuffer_issuecmds(device, context,
@@ -2293,9 +2307,6 @@
 	.reg_offsets = &a2xx_reg_offsets,
 
 	.ctxt_create = a2xx_drawctxt_create,
-	.ctxt_save = a2xx_drawctxt_save,
-	.ctxt_restore = a2xx_drawctxt_restore,
-	.ctxt_draw_workaround = a2xx_drawctxt_draw_workaround,
 	.irq_handler = a2xx_irq_handler,
 	.irq_control = a2xx_irq_control,
 	.irq_pending = a2xx_irq_pending,
diff --git a/drivers/gpu/msm2/adreno_a3xx.c b/drivers/gpu/msm2/adreno_a3xx.c
index b563c13..3934226 100644
--- a/drivers/gpu/msm2/adreno_a3xx.c
+++ b/drivers/gpu/msm2/adreno_a3xx.c
@@ -2343,17 +2343,40 @@
 	return 0;
 }
 
+static void a3xx_drawctxt_detach(struct adreno_context *drawctxt)
+{
+	kgsl_sharedmem_free(&drawctxt->gpustate);
+	kgsl_sharedmem_free(&drawctxt->context_gmem_shadow.gmemshadow);
+}
+
+static int a3xx_drawctxt_save(struct adreno_device *adreno_dev,
+			   struct adreno_context *context);
+
+static int a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
+			      struct adreno_context *context);
+
+static const struct adreno_context_ops a3xx_legacy_ctx_ops = {
+	.save = a3xx_drawctxt_save,
+	.restore = a3xx_drawctxt_restore,
+	.detach = a3xx_drawctxt_detach,
+};
+
+
 static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
 	struct adreno_context *drawctxt)
 {
 	int ret;
 
 	/*
-	 * Allocate memory for the GPU state and the context commands.
-	 * Despite the name, this is much more then just storage for
-	 * the gpustate.  This contains command space for gmem save
-	 * and texture and vertex buffer storage too
+	 * Nothing to do here if the context is using preambles and doesn't need
+	 * GMEM save/restore
 	 */
+	if ((drawctxt->flags & CTXT_FLAGS_PREAMBLE) &&
+		(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) {
+		drawctxt->ops = &adreno_preamble_ctx_ops;
+		return 0;
+	}
+	drawctxt->ops = &a3xx_legacy_ctx_ops;
 
 	ret = kgsl_allocate(&drawctxt->gpustate,
 		drawctxt->base.proc_priv->pagetable, CONTEXT_SIZE);
@@ -2389,9 +2412,6 @@
 	struct kgsl_device *device = &adreno_dev->dev;
 	int ret;
 
-	if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTROYED))
-		return 0;
-
 	if (context->state == ADRENO_CONTEXT_STATE_INVALID)
 		return 0;
 
@@ -2449,39 +2469,13 @@
 			      struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
-	unsigned int cmds[5];
-	int ret = 0;
+	int ret;
 
-	if (context == NULL) {
-		/* No context - set the default pagetable and thats it */
-		unsigned int id;
-		/*
-		 * If there isn't a current context, the kgsl_mmu_setstate
-		 * will use the CPU path so we don't need to give
-		 * it a valid context id.
-		 */
-		id = (adreno_dev->drawctxt_active != NULL)
-			? adreno_dev->drawctxt_active->base.id
-			: KGSL_CONTEXT_INVALID;
-		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
-				  id);
-		return 0;
-	}
-
-	cmds[0] = cp_nop_packet(1);
-	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
-	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
-	cmds[3] = device->memstore.gpuaddr +
-		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
-	cmds[4] = context->base.id;
-	ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
-					cmds, 5);
+	/* do the common part */
+	ret = adreno_context_restore(adreno_dev, context);
 	if (ret)
 		return ret;
 
-	kgsl_mmu_setstate(&device->mmu, context->base.proc_priv->pagetable,
-			context->base.id);
-
 	/*
 	 * Restore GMEM.  (note: changes shader.
 	 * Shader must not already be restored.)
@@ -4384,9 +4378,6 @@
 	.perfcounters = &a3xx_perfcounters,
 
 	.ctxt_create = a3xx_drawctxt_create,
-	.ctxt_save = a3xx_drawctxt_save,
-	.ctxt_restore = a3xx_drawctxt_restore,
-	.ctxt_draw_workaround = NULL,
 	.rb_init = a3xx_rb_init,
 	.perfcounter_init = a3xx_perfcounter_init,
 	.perfcounter_close = a3xx_perfcounter_close,
diff --git a/drivers/gpu/msm2/adreno_a4xx.c b/drivers/gpu/msm2/adreno_a4xx.c
index 90d816b..9f8b7583 100644
--- a/drivers/gpu/msm2/adreno_a4xx.c
+++ b/drivers/gpu/msm2/adreno_a4xx.c
@@ -106,70 +106,6 @@
 
 const unsigned int a4xx_registers_count = ARRAY_SIZE(a4xx_registers) / 2;
 
-static int a4xx_drawctxt_create(struct adreno_device *adreno_dev,
-	struct adreno_context *drawctxt)
-{
-	int ret = 0;
-	struct kgsl_device *device = &adreno_dev->dev;
-
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
-		/* This option is not supported on a4xx */
-		KGSL_DRV_ERR(device,
-			"Preambles required for A4XX draw contexts\n");
-		ret = -EPERM;
-		goto done;
-	}
-
-	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) {
-		/* This option is not supported on a4xx */
-		KGSL_DRV_ERR(device,
-			"Cannot create context with gmemalloc\n");
-		ret = -EPERM;
-	}
-
-done:
-	return ret;
-}
-
-static int a4xx_drawctxt_restore(struct adreno_device *adreno_dev,
-			      struct adreno_context *context)
-{
-	struct kgsl_device *device = &adreno_dev->dev;
-	unsigned int cmds[5];
-	int ret;
-
-	if (context == NULL) {
-		/* No context - set the default pagetable and thats it */
-		unsigned int id;
-		/*
-		 * If there isn't a current context, the kgsl_mmu_setstate
-		 * will use the CPU path so we don't need to give
-		 * it a valid context id.
-		 */
-		id = (adreno_dev->drawctxt_active != NULL)
-			? adreno_dev->drawctxt_active->base.id
-			: KGSL_CONTEXT_INVALID;
-		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
-				  id);
-		return 0;
-	}
-
-	cmds[0] = cp_nop_packet(1);
-	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
-	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
-	cmds[3] = device->memstore.gpuaddr +
-		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
-	cmds[4] = context->base.id;
-	ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
-					cmds, 5);
-	if (ret)
-		return ret;
-	ret = kgsl_mmu_setstate(&device->mmu,
-			context->base.proc_priv->pagetable,
-			context->base.id);
-	return ret;
-}
-
 static const struct adreno_vbif_data a420_vbif[] = {
 	{ A4XX_VBIF_ABIT_SORT, 0x0001001F },
 	{ A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
@@ -294,8 +230,6 @@
 struct adreno_gpudev adreno_a4xx_gpudev = {
 	.reg_offsets = &a4xx_reg_offsets,
 
-	.ctxt_create = a4xx_drawctxt_create,
-	.ctxt_restore = a4xx_drawctxt_restore,
 	.rb_init = a3xx_rb_init,
 	.irq_control = a3xx_irq_control,
 	.irq_handler = a3xx_irq_handler,
diff --git a/drivers/gpu/msm2/adreno_dispatch.c b/drivers/gpu/msm2/adreno_dispatch.c
index 3bcc7a3..357242f 100644
--- a/drivers/gpu/msm2/adreno_dispatch.c
+++ b/drivers/gpu/msm2/adreno_dispatch.c
@@ -22,13 +22,6 @@
 #include "adreno_ringbuffer.h"
 #include "adreno_trace.h"
 
-#define ADRENO_DISPATCHER_ACTIVE 0
-#define ADRENO_DISPATCHER_PAUSE 1
-
-#define ADRENO_DISPATCHER_SOFT_FAULT 1
-#define ADRENO_DISPATCHER_HARD_FAULT 2
-#define ADRENO_DISPATCHER_TIMEOUT_FAULT 3
-
 #define CMDQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s))
 
 /* Number of commands that can be queued in a context before it sleeps */
@@ -134,12 +127,12 @@
 }
 
 /**
- * adreno_context_get_cmdbatch() - Get a new command from a context queue
+ * adreno_dispatcher_get_cmdbatch() - Get a new command from a context queue
  * @drawctxt: Pointer to the adreno draw context
  *
  * Dequeue a new command batch from the context list
  */
-static inline struct kgsl_cmdbatch *adreno_context_get_cmdbatch(
+static inline struct kgsl_cmdbatch *adreno_dispatcher_get_cmdbatch(
 		struct adreno_context *drawctxt)
 {
 	struct kgsl_cmdbatch *cmdbatch = NULL;
@@ -170,7 +163,8 @@
 }
 
 /**
- * adreno_context_requeue_cmdbatch() - Put a command back on the context queue
+ * adreno_dispatcher_requeue_cmdbatch() - Put a command back on the context
+ * queue
  * @drawctxt: Pointer to the adreno draw context
  * @cmdbatch: Pointer to the KGSL cmdbatch to requeue
  *
@@ -261,7 +255,9 @@
 
 	dispatcher->inflight++;
 
-	if (dispatcher->inflight == 1) {
+	if (dispatcher->inflight == 1 &&
+			!test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) {
+
 		/* Time to make the donuts.  Turn on the GPU */
 		ret = kgsl_active_count_get(device);
 		if (ret) {
@@ -269,6 +265,8 @@
 			mutex_unlock(&device->mutex);
 			return ret;
 		}
+
+		set_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
 	}
 
 	ret = adreno_ringbuffer_submitcmd(adreno_dev, cmdbatch);
@@ -281,8 +279,10 @@
 	if (dispatcher->inflight == 1) {
 		if (ret == 0)
 			fault_detect_read(device);
-		else
+		else {
 			kgsl_active_count_put(device);
+			clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+		}
 	}
 
 	mutex_unlock(&device->mutex);
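
The ADRENO_DISPATCHER_POWER bit guarantees the dispatcher holds at most one
active-count reference however often submission retries; it also explains
why adreno_dispatcher.state became the unsigned long .priv, since
set_bit()/test_bit()/clear_bit() operate on an unsigned long.  A minimal
sketch of the invariant, with a hypothetical helper name:

	static int dispatcher_power_get(struct adreno_device *adreno_dev)
	{
		struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
		struct kgsl_device *device = &adreno_dev->dev;
		int ret = 0;

		/* Take the reference only on the 0 -> 1 transition */
		if (!test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) {
			ret = kgsl_active_count_get(device);
			if (ret == 0)
				set_bit(ADRENO_DISPATCHER_POWER,
					&dispatcher->priv);
		}

		return ret;
	}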
@@ -340,11 +340,15 @@
 	/*
 	 * Each context can send a specific number of command batches per cycle
 	 */
-	for ( ; count < _context_cmdbatch_burst &&
-		dispatcher->inflight < _dispatcher_inflight; count++) {
+	while ((count < _context_cmdbatch_burst) &&
+		(dispatcher->inflight < _dispatcher_inflight)) {
 		int ret;
-		struct kgsl_cmdbatch *cmdbatch =
-			adreno_context_get_cmdbatch(drawctxt);
+		struct kgsl_cmdbatch *cmdbatch;
+
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			break;
+
+		cmdbatch = adreno_dispatcher_get_cmdbatch(drawctxt);
 
 		if (cmdbatch == NULL)
 			break;
@@ -369,7 +373,6 @@
 		 */
 
 		if (cmdbatch->flags & KGSL_CONTEXT_SYNC) {
-			count--;
 			kgsl_cmdbatch_destroy(cmdbatch);
 			continue;
 		}
@@ -424,8 +427,7 @@
 	int ret;
 
 	/* Leave early if the dispatcher isn't in a happy state */
-	if ((dispatcher->state != ADRENO_DISPATCHER_ACTIVE) ||
-		adreno_gpu_fault(adreno_dev) != 0)
+	if (adreno_gpu_fault(adreno_dev) != 0)
-			return 0;
+		return 0;
 
 	plist_head_init(&requeue);
@@ -434,8 +436,7 @@
 	while (dispatcher->inflight < _dispatcher_inflight) {
 
-		/* Stop doing things if the dispatcher is paused or faulted */
+		/* Stop doing things if the dispatcher is faulted */
-		if ((dispatcher->state != ADRENO_DISPATCHER_ACTIVE) ||
-			adreno_gpu_fault(adreno_dev) != 0)
+		if (adreno_gpu_fault(adreno_dev) != 0)
 			break;
 
 		spin_lock(&dispatcher->plist_lock);
@@ -596,7 +597,7 @@
 
 	mutex_lock(&drawctxt->mutex);
 
-	if (drawctxt->flags & CTXT_FLAGS_BEING_DESTROYED) {
+	if (kgsl_context_detached(&drawctxt->base)) {
 		mutex_unlock(&drawctxt->mutex);
 		return -EINVAL;
 	}
@@ -607,7 +608,7 @@
 	 */
 
 	if (drawctxt->flags & CTXT_FLAGS_FORCE_PREAMBLE) {
-		cmdbatch->priv |= CMDBATCH_FLAG_FORCE_PREAMBLE;
+		set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv);
 		drawctxt->flags &= ~CTXT_FLAGS_FORCE_PREAMBLE;
 	}
 
@@ -618,7 +619,7 @@
 	 */
 
 	if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
-		cmdbatch->priv |= CMDBATCH_FLAG_SKIP;
+		set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv);
 
 		/*
 		 * If this command batch represents the EOF then clear the way
@@ -640,7 +641,7 @@
 	/* Wait for room in the context queue */
 
 	while (drawctxt->queued >= _context_cmdqueue_size) {
-		trace_adreno_context_sleep(drawctxt);
+		trace_adreno_drawctxt_sleep(drawctxt);
 		mutex_unlock(&drawctxt->mutex);
 
 		ret = wait_event_interruptible_timeout(drawctxt->wq,
@@ -648,7 +649,7 @@
 			msecs_to_jiffies(_context_queue_wait));
 
 		mutex_lock(&drawctxt->mutex);
-		trace_adreno_context_wake(drawctxt);
+		trace_adreno_drawctxt_wake(drawctxt);
 
 		if (ret <= 0) {
 			mutex_unlock(&drawctxt->mutex);
@@ -677,9 +678,6 @@
 
 	cmdbatch->timestamp = *timestamp;
 
-	/* The batch fault policy is the current system fault policy */
-	cmdbatch->fault_policy = adreno_dev->ft_policy;
-
 	/*
 	 * Set the fault tolerance policy for the command batch - assuming the
-	 * context hsn't disabled FT use the current device policy
+	 * context hasn't disabled FT, use the current device policy
@@ -1156,10 +1154,6 @@
 		}
 	}
 
-	mutex_lock(&device->mutex);
-	kgsl_active_count_put(device);
-	mutex_unlock(&device->mutex);
-
 	kfree(replay);
 
 	return 1;
@@ -1327,19 +1321,16 @@
 	 */
 	if (!fault_handled && dispatcher_do_fault(device))
 		goto done;
-	/*
-	 * Decrement the active count to 0 - this will allow the system to go
-	 * into suspend even if there are queued command batches
-	 */
 
-	mutex_lock(&device->mutex);
+	/*
+	 * If inflight went to 0, queue back up the event processor to catch
+	 * stragglers
+	 */
 	if (count && dispatcher->inflight == 0) {
-		del_timer_sync(&dispatcher->fault_timer);
-		kgsl_active_count_put(device);
-		/* Queue back up the event processor to catch stragglers */
+		mutex_lock(&device->mutex);
 		queue_work(device->work_queue, &device->ts_expired_ws);
+		mutex_unlock(&device->mutex);
 	}
-	mutex_unlock(&device->mutex);
 
 	/* Dispatch new commands if we have the room */
 	if (dispatcher->inflight < _dispatcher_inflight)
@@ -1353,12 +1344,29 @@
 
 		/* Update the timeout timer for the next command batch */
 		mod_timer(&dispatcher->timer, cmdbatch->expires);
-	}
 
-	/* Before leaving update the pwrscale information */
-	mutex_lock(&device->mutex);
-	kgsl_pwrscale_idle(device);
-	mutex_unlock(&device->mutex);
+		/* There are still things in flight - update the idle counts */
+		mutex_lock(&device->mutex);
+		kgsl_pwrscale_idle(device);
+		mutex_unlock(&device->mutex);
+	} else {
+		/* There is nothing left in the pipeline.  Shut 'er down boys */
+		mutex_lock(&device->mutex);
+
+		/*
+		 * Stop the fault timer before decrementing the active count to
+		 * avoid reading the hardware registers while we are trying to
+		 * turn clocks off
+		 */
+		del_timer_sync(&dispatcher->fault_timer);
+
+		if (test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) {
+			kgsl_active_count_put(device);
+			clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+		}
+
+		mutex_unlock(&device->mutex);
+	}
 
 	mutex_unlock(&dispatcher->mutex);
 }
@@ -1452,54 +1460,17 @@
 }
 
 /**
- * adreno_dispatcher_pause() - stop the dispatcher
- * @adreno_dev: pointer to the adreno device structure
- *
- * Pause the dispather so it doesn't accept any new commands
- */
-void adreno_dispatcher_pause(struct adreno_device *adreno_dev)
-{
-	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
-
-	/*
-	 * This will probably get called while holding other mutexes so don't
-	 * take the dispatcher mutex.  The biggest penalty is that another
-	 * command might be submitted while we are in here but thats okay
-	 * because whoever is waiting for the drain will just have another
-	 * command batch to wait for
-	 */
-
-	dispatcher->state = ADRENO_DISPATCHER_PAUSE;
-}
-
-/**
- * adreno_dispatcher_resume() - resume the dispatcher
- * @adreno_dev: pointer to the adreno device structure
- *
- * Set the dispatcher active so it can start accepting commands again
- */
-void adreno_dispatcher_resume(struct adreno_device *adreno_dev)
-{
-	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
-
-	dispatcher->state = ADRENO_DISPATCHER_ACTIVE;
-	adreno_dispatcher_schedule(&adreno_dev->dev);
-}
-
-/**
  * adreno_dispatcher_start() - activate the dispatcher
- * @adreno_dev: pointer to the adreno device structure
+ * @device: pointer to the KGSL device structure
  *
- * Set the disaptcher active and start the loop once to get things going
+ * Open the cmdbatch gate and start the loop once to get things going
  */
-void adreno_dispatcher_start(struct adreno_device *adreno_dev)
+void adreno_dispatcher_start(struct kgsl_device *device)
 {
-	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
-
-	dispatcher->state = ADRENO_DISPATCHER_ACTIVE;
+	complete_all(&device->cmdbatch_gate);
 
 	/* Schedule the work loop to get things going */
-	adreno_dispatcher_schedule(&adreno_dev->dev);
+	adreno_dispatcher_schedule(device);
 }
 
 /**
@@ -1514,8 +1485,6 @@
 
 	del_timer_sync(&dispatcher->timer);
 	del_timer_sync(&dispatcher->fault_timer);
-
-	dispatcher->state = ADRENO_DISPATCHER_PAUSE;
 }
 
 /**
@@ -1688,8 +1657,6 @@
 	plist_head_init(&dispatcher->pending);
 	spin_lock_init(&dispatcher->plist_lock);
 
-	dispatcher->state = ADRENO_DISPATCHER_ACTIVE;
-
 	ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher,
 		&device->dev->kobj, "dispatch");
 
diff --git a/drivers/gpu/msm2/adreno_drawctxt.c b/drivers/gpu/msm2/adreno_drawctxt.c
index b666c47..a62c96c 100644
--- a/drivers/gpu/msm2/adreno_drawctxt.c
+++ b/drivers/gpu/msm2/adreno_drawctxt.c
@@ -455,10 +455,21 @@
 	drawctxt->type =
 		(*flags & KGSL_CONTEXT_TYPE_MASK) >> KGSL_CONTEXT_TYPE_SHIFT;
 
-	ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
-	if (ret)
+	if (adreno_dev->gpudev->ctxt_create) {
+		ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
+		if (ret)
+			goto err;
+	} else if ((*flags & KGSL_CONTEXT_PREAMBLE) == 0 ||
+		  (*flags & KGSL_CONTEXT_NO_GMEM_ALLOC) == 0) {
+		KGSL_DEV_ERR_ONCE(device,
+				"legacy context switch not supported\n");
+		ret = -EINVAL;
 		goto err;
 
+	} else {
+		drawctxt->ops = &adreno_preamble_ctx_ops;
+	}
+
 	kgsl_sharedmem_writel(device, &device->memstore,
 			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp),
 			0);
@@ -507,19 +518,8 @@
 	drawctxt = ADRENO_CONTEXT(context);
 
 	/* deactivate context */
-	if (adreno_dev->drawctxt_active == drawctxt) {
-		/* no need to save GMEM or shader, the context is
-		 * being destroyed.
-		 */
-		drawctxt->flags &= ~(CTXT_FLAGS_GMEM_SAVE |
-				     CTXT_FLAGS_SHADER_SAVE |
-				     CTXT_FLAGS_GMEM_SHADOW |
-				     CTXT_FLAGS_STATE_SHADOW);
-
-		drawctxt->flags |= CTXT_FLAGS_BEING_DESTROYED;
-
+	if (adreno_dev->drawctxt_active == drawctxt)
 		adreno_drawctxt_switch(adreno_dev, NULL, 0);
-	}
 
 	mutex_lock(&drawctxt->mutex);
 
@@ -566,6 +566,9 @@
 	kgsl_sharedmem_free(&drawctxt->gpustate);
 	kgsl_sharedmem_free(&drawctxt->context_gmem_shadow.gmemshadow);
 
+	if (drawctxt->ops->detach)
+		drawctxt->ops->detach(drawctxt);
+
 	/* wake threads waiting to submit commands from this context */
 	wake_up_interruptible_all(&drawctxt->waiting);
 	wake_up_interruptible_all(&drawctxt->wq);
@@ -584,6 +587,69 @@
 	kfree(drawctxt);
 }
 
+
+/**
+ * adreno_context_restore() - generic context restore handler
+ * @adreno_dev: the device
+ * @context: the context
+ *
+ * Basic context restore handler that writes the context identifier
+ * to the ringbuffer and issues pagetable switch commands if necessary.
+ * May be called directly from the adreno_context_ops.restore function
+ * pointer or as the first action in a hardware specific restore
+ * function.
+ */
+int adreno_context_restore(struct adreno_device *adreno_dev,
+				  struct adreno_context *context)
+{
+	int ret;
+	struct kgsl_device *device;
+	unsigned int cmds[5];
+
+	if (adreno_dev == NULL || context == NULL)
+		return -EINVAL;
+
+	device = &adreno_dev->dev;
+	/* write the context identifier to the ringbuffer */
+	cmds[0] = cp_nop_packet(1);
+	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
+	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
+	cmds[3] = device->memstore.gpuaddr +
+		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
+	cmds[4] = context->base.id;
+	ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
+					cmds, 5);
+	if (ret)
+		return ret;
+
+	return kgsl_mmu_setstate(&device->mmu,
+			context->base.proc_priv->pagetable,
+			context->base.id);
+}
+
+
+const struct adreno_context_ops adreno_preamble_ctx_ops = {
+	.restore = adreno_context_restore,
+};
+
+/**
+ * context_save() - save old context when necessary
+ * @context: the old context
+ *
+ * For legacy context switching, we need to issue save commands
+ * unless the context has been detached or invalidated.
+ */
+static inline int context_save(struct adreno_device *adreno_dev,
+				struct adreno_context *context)
+{
+	if (context->ops->save == NULL
+		|| kgsl_context_detached(&context->base)
+		|| context->state == ADRENO_CONTEXT_STATE_INVALID)
+		return 0;
+
+	return context->ops->save(adreno_dev, context);
+}
+
 /**
  * adreno_drawctxt_set_bin_base_offset - set bin base offset for the context
  * @device - KGSL device that owns the context
@@ -622,6 +688,12 @@
 	int ret = 0;
 
 	if (drawctxt) {
+		/*
+		 * Handle the legacy gmem save/restore flag on each IB.
+		 * Userspace sets it to guard IB sequences that require
+		 * gmem to be saved, and clears it at the end of the
+		 * sequence.
+		 */
 		if (flags & KGSL_CONTEXT_SAVE_GMEM)
 			/* Set the flag in context so that the save is done
 			* when this context is switched out. */
@@ -633,46 +705,52 @@
 
 	/* already current? */
 	if (adreno_dev->drawctxt_active == drawctxt) {
-		if (adreno_dev->gpudev->ctxt_draw_workaround &&
-			adreno_is_a225(adreno_dev))
-				ret = adreno_dev->gpudev->ctxt_draw_workaround(
-					adreno_dev, drawctxt);
+		if (drawctxt && drawctxt->ops->draw_workaround)
+			ret = drawctxt->ops->draw_workaround(adreno_dev,
+							 drawctxt);
 		return ret;
 	}
 
 	trace_adreno_drawctxt_switch(adreno_dev->drawctxt_active,
 		drawctxt, flags);
 
-	/* Save the old context */
-	if (adreno_dev->gpudev->ctxt_save) {
-		ret = adreno_dev->gpudev->ctxt_save(adreno_dev,
-			adreno_dev->drawctxt_active);
-
+	if (adreno_dev->drawctxt_active) {
+		ret = context_save(adreno_dev, adreno_dev->drawctxt_active);
 		if (ret) {
 			KGSL_DRV_ERR(device,
 				"Error in GPU context %d save: %d\n",
 				adreno_dev->drawctxt_active->base.id, ret);
 			return ret;
 		}
-	}
 
-	/* Put the old instance of the active drawctxt */
-	if (adreno_dev->drawctxt_active)
+		/* Put the old instance of the active drawctxt */
 		kgsl_context_put(&adreno_dev->drawctxt_active->base);
+		adreno_dev->drawctxt_active = NULL;
+	}
 
 	/* Get a refcount to the new instance */
 	if (drawctxt) {
 		if (!_kgsl_context_get(&drawctxt->base))
 			return -EINVAL;
-	}
 
-	/* Set the new context */
-	ret = adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt);
-	if (ret) {
-		KGSL_DRV_ERR(device,
-			"Error in GPU context %d restore: %d\n",
-			drawctxt->base.id, ret);
-		return ret;
+		/* Set the new context */
+		ret = drawctxt->ops->restore(adreno_dev, drawctxt);
+		if (ret) {
+			KGSL_DRV_ERR(device,
+					"Error in GPU context %d restore: %d\n",
+					drawctxt->base.id, ret);
+			return ret;
+		}
+	} else {
+		/*
+		 * No context - set the default pagetable and that's it.
+		 * If there isn't a current context, the kgsl_mmu_setstate
+		 * will use the CPU path so we don't need to give
+		 * it a valid context id.
+		 */
+		ret = kgsl_mmu_setstate(&device->mmu,
+					 device->mmu.defaultpagetable,
+					 KGSL_CONTEXT_INVALID);
 	}
 
 	adreno_dev->drawctxt_active = drawctxt;
diff --git a/drivers/gpu/msm2/adreno_drawctxt.h b/drivers/gpu/msm2/adreno_drawctxt.h
index 0f65d4b..9312db6 100644
--- a/drivers/gpu/msm2/adreno_drawctxt.h
+++ b/drivers/gpu/msm2/adreno_drawctxt.h
@@ -46,8 +46,6 @@
 #define CTXT_FLAGS_PER_CONTEXT_TS	BIT(11)
 /* Context has caused a GPU hang and fault tolerance successful */
 #define CTXT_FLAGS_GPU_HANG_FT	BIT(12)
-/* Context is being destroyed so dont save it */
-#define CTXT_FLAGS_BEING_DESTROYED	BIT(13)
 /* User mode generated timestamps enabled */
 #define CTXT_FLAGS_USER_GENERATED_TS    BIT(14)
 /* Context skip till EOF */
@@ -102,6 +100,29 @@
 	struct kgsl_memdesc quad_vertices_restore;
 };
 
+struct adreno_context;
+
+/**
+ * struct adreno_context_ops - context state management functions
+ * @save: optional hook for saving context state
+ * @restore: required hook for restoring state,
+ *		adreno_context_restore() may be used directly here.
+ * @draw_workaround: optional hook for a workaround after every IB
+ * @detach: optional hook for freeing state tracking memory.
+ */
+struct adreno_context_ops {
+	int (*save)(struct adreno_device *, struct adreno_context *);
+	int (*restore)(struct adreno_device *, struct adreno_context *);
+	int (*draw_workaround)(struct adreno_device *,
+				struct adreno_context *);
+	void (*detach)(struct adreno_context *);
+};
+
+int adreno_context_restore(struct adreno_device *, struct adreno_context *);
+
+/* generic context ops for preamble context switch */
+extern const struct adreno_context_ops adreno_preamble_ctx_ops;
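
A target that only needs the generic restore can point .restore at
adreno_context_restore() directly, as adreno_preamble_ctx_ops does above;
targets with legacy state supply their own hooks.  An illustrative wiring,
where the my_* functions are hypothetical:

	static const struct adreno_context_ops my_legacy_ctx_ops = {
		.save = my_drawctxt_save,		/* optional legacy save */
		.restore = adreno_context_restore,	/* generic restore */
		.detach = my_drawctxt_detach,		/* optional teardown */
	};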
+
 /**
  * struct adreno_context - Adreno GPU draw context
  * @id: Unique integer ID of the context
@@ -140,6 +161,7 @@
  * @wq: Workqueue structure for contexts to sleep pending room in the queue
  * @waiting: Workqueue structure for contexts waiting for a timestamp or event
  * @queued: Number of commands queued in the cmdqueue
+ * @ops: Context switch functions for this context.
  */
 struct adreno_context {
 	struct kgsl_context base;
@@ -186,6 +208,8 @@
 	wait_queue_head_t waiting;
 
 	int queued;
+
+	const struct adreno_context_ops *ops;
 };
 
 
diff --git a/drivers/gpu/msm2/adreno_postmortem.c b/drivers/gpu/msm2/adreno_postmortem.c
index 33aa095..2c9b968 100644
--- a/drivers/gpu/msm2/adreno_postmortem.c
+++ b/drivers/gpu/msm2/adreno_postmortem.c
@@ -626,7 +626,7 @@
 end:
 	/* Restart the dispatcher after a manually triggered dump */
 	if (manual)
-		adreno_dispatcher_start(adreno_dev);
+		adreno_dispatcher_start(device);
 
 	return result;
 }
diff --git a/drivers/gpu/msm2/adreno_ringbuffer.c b/drivers/gpu/msm2/adreno_ringbuffer.c
index 2fe2c4c..9e69a49 100644
--- a/drivers/gpu/msm2/adreno_ringbuffer.c
+++ b/drivers/gpu/msm2/adreno_ringbuffer.c
@@ -1006,6 +1006,9 @@
 	/* For now everybody has the same priority */
 	cmdbatch->priority = ADRENO_CONTEXT_DEFAULT_PRIORITY;
 
+	/* wait for the suspend gate */
+	wait_for_completion(&device->cmdbatch_gate);
+
 	/* Queue the command in the ringbuffer */
 	ret = adreno_dispatcher_queue_cmd(adreno_dev, drawctxt, cmdbatch,
 		timestamp);
@@ -1147,12 +1150,14 @@
 					cmdbatch->timestamp);
 
 #ifdef CONFIG_MSM_KGSL_CFF_DUMP
+	if (ret)
+		goto done;
 	/*
 	 * insert wait for idle after every IB1
 	 * this is conservative but works reliably and is ok
 	 * even for performance simulations
 	 */
-	adreno_idle(device);
+	ret = adreno_idle(device);
 #endif
 
 done:
diff --git a/drivers/gpu/msm2/adreno_trace.h b/drivers/gpu/msm2/adreno_trace.h
index 8a9046c..6079b61 100644
--- a/drivers/gpu/msm2/adreno_trace.h
+++ b/drivers/gpu/msm2/adreno_trace.h
@@ -29,15 +29,21 @@
 		__field(unsigned int, id)
 		__field(unsigned int, timestamp)
 		__field(unsigned int, queued)
+		__field(unsigned int, flags)
 	),
 	TP_fast_assign(
 		__entry->id = cmdbatch->context->id;
 		__entry->timestamp = cmdbatch->timestamp;
 		__entry->queued = queued;
+		__entry->flags = cmdbatch->flags;
 	),
 	TP_printk(
-		"ctx=%u ts=%u queued=%u",
-			__entry->id, __entry->timestamp, __entry->queued
+		"ctx=%u ts=%u queued=%u flags=%s",
+			__entry->id, __entry->timestamp, __entry->queued,
+			__entry->flags ? __print_flags(__entry->flags, "|",
+				{ KGSL_CONTEXT_SYNC, "SYNC" },
+				{ KGSL_CONTEXT_END_OF_FRAME, "EOF" })
+				: "none"
 	)
 );
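
With __print_flags() the flags field renders symbolically in the trace
buffer; an illustrative line, with hypothetical values:

	ctx=7 ts=41 queued=2 flags=SYNC|EOF

A batch with no flags set prints flags=none.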
 
@@ -147,12 +153,12 @@
 	TP_printk("ctx=%u", __entry->id)
 );
 
-DEFINE_EVENT(adreno_drawctxt_template, adreno_context_sleep,
+DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_sleep,
 	TP_PROTO(struct adreno_context *drawctxt),
 	TP_ARGS(drawctxt)
 );
 
-DEFINE_EVENT(adreno_drawctxt_template, adreno_context_wake,
+DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_wake,
 	TP_PROTO(struct adreno_context *drawctxt),
 	TP_ARGS(drawctxt)
 );
@@ -162,6 +168,11 @@
 	TP_ARGS(drawctxt)
 );
 
+DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_invalidate,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt)
+);
+
 TRACE_EVENT(adreno_drawctxt_wait_start,
 	TP_PROTO(unsigned int id, unsigned int ts),
 	TP_ARGS(id, ts),
diff --git a/drivers/gpu/msm2/kgsl.c b/drivers/gpu/msm2/kgsl.c
index 8ca70e1..72888b2 100644
--- a/drivers/gpu/msm2/kgsl.c
+++ b/drivers/gpu/msm2/kgsl.c
@@ -934,21 +934,16 @@
 	device->open_count--;
 	if (device->open_count == 0) {
 
-		/* Wait for the active count to go to 1 */
-		kgsl_active_count_wait(device, 1);
+		/* Wait for the active count to go to 0 */
+		kgsl_active_count_wait(device, 0);
 
 		/* Fail if the wait times out */
-		BUG_ON(atomic_read(&device->active_cnt) > 1);
+		BUG_ON(atomic_read(&device->active_cnt) > 0);
 
+		/* Force power on to do the stop */
+		kgsl_pwrctrl_enable(device);
 		result = device->ftbl->stop(device);
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
-		/*
-		 * active_cnt special case: we just stopped the device,
-		 * so no need to use kgsl_active_count_put()
-		 */
-		atomic_dec(&device->active_cnt);
-	} else {
-		kgsl_active_count_put(device);
 	}
 	return result;
 
@@ -967,7 +962,6 @@
 	filep->private_data = NULL;
 
 	mutex_lock(&device->mutex);
-	kgsl_active_count_get(device);
 
 	while (1) {
 		read_lock(&device->context_lock);
@@ -1454,6 +1448,7 @@
  * @timestamp: Pending timestamp for the event
  * @handle: Pointer to a sync fence handle
  * @device: Pointer to the KGSL device
+ * @refcount: Allow event to be destroyed asynchronously
  */
 struct kgsl_cmdbatch_sync_event {
 	int type;
@@ -1463,10 +1458,37 @@
 	unsigned int timestamp;
 	struct kgsl_sync_fence_waiter *handle;
 	struct kgsl_device *device;
-	spinlock_t lock;
+	struct kref refcount;
 };
 
 /**
+ * kgsl_cmdbatch_sync_event_destroy() - Destroy a sync event object
+ * @kref: Pointer to the kref structure for this object
+ *
+ * Actually destroy a sync event object.  Called from
+ * kgsl_cmdbatch_sync_event_put.
+ */
+static void kgsl_cmdbatch_sync_event_destroy(struct kref *kref)
+{
+	struct kgsl_cmdbatch_sync_event *event = container_of(kref,
+		struct kgsl_cmdbatch_sync_event, refcount);
+
+	kgsl_cmdbatch_put(event->cmdbatch);
+	kfree(event);
+}
+
+/**
+ * kgsl_cmdbatch_sync_event_put() - Decrement the refcount for a
+ *                                  sync event object
+ * @event: Pointer to the sync event object
+ */
+static inline void kgsl_cmdbatch_sync_event_put(
+	struct kgsl_cmdbatch_sync_event *event)
+{
+	kref_put(&event->refcount, kgsl_cmdbatch_sync_event_destroy);
+}
+
+/**
  * kgsl_cmdbatch_destroy_object() - Destroy a cmdbatch object
  * @kref: Pointer to the kref structure for this object
  *
@@ -1484,13 +1506,32 @@
 }
 EXPORT_SYMBOL(kgsl_cmdbatch_destroy_object);
 
+/*
+ * a generic function to retire a pending sync event and (possibly)
+ * kick the dispatcher
+ */
 static void kgsl_cmdbatch_sync_expire(struct kgsl_device *device,
 	struct kgsl_cmdbatch_sync_event *event)
 {
+	struct kgsl_cmdbatch_sync_event *e, *tmp;
 	int sched = 0;
+	int removed = 0;
 
 	spin_lock(&event->cmdbatch->lock);
-	list_del(&event->node);
+
+	/*
+	 * sync events that are contained by a cmdbatch which has been
+	 * destroyed may have already been removed from the synclist
+	 */
+
+	list_for_each_entry_safe(e, tmp, &event->cmdbatch->synclist, node) {
+		if (e == event) {
+			list_del_init(&event->node);
+			removed = 1;
+			break;
+		}
+	}
+
 	sched = list_empty(&event->cmdbatch->synclist) ? 1 : 0;
 	spin_unlock(&event->cmdbatch->lock);
 
@@ -1501,6 +1542,10 @@
 
 	if (sched && device->ftbl->drawctxt_sched)
 		device->ftbl->drawctxt_sched(device, event->cmdbatch->context);
+
+	/* Put events that have been removed from the synclist */
+	if (removed)
+		kgsl_cmdbatch_sync_event_put(event);
 }
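
One property the synclist handling relies on, noted as an aside:

	/*
	 * list_del_init() (used above and in kgsl_cmdbatch_destroy())
	 * re-initializes the removed node to point at itself, so a node
	 * that has already left one list can be safely tested or removed
	 * again; plain list_del() would leave poisoned pointers behind.
	 */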
 
 
@@ -1514,11 +1559,9 @@
 	struct kgsl_cmdbatch_sync_event *event = priv;
 
 	kgsl_cmdbatch_sync_expire(device, event);
-
 	kgsl_context_put(event->context);
-	kgsl_cmdbatch_put(event->cmdbatch);
-
-	kfree(event);
+	/* Put events that have signaled */
+	kgsl_cmdbatch_sync_event_put(event);
 }
 
 /**
@@ -1526,64 +1569,63 @@
  * @cmdbatch: Pointer to the command batch object to destroy
  *
  * Start the process of destroying a command batch.  Cancel any pending events
- * and decrement the refcount.
+ * and decrement the refcount.  Asynchronous events can still signal after
+ * kgsl_cmdbatch_destroy has returned.
  */
 void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch)
 {
 	struct kgsl_cmdbatch_sync_event *event, *tmp;
-	int canceled = 0;
+	LIST_HEAD(cancel_synclist);
 
+	/*
+	 * Empty the synclist before canceling events
+	 */
 	spin_lock(&cmdbatch->lock);
+	list_splice_init(&cmdbatch->synclist, &cancel_synclist);
+	spin_unlock(&cmdbatch->lock);
 
-	/* Delete any pending sync points for this command batch */
-	list_for_each_entry_safe(event, tmp, &cmdbatch->synclist, node) {
+	/*
+	 * Finish canceling events outside the cmdbatch spinlock.  The
+	 * cancel functions report whether the event was successfully
+	 * canceled, meaning that the event is guaranteed not to signal
+	 * the callback.  This guarantee ensures that the reference
+	 * counts for the event and the cmdbatch stay correct.
+	 */
+	list_for_each_entry_safe(event, tmp, &cancel_synclist, node) {
 
-		switch (event->type) {
-		case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: {
-			/* Cancel the event if it still exists */
-			mutex_lock(&cmdbatch->device->mutex);
+		if (event->type == KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP) {
+			/*
+			 * Timestamp events are guaranteed to signal
+			 * when canceled
+			 */
 			kgsl_cancel_event(cmdbatch->device, event->context,
 				event->timestamp, kgsl_cmdbatch_sync_func,
 				event);
-			canceled = 1;
-			mutex_unlock(&cmdbatch->device->mutex);
-			kgsl_context_put(event->context);
-			break;
-		}
-		case KGSL_CMD_SYNCPOINT_TYPE_FENCE:
-			canceled = kgsl_sync_fence_async_cancel(event->handle);
-			break;
-		default:
-			break;
+		} else if (event->type == KGSL_CMD_SYNCPOINT_TYPE_FENCE) {
+			/* Put events that are successfully canceled */
+			if (kgsl_sync_fence_async_cancel(event->handle))
+				kgsl_cmdbatch_sync_event_put(event);
 		}
 
-		if(canceled) {
-			list_del(&event->node);
-			kfree(event);
-
-			/*
-			 * Put back a instance of the cmdbatch for each pending event
-			 * that we canceled
-			 */
-
-			kgsl_cmdbatch_put(cmdbatch);
-		}
+		/* Put events that have been removed from the synclist */
+		list_del_init(&event->node);
+		kgsl_cmdbatch_sync_event_put(event);
 	}
-	spin_unlock(&cmdbatch->lock);
-
 	kgsl_cmdbatch_put(cmdbatch);
 }
 EXPORT_SYMBOL(kgsl_cmdbatch_destroy);
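
Taken together, each sync event is now kept alive by two references; a
condensed sketch of the lifecycle (the actual gets and puts are spread
across the functions in this file):

	/*
	 * Sketch: sync event refcount lifecycle.
	 *
	 *	kref_init(&event->refcount);	ref 1: the synclist
	 *	kref_get(&event->refcount);	ref 2: the async callback
	 *
	 * Ref 2 is dropped when the callback signals or is successfully
	 * canceled; ref 1 is dropped when the event leaves the synclist.
	 * The final kgsl_cmdbatch_sync_event_put() frees the event and
	 * releases its reference on the cmdbatch.
	 */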
 
+/*
+ * A callback that gets registered with kgsl_sync_fence_async_wait and is fired
+ * when a fence expires
+ */
 static void kgsl_cmdbatch_sync_fence_func(void *priv)
 {
 	struct kgsl_cmdbatch_sync_event *event = priv;
 
-	spin_lock(&event->lock);
 	kgsl_cmdbatch_sync_expire(event->device, event);
-	kgsl_cmdbatch_put(event->cmdbatch);
-	spin_unlock(&event->lock);
-	kfree(event);
+	/* Put events that have signaled */
+	kgsl_cmdbatch_sync_event_put(event);
 }
 
 /* kgsl_cmdbatch_add_sync_fence() - Add a new sync fence syncpoint
@@ -1609,28 +1651,33 @@
 	event->type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
 	event->cmdbatch = cmdbatch;
 	event->device = device;
-	spin_lock_init(&event->lock);
+	event->context = NULL;
+
+	/*
+	 * Initial kref is to ensure async callback does not free the
+	 * event before this function sets the event handle
+	 */
+	kref_init(&event->refcount);
 
 	/*
-	 * Add it to the list first to account for the possiblity that the
+	 * Add it to the list first to account for the possibility that the
 	 * callback will happen immediately after the call to
-	 * kgsl_sync_fence_async_wait
+	 * kgsl_sync_fence_async_wait. Decrement the event refcount when
+	 * removing from the synclist.
 	 */
 
 	spin_lock(&cmdbatch->lock);
+	kref_get(&event->refcount);
 	list_add(&event->node, &cmdbatch->synclist);
 	spin_unlock(&cmdbatch->lock);
 
 	/*
-	 * There is a distinct race condition that can occur if the fence
-	 * callback is fired before the function has a chance to return.  The
-	 * event struct would be freed before we could write event->handle and
-	 * hilarity ensued.  Protect against this by protecting the call to
-	 * kgsl_sync_fence_async_wait and the kfree in the callback with a lock.
+	 * Increment the reference count for the async callback.
+	 * Decrement when the callback is successfully canceled, when
+	 * the callback is signaled or if the async wait fails.
 	 */
 
-	spin_lock(&event->lock);
-
+	kref_get(&event->refcount);
 	event->handle = kgsl_sync_fence_async_wait(sync->fd,
 		kgsl_cmdbatch_sync_fence_func, event);
 
@@ -1638,18 +1685,27 @@
 	if (IS_ERR_OR_NULL(event->handle)) {
 		int ret = PTR_ERR(event->handle);
 
+		/* Failed to add the event to the async callback */
+		kgsl_cmdbatch_sync_event_put(event);
+
+		/* Remove event from the synclist */
 		spin_lock(&cmdbatch->lock);
 		list_del(&event->node);
+		kgsl_cmdbatch_sync_event_put(event);
 		spin_unlock(&cmdbatch->lock);
 
-		kgsl_cmdbatch_put(cmdbatch);
-		spin_unlock(&event->lock);
-		kfree(event);
+		/* Event no longer needed by this function */
+		kgsl_cmdbatch_sync_event_put(event);
 
 		return ret;
 	}
 
-	spin_unlock(&event->lock);
+	/*
+	 * The event was successfully added to the synclist, and the
+	 * async callback and its cancel handle have been set.
+	 */
+	kgsl_cmdbatch_sync_event_put(event);
+
 	return 0;
 }
 
@@ -1680,9 +1736,15 @@
 	 */
 
 	if (context == cmdbatch->context) {
-		KGSL_DRV_ERR(device,
-			"Cannot create a sync point on your own context\n");
-		goto done;
+		unsigned int queued = kgsl_readtimestamp(device, context,
+			KGSL_TIMESTAMP_QUEUED);
+
+		if (timestamp_cmp(sync->timestamp, queued) > 0) {
+			KGSL_DRV_ERR(device,
+			"Cannot create syncpoint for future timestamp %d (current %d)\n",
+				sync->timestamp, queued);
+			goto done;
+		}
 	}
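
The check above rejects only syncpoints that could never retire; a worked
example with hypothetical numbers:

	/*
	 * With queued == 40, a syncpoint on this same context at
	 * timestamp 38 is allowed - it is already queued and will
	 * retire.  A syncpoint at 45 can never be satisfied: the sync
	 * cmdbatch blocks this context's queue, so the commands that
	 * would reach timestamp 45 can never run behind it.
	 */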
 
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
@@ -1698,15 +1760,24 @@
 	event->context = context;
 	event->timestamp = sync->timestamp;
 
+	/*
+	 * Two krefs are required to support events. The first kref is for
+	 * the synclist which holds the event in the cmdbatch. The second
+	 * kref is for the callback which can be asynchronous and be called
+	 * after kgsl_cmdbatch_destroy. The kref should be put when the event
+	 * is removed from the synclist, if the callback is successfully
+	 * canceled or when the callback is signaled.
+	 */
+	kref_init(&event->refcount);
+	kref_get(&event->refcount);
+
 	spin_lock(&cmdbatch->lock);
 	list_add(&event->node, &cmdbatch->synclist);
 	spin_unlock(&cmdbatch->lock);
 
 	mutex_lock(&device->mutex);
-	kgsl_active_count_get(device);
 	ret = kgsl_add_event(device, context->id, sync->timestamp,
 		kgsl_cmdbatch_sync_func, event, NULL);
-	kgsl_active_count_put(device);
 	mutex_unlock(&device->mutex);
 
 	if (ret) {
@@ -1821,7 +1892,7 @@
 	cmdbatch->device = device;
 	cmdbatch->ibcount = (flags & KGSL_CONTEXT_SYNC) ? 0 : numibs;
 	cmdbatch->context = context;
-	cmdbatch->flags = flags;
+	cmdbatch->flags = flags & ~KGSL_CONTEXT_SUBMIT_IB_LIST;
 
 	return cmdbatch;
 }
@@ -1844,15 +1915,22 @@
 	for (i = 0; i < cmdbatch->ibcount; i++) {
 		if (cmdbatch->ibdesc[i].sizedwords == 0) {
 			KGSL_DRV_ERR(dev_priv->device,
-				"Invalid IB: size is 0\n");
+				"invalid size ctx %d ib(%d) %X/%X\n",
+				cmdbatch->context->id, i,
+				cmdbatch->ibdesc[i].gpuaddr,
+				cmdbatch->ibdesc[i].sizedwords);
+
 			return false;
 		}
 
 		if (!kgsl_mmu_gpuaddr_in_range(private->pagetable,
 			cmdbatch->ibdesc[i].gpuaddr)) {
 			KGSL_DRV_ERR(dev_priv->device,
-				"Invalid IB: address 0x%X is out of range\n",
-				cmdbatch->ibdesc[i].gpuaddr);
+				"Invalid address ctx %d ib(%d) %X/%X\n",
+				cmdbatch->context->id, i,
+				cmdbatch->ibdesc[i].gpuaddr,
+				cmdbatch->ibdesc[i].sizedwords);
+
 			return false;
 		}
 	}
@@ -1948,8 +2026,6 @@
 		return ERR_PTR(ret);
 	}
 
-	cmdbatch->flags = flags;
-
 	return cmdbatch;
 }
 
@@ -1977,7 +2053,7 @@
 		 * submission
 		 */
 
-		if (param->numibs == 0 || param->numibs > 100000)
+		if (param->numibs == 0 || param->numibs > KGSL_MAX_NUMIBS)
 			goto done;
 
 		cmdbatch = _kgsl_cmdbatch_create(device, context, param->flags,
@@ -1991,10 +2067,8 @@
 	}
 
 	/* Run basic sanity checking on the command */
-	if (!_kgsl_cmdbatch_verify(dev_priv, cmdbatch)) {
-		KGSL_DRV_ERR(device, "Unable to verify the IBs\n");
+	if (!_kgsl_cmdbatch_verify(dev_priv, cmdbatch))
 		goto free_cmdbatch;
-	}
 
 	result = dev_priv->device->ftbl->issueibcmds(dev_priv, context,
 		cmdbatch, &param->timestamp);
@@ -2020,10 +2094,10 @@
 
 	/* The number of IBs are completely ignored for sync commands */
 	if (!(param->flags & KGSL_CONTEXT_SYNC)) {
-		if (param->numcmds == 0 || param->numcmds > 100000)
+		if (param->numcmds == 0 || param->numcmds > KGSL_MAX_NUMIBS)
 			return -EINVAL;
 	} else if (param->numcmds != 0) {
-		KGSL_DRV_ERR(device,
+		KGSL_DEV_ERR_ONCE(device,
 			"Commands specified with the SYNC flag.  They will be ignored\n");
 	}
 
@@ -2041,10 +2115,8 @@
 	}
 
 	/* Run basic sanity checking on the command */
-	if (!_kgsl_cmdbatch_verify(dev_priv, cmdbatch)) {
-		KGSL_DRV_ERR(device, "Unable to verify the IBs\n");
+	if (!_kgsl_cmdbatch_verify(dev_priv, cmdbatch))
 		goto free_cmdbatch;
-	}
 
 	result = dev_priv->device->ftbl->issueibcmds(dev_priv, context,
 		cmdbatch, &param->timestamp);
@@ -3292,7 +3364,6 @@
 		{ .cmd = (_cmd), .func = (_func), .flags = (_flags) }
 
 #define KGSL_IOCTL_LOCK		BIT(0)
-#define KGSL_IOCTL_WAKE		BIT(1)
 
 static const struct {
 	unsigned int cmd;
@@ -3304,10 +3375,10 @@
 			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP,
 			kgsl_ioctl_device_waittimestamp,
-			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
+			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID,
 			kgsl_ioctl_device_waittimestamp_ctxtid,
-			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
+			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS,
 			kgsl_ioctl_rb_issueibcmds, 0),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS,
@@ -3329,7 +3400,7 @@
 			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY,
 			kgsl_ioctl_drawctxt_destroy,
-			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
+			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM,
 			kgsl_ioctl_map_user_mem, 0),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FROM_PMEM,
@@ -3367,7 +3438,7 @@
 	struct kgsl_device_private *dev_priv = filep->private_data;
 	unsigned int nr;
 	kgsl_ioctl_func_t func;
-	int lock, ret, use_hw;
+	int lock, ret;
 	char ustack[64];
 	void *uptr = NULL;
 
@@ -3425,7 +3496,6 @@
 
 		func = kgsl_ioctl_funcs[nr].func;
 		lock = kgsl_ioctl_funcs[nr].flags & KGSL_IOCTL_LOCK;
-		use_hw = kgsl_ioctl_funcs[nr].flags & KGSL_IOCTL_WAKE;
 	} else {
 		func = dev_priv->device->ftbl->ioctl;
 		if (!func) {
@@ -3435,28 +3505,15 @@
 			goto done;
 		}
 		lock = 1;
-		use_hw = 1;
 	}
 
-	if (lock) {
+	if (lock)
 		mutex_lock(&dev_priv->device->mutex);
-		if (use_hw) {
-			ret = kgsl_active_count_get(dev_priv->device);
-			if (ret < 0) {
-				use_hw = 0;
-				goto unlock;
-			}
-		}
-	}
 
 	ret = func(dev_priv, cmd, uptr);
 
-unlock:
-	if (lock) {
-		if (use_hw)
-			kgsl_active_count_put(dev_priv->device);
+	if (lock)
 		mutex_unlock(&dev_priv->device->mutex);
-	}
 
 	/*
 	 * Still copy back on failure, but assume function took
diff --git a/drivers/gpu/msm2/kgsl.h b/drivers/gpu/msm2/kgsl.h
index 651d597..32f105c 100644
--- a/drivers/gpu/msm2/kgsl.h
+++ b/drivers/gpu/msm2/kgsl.h
@@ -78,6 +78,8 @@
 
 #define KGSL_MEMFREE_HIST_SIZE	((int)(PAGE_SIZE * 2))
 
+#define KGSL_MAX_NUMIBS 100000
+
 struct kgsl_memfree_hist_elem {
 	unsigned int pid;
 	unsigned int gpuaddr;
diff --git a/drivers/gpu/msm2/kgsl_device.h b/drivers/gpu/msm2/kgsl_device.h
index c9be418..44324c6 100644
--- a/drivers/gpu/msm2/kgsl_device.h
+++ b/drivers/gpu/msm2/kgsl_device.h
@@ -242,6 +242,7 @@
 	struct kgsl_mh mh;
 	struct kgsl_mmu mmu;
 	struct completion hwaccess_gate;
+	struct completion cmdbatch_gate;
 	const struct kgsl_functable *ftbl;
 	struct work_struct idle_check_ws;
 	struct timer_list idle_timer;
@@ -314,6 +315,7 @@
 
 #define KGSL_DEVICE_COMMON_INIT(_dev) \
 	.hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\
+	.cmdbatch_gate = COMPLETION_INITIALIZER((_dev).cmdbatch_gate),\
 	.idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\
 			kgsl_idle_check),\
 	.ts_expired_ws  = __WORK_INITIALIZER((_dev).ts_expired_ws,\
@@ -685,7 +687,8 @@
  */
 static inline void kgsl_cmdbatch_put(struct kgsl_cmdbatch *cmdbatch)
 {
-	kref_put(&cmdbatch->refcount, kgsl_cmdbatch_destroy_object);
+	if (cmdbatch)
+		kref_put(&cmdbatch->refcount, kgsl_cmdbatch_destroy_object);
 }
 
 /**
diff --git a/drivers/gpu/msm2/kgsl_events.c b/drivers/gpu/msm2/kgsl_events.c
index 277eae0..d76b628 100644
--- a/drivers/gpu/msm2/kgsl_events.c
+++ b/drivers/gpu/msm2/kgsl_events.c
@@ -58,8 +58,6 @@
 	list_del(&event->list);
 	kgsl_context_put(event->context);
 	kfree(event);
-
-	kgsl_active_count_put(device);
 }
 
 static void _retire_events(struct kgsl_device *device,
@@ -212,7 +210,6 @@
 int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts,
 	kgsl_event_func func, void *priv, void *owner)
 {
-	int ret;
 	struct kgsl_event *event;
 	unsigned int cur_ts;
 	struct kgsl_context *context = NULL;
@@ -262,17 +259,6 @@
 		return -ENOMEM;
 	}
 
-	/*
-	 * Increase the active count on the device to avoid going into power
-	 * saving modes while events are pending
-	 */
-	ret = kgsl_active_count_get(device);
-	if (ret < 0) {
-		kgsl_context_put(context);
-		kfree(event);
-		return ret;
-	}
-
 	event->context = context;
 	event->timestamp = ts;
 	event->priv = priv;
diff --git a/drivers/gpu/msm2/kgsl_pwrctrl.c b/drivers/gpu/msm2/kgsl_pwrctrl.c
index f2398a5..7ee305f 100644
--- a/drivers/gpu/msm2/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm2/kgsl_pwrctrl.c
@@ -1441,6 +1441,8 @@
 		/* Enable state before turning on irq */
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
 		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
+		mod_timer(&device->idle_timer, jiffies +
+				device->pwrctrl.interval_timeout);
 		pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
 				device->pwrctrl.pm_qos_latency);
 	case KGSL_STATE_ACTIVE:
@@ -1543,9 +1545,6 @@
 			mutex_lock(&device->mutex);
 		}
 
-		/* Stop the idle timer */
-		del_timer_sync(&device->idle_timer);
-
 		ret = kgsl_pwrctrl_wake(device);
 	}
 	if (ret == 0)
diff --git a/drivers/gpu/msm2/kgsl_pwrctrl.h b/drivers/gpu/msm2/kgsl_pwrctrl.h
index 71a0fdd..9f18160 100644
--- a/drivers/gpu/msm2/kgsl_pwrctrl.h
+++ b/drivers/gpu/msm2/kgsl_pwrctrl.h
@@ -120,8 +120,8 @@
 void kgsl_pwrctrl_set_state(struct kgsl_device *device, unsigned int state);
 void kgsl_pwrctrl_request_state(struct kgsl_device *device, unsigned int state);
 
-int kgsl_active_count_get(struct kgsl_device *device);
-int kgsl_active_count_get_light(struct kgsl_device *device);
+int __must_check kgsl_active_count_get(struct kgsl_device *device);
+int __must_check kgsl_active_count_get_light(struct kgsl_device *device);
 void kgsl_active_count_put(struct kgsl_device *device);
 int kgsl_active_count_wait(struct kgsl_device *device, int count);
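
__must_check expands to __attribute__((warn_unused_result)), so discarding
the return value of these getters is now a compile-time warning; the z180
changes below fix exactly the call sites this flushes out.  An illustrative
caller, where my_touch_hardware() is hypothetical:

	static int my_touch_hardware(struct kgsl_device *device)
	{
		int ret;

		/* a bare kgsl_active_count_get(device); now warns */
		ret = kgsl_active_count_get(device);
		if (ret)
			return ret;

		/* ... safe to access hardware registers here ... */

		kgsl_active_count_put(device);
		return 0;
	}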
 
diff --git a/drivers/gpu/msm2/z180.c b/drivers/gpu/msm2/z180.c
index ac50a91..45f200c 100644
--- a/drivers/gpu/msm2/z180.c
+++ b/drivers/gpu/msm2/z180.c
@@ -406,7 +406,9 @@
 
 	mutex_lock(&device->mutex);
 
-	kgsl_active_count_get(device);
+	result = kgsl_active_count_get(device);
+	if (result)
+		goto error_active_count;
 
 	if (cmdbatch == NULL) {
-		result = EINVAL;
+		result = -EINVAL;
@@ -515,7 +517,7 @@
 		*timestamp, cmdbatch->flags, result, 0);
 
 	kgsl_active_count_put(device);
-
+error_active_count:
 	mutex_unlock(&device->mutex);
 
 	return (int)result;
@@ -864,9 +866,13 @@
 	if (msecs == -1)
 		msecs = Z180_IDLE_TIMEOUT;
 
-	mutex_unlock(&device->mutex);
-	status = z180_wait(device, context, timestamp, msecs);
-	mutex_lock(&device->mutex);
+	status = kgsl_active_count_get(device);
+	if (!status) {
+		mutex_unlock(&device->mutex);
+		status = z180_wait(device, context, timestamp, msecs);
+		mutex_lock(&device->mutex);
+		kgsl_active_count_put(device);
+	}
 
 	return status;
 }
@@ -915,12 +921,17 @@
 static int
 z180_drawctxt_detach(struct kgsl_context *context)
 {
+	int ret;
 	struct kgsl_device *device;
 	struct z180_device *z180_dev;
 
 	device = context->device;
 	z180_dev = Z180_DEVICE(device);
 
+	ret = kgsl_active_count_get(device);
+	if (ret)
+		return ret;
+
 	z180_idle(device);
 
 	if (z180_dev->ringbuffer.prevctx == context->id) {
@@ -932,6 +943,7 @@
 				KGSL_MMUFLAGS_PTUPDATE);
 	}
 
+	kgsl_active_count_put(device);
 	return 0;
 }