msm: kgsl: Add support for the preamble context flag

Userspace will set a flag in the context if preambles are in use. If
they are, we can safely skip save and restore commands for the
context. GMEM save/restore is still required.  To improve performance,
preamble commands are skipped when the context hasn't changed since
the last issueibcmds.

Change-Id: I21bb8996d62651122dbebcf7a79543679109e1f9
Signed-off-by: Vijay Krishnamoorthy <adivarah@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index bdba624..af5bf51 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -77,10 +77,7 @@
 };
 
 struct adreno_gpudev {
-	int (*ctxt_gpustate_shadow)(struct adreno_device *,
-		struct adreno_context *);
-	int (*ctxt_gmem_shadow)(struct adreno_device *,
-		struct adreno_context *);
+	int (*ctxt_create)(struct adreno_device *, struct adreno_context *);
 	void (*ctxt_save)(struct adreno_device *, struct adreno_context *);
 	void (*ctxt_restore)(struct adreno_device *, struct adreno_context *);
 	irqreturn_t (*irq_handler)(struct adreno_device *);
diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c
index 7098002..5ce9cf8 100644
--- a/drivers/gpu/msm/adreno_a2xx.c
+++ b/drivers/gpu/msm/adreno_a2xx.c
@@ -619,14 +619,16 @@
 	unsigned int addr = shadow->gmemshadow.gpuaddr;
 	unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel;
 
-	/* Store TP0_CHICKEN register */
-	*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
-	*cmds++ = REG_TP0_CHICKEN;
+	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+		/* Store TP0_CHICKEN register */
+		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
+		*cmds++ = REG_TP0_CHICKEN;
 
-	*cmds++ = tmp_ctx.chicken_restore;
+		*cmds++ = tmp_ctx.chicken_restore;
 
-	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
-	*cmds++ = 0;
+		*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+		*cmds++ = 0;
+	}
 
 	/* Set TP0_CHICKEN to zero */
 	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
@@ -829,13 +831,15 @@
 	unsigned int *cmds = shadow->gmem_restore_commands;
 	unsigned int *start = cmds;
 
-	/* Store TP0_CHICKEN register */
-	*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
-	*cmds++ = REG_TP0_CHICKEN;
-	*cmds++ = tmp_ctx.chicken_restore;
+	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+		/* Store TP0_CHICKEN register */
+		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
+		*cmds++ = REG_TP0_CHICKEN;
+		*cmds++ = tmp_ctx.chicken_restore;
 
-	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
-	*cmds++ = 0;
+		*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+		*cmds++ = 0;
+	}
 
 	/* Set TP0_CHICKEN to zero */
 	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
@@ -1297,45 +1301,22 @@
 }
 
 /* create buffers for saving/restoring registers, constants, & GMEM */
-static int a2xx_ctxt_gpustate_shadow(struct adreno_device *adreno_dev,
+static int a2xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
 			struct adreno_context *drawctxt)
 {
-	int result;
-
-	/* Allocate vmalloc memory to store the gpustate */
-	result = kgsl_allocate(&drawctxt->gpustate,
-		drawctxt->pagetable, _context_size(adreno_dev));
-
-	if (result)
-		return result;
-
 	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
 
-	/* Blank out h/w register, constant, and command buffer shadows. */
-	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0,
-			   _context_size(adreno_dev));
-
-	/* set-up command and vertex buffer pointers */
-	tmp_ctx.cmd = tmp_ctx.start
-	    = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET);
-
 	/* build indirect command buffers to save & restore regs/constants */
 	build_regrestore_cmds(adreno_dev, drawctxt);
 	build_regsave_cmds(adreno_dev, drawctxt);
 
 	build_shader_save_restore_cmds(adreno_dev, drawctxt);
 
-	kgsl_cache_range_op(&drawctxt->gpustate,
-			    KGSL_CACHE_OP_FLUSH);
-
-	kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate,
-			drawctxt->gpustate.gpuaddr,
-			drawctxt->gpustate.size, false);
 	return 0;
 }
 
 /* create buffers for saving/restoring registers, constants, & GMEM */
-static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev,
+static int a2xx_create_gmem_shadow(struct adreno_device *adreno_dev,
 			struct adreno_context *drawctxt)
 {
 	int result;
@@ -1362,7 +1343,8 @@
 		&tmp_ctx.cmd);
 
 	/* build TP0_CHICKEN register restore command buffer */
-	tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt);
+	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE))
+		tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt);
 
 	/* build indirect command buffers to save & restore gmem */
 	drawctxt->context_gmem_shadow.gmem_save_commands = tmp_ctx.cmd;
@@ -1385,7 +1367,61 @@
 	return 0;
 }
 
-static void a2xx_ctxt_save(struct adreno_device *adreno_dev,
+static int a2xx_drawctxt_create(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt)
+{
+	int ret;
+
+	/*
+	 * Allocate memory for the GPU state and the context commands.
+	 * Despite the name, this is much more then just storage for
+	 * the gpustate.  This contains command space for gmem save
+	 * and texture and vertex buffer storage too
+	 */
+
+	ret = kgsl_allocate(&drawctxt->gpustate,
+		drawctxt->pagetable, _context_size(adreno_dev));
+
+	if (ret)
+		return ret;
+
+	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0,
+		_context_size(adreno_dev));
+
+	tmp_ctx.cmd = tmp_ctx.start
+	    = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET);
+
+	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+		ret = a2xx_create_gpustate_shadow(adreno_dev, drawctxt);
+		if (ret)
+			goto done;
+
+		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
+	}
+
+	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) {
+		ret = a2xx_create_gmem_shadow(adreno_dev, drawctxt);
+		if (ret)
+			goto done;
+	}
+
+	/* Flush and sync the gpustate memory */
+
+	kgsl_cache_range_op(&drawctxt->gpustate,
+			    KGSL_CACHE_OP_FLUSH);
+
+	kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate,
+			drawctxt->gpustate.gpuaddr,
+			drawctxt->gpustate.size, false);
+
+done:
+	if (ret)
+		kgsl_sharedmem_free(&drawctxt->gpustate);
+
+	return ret;
+}
+
+static void a2xx_drawctxt_save(struct adreno_device *adreno_dev,
 			struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
@@ -1398,25 +1434,27 @@
 		KGSL_CTXT_WARN(device,
 			"Current active context has caused gpu hang\n");
 
-	KGSL_CTXT_INFO(device,
-		"active context flags %08x\n", context->flags);
+	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
 
-	/* save registers and constants. */
-	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
-		context->reg_save, 3);
-
-	if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
-		/* save shader partitioning and instructions. */
-		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
-			context->shader_save, 3);
-
-		/* fixup shader partitioning parameter for
-		 *  SET_SHADER_BASES.
-		 */
+		/* save registers and constants. */
 		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
-			context->shader_fixup, 3);
+			context->reg_save, 3);
 
-		context->flags |= CTXT_FLAGS_SHADER_RESTORE;
+		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
+			/* save shader partitioning and instructions. */
+			adreno_ringbuffer_issuecmds(device,
+				KGSL_CMD_FLAGS_PMODE,
+				context->shader_save, 3);
+
+			/*
+			 * fixup shader partitioning parameter for
+			 *  SET_SHADER_BASES.
+			 */
+			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
+				context->shader_fixup, 3);
+
+			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
+		}
 	}
 
 	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
@@ -1428,8 +1466,10 @@
 			context->context_gmem_shadow.gmem_save, 3);
 
 		/* Restore TP0_CHICKEN */
-		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
-			context->chicken_restore, 3);
+		if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
+				context->chicken_restore, 3);
+		}
 
 		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
 	} else if (adreno_is_a225(adreno_dev)) {
@@ -1459,7 +1499,7 @@
 	}
 }
 
-static void a2xx_ctxt_restore(struct adreno_device *adreno_dev,
+static void a2xx_drawctxt_restore(struct adreno_device *adreno_dev,
 			struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
@@ -1495,21 +1535,27 @@
 		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
 			context->context_gmem_shadow.gmem_restore, 3);
 
-		/* Restore TP0_CHICKEN */
-		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
-			context->chicken_restore, 3);
+		if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+			/* Restore TP0_CHICKEN */
+			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
+				context->chicken_restore, 3);
+		}
 
 		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
 	}
 
-	/* restore registers and constants. */
-	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
-		context->reg_restore, 3);
+	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
 
-	/* restore shader instructions & partitioning. */
-	if (context->flags & CTXT_FLAGS_SHADER_RESTORE) {
+		/* restore registers and constants. */
 		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
-			context->shader_restore, 3);
+			context->reg_restore, 3);
+
+		/* restore shader instructions & partitioning. */
+		if (context->flags & CTXT_FLAGS_SHADER_RESTORE) {
+			adreno_ringbuffer_issuecmds(device,
+				KGSL_CMD_FLAGS_NONE,
+				context->shader_restore, 3);
+		}
 	}
 
 	if (adreno_is_a20x(adreno_dev)) {
@@ -1708,10 +1754,9 @@
 	int *remain, int hang);
 
 struct adreno_gpudev adreno_a2xx_gpudev = {
-	.ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow,
-	.ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow,
-	.ctxt_save = a2xx_ctxt_save,
-	.ctxt_restore = a2xx_ctxt_restore,
+	.ctxt_create = a2xx_drawctxt_create,
+	.ctxt_save = a2xx_drawctxt_save,
+	.ctxt_restore = a2xx_drawctxt_restore,
 	.irq_handler = a2xx_irq_handler,
 	.irq_control = a2xx_irq_control,
 	.snapshot = a2xx_snapshot,
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index b7b0ea4..206a678 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -139,27 +139,19 @@
 	drawctxt->pagetable = pagetable;
 	drawctxt->bin_base_offset = 0;
 
-	/* FIXME: Deal with preambles */
+	if (flags & KGSL_CONTEXT_PREAMBLE)
+		drawctxt->flags |= CTXT_FLAGS_PREAMBLE;
 
-	ret = adreno_dev->gpudev->ctxt_gpustate_shadow(adreno_dev, drawctxt);
+	if (flags & KGSL_CONTEXT_NO_GMEM_ALLOC)
+		drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC;
+
+	ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
 	if (ret)
 		goto err;
 
-	/* Save the shader instruction memory on context switching */
-	drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
-
-	if (!(flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) {
-		/* create gmem shadow */
-		ret = adreno_dev->gpudev->ctxt_gmem_shadow(adreno_dev,
-			drawctxt);
-		if (ret != 0)
-			goto err;
-	}
-
 	context->devctxt = drawctxt;
 	return 0;
 err:
-	kgsl_sharedmem_free(&drawctxt->gpustate);
 	kfree(drawctxt);
 	return ret;
 }
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index 3c3a853..3047660 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -30,12 +30,16 @@
 #define CTXT_FLAGS_GMEM_SAVE		0x00000200
 /* gmem can be restored from shadow */
 #define CTXT_FLAGS_GMEM_RESTORE		0x00000400
+/* preamble packed in cmdbuffer for context switching */
+#define CTXT_FLAGS_PREAMBLE		0x00000800
 /* shader must be copied to shadow */
 #define CTXT_FLAGS_SHADER_SAVE		0x00002000
 /* shader can be restored from shadow */
 #define CTXT_FLAGS_SHADER_RESTORE	0x00004000
 /* Context has caused a GPU hang */
 #define CTXT_FLAGS_GPU_HANG		0x00008000
+/* Specifies there is no need to save GMEM */
+#define CTXT_FLAGS_NOGMEMALLOC          0x00010000
 
 struct kgsl_device;
 struct adreno_device;
@@ -91,7 +95,7 @@
 				struct adreno_context *drawctxt,
 				unsigned int flags);
 void adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device,
-				      struct kgsl_context *context,
+					struct kgsl_context *context,
 					unsigned int offset);
 
 /* GPU context switch helper functions */
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 7ace67a..f1748d7 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -561,6 +561,7 @@
 	unsigned int *cmds;
 	unsigned int i;
 	struct adreno_context *drawctxt;
+	unsigned int start_index = 0;
 
 	if (device->state & KGSL_STATE_HUNG)
 		return -EBUSY;
@@ -583,7 +584,16 @@
 			" submission, size %x\n", numibs * 3);
 		return -ENOMEM;
 	}
-	for (i = 0; i < numibs; i++) {
+
+	/*When preamble is enabled, the preamble buffer with state restoration
+	commands are stored in the first node of the IB chain. We can skip that
+	if a context switch hasn't occured */
+
+	if (drawctxt->flags & CTXT_FLAGS_PREAMBLE &&
+		adreno_dev->drawctxt_active == drawctxt)
+		start_index = 1;
+
+	for (i = start_index; i < numibs; i++) {
 		(void)kgsl_cffdump_parse_ibs(dev_priv, NULL,
 			ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false);