msm: kgsl: Recovery policy change

The recovery algorithm is changed to:
step 1: retry the same commands that hung the GPU
step 2: if step 1 fails, NOP out just the IB that hung the GPU
        and retry
step 3: if step 2 fails, skip commands in the current context
        till the end of frame and retry
step 4: if step 3 fails, mark the context as bad and execute
        the remaining commands from the good contexts.
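
The steps escalate only on failure: rec_data->step starts at the
configured ft_policy and is bumped each time a resubmit hangs
again. A simplified sketch of the new _adreno_recover_hang() flow
(resubmit() here abbreviates _adreno_recovery_resubmit()):

	rec_data->step = adreno_dev->ft_policy;

	if (rec_data->step == FT_REPLAY_BAD_CTXT_CMDS) {
		/* step 1: replay the hanging commands unmodified */
		if (resubmit(rec_data->bad_rb_buffer))
			rec_data->step = FT_NOT_IB_BAD_CTXT_CMDS;
		else
			goto play_good_cmds;
	}
	/* FT_NOT_IB_BAD_CTXT_CMDS (step 2) and
	 * FT_SKIP_EOF_BAD_CTXT_CMDS (step 3) follow the same
	 * pattern, each escalating on failure; step 4 marks the
	 * context bad and plays good_rb_buffer. */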

Previously we returned -EDEADLK when recovery succeeded, which is
the same error code returned when a context is not recoverable.
With the new policy, if recovery succeeds we return -EAGAIN so
that userspace can treat a recovered context differently from one
that is not recoverable. If recovery fails we mark the context as
bad and return -EDEADLK.
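
Userspace is expected to act on the distinction. A hypothetical
handler (IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS is the existing kgsl
ioctl; the return-code convention below is illustrative, not part
of this patch):

	#include <errno.h>
	#include <sys/ioctl.h>
	#include <linux/msm_kgsl.h>

	static int submit_ibs(int fd,
			struct kgsl_ringbuffer_issueibcmds *param)
	{
		if (ioctl(fd, IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS,
				param) == 0)
			return 0;
		if (errno == EAGAIN)
			/* context hung but was recovered: restore GPU
			 * state and resubmit on the same context */
			return 1;
		if (errno == EDEADLK)
			/* context is marked bad: destroy it and
			 * create a new one */
			return -1;
		return -errno;	/* unrelated failure */
	}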

Change-Id: I9fa3c40801964186866b6002e62f19cf6aa41361
Signed-off-by: Tarun Karra <tkarra@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 5fd1230..88756c0 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -1383,6 +1383,7 @@
 {
 	vfree(rec_data->rb_buffer);
 	vfree(rec_data->bad_rb_buffer);
+	vfree(rec_data->good_rb_buffer);
 }
 
 static int _find_start_of_cmd_seq(struct adreno_ringbuffer *rb,
@@ -1564,9 +1565,9 @@
 			current_context));
 
 	kgsl_sharedmem_readl(&device->memstore,
-				&rec_data->global_eop,
-				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-				eoptimestamp));
+			&rec_data->global_eop,
+			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+			eoptimestamp));
 
 	rec_data->rb_buffer = vmalloc(rb->buffer_desc.size);
 	if (!rec_data->rb_buffer) {
@@ -1582,7 +1583,17 @@
 		ret = -ENOMEM;
 		goto done;
 	}
+
+	rec_data->good_rb_buffer = vmalloc(rb->buffer_desc.size);
+	if (!rec_data->good_rb_buffer) {
+		KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
+				rb->buffer_desc.size);
+		ret = -ENOMEM;
+		goto done;
+	}
 	rec_data->fault = device->mmu.fault;
+	rec_data->step = adreno_dev->ft_policy;
+
 	/* find the start of bad command sequence in rb */
 	context = idr_find(&device->context_idr, rec_data->context_id);
 	/* Look for the command stream that is right after the global eop */
@@ -1621,16 +1632,73 @@
 	if (ret) {
 		vfree(rec_data->rb_buffer);
 		vfree(rec_data->bad_rb_buffer);
+		vfree(rec_data->good_rb_buffer);
 	}
 	return ret;
 }
 
 static int
-_adreno_recover_hang(struct kgsl_device *device,
-			struct adreno_recovery_data *rec_data,
-			bool try_bad_commands)
+_adreno_restart_device(struct kgsl_device *device,
+					   struct kgsl_context *context)
 {
-	int ret;
+	struct adreno_context *adreno_context =
+				context ? context->devctxt : NULL;
+
+	/* restart device */
+	if (adreno_stop(device)) {
+		KGSL_DRV_ERR(device, "Device stop failed in recovery\n");
+		return 1;
+	}
+
+	if (adreno_start(device, true)) {
+		KGSL_DRV_ERR(device, "Device start failed in recovery\n");
+		return 1;
+	}
+
+	if (context)
+		kgsl_mmu_setstate(&device->mmu, adreno_context->pagetable,
+			KGSL_MEMSTORE_GLOBAL);
+
+	/* If iommu is used then we need to make sure that the iommu clocks
+	 * are on since there could be commands in pipeline that touch iommu */
+	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
+		if (kgsl_mmu_enable_clk(&device->mmu,
+				KGSL_IOMMU_CONTEXT_USER))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int
+_adreno_recovery_resubmit(struct kgsl_device *device,
+			struct adreno_ringbuffer *rb,
+			struct kgsl_context *context,
+			struct adreno_recovery_data *rec_data,
+			unsigned int *buff, unsigned int size)
+{
+	int ret = 0;
+
+	if (_adreno_restart_device(device, context))
+		return 1;
+
+	if (size) {
+
+		/* submit commands and wait for them to pass */
+		adreno_ringbuffer_restore(rb, buff, size);
+
+		ret = adreno_idle(device);
+	}
+
+	return ret;
+}
+
+
+static int
+_adreno_recover_hang(struct kgsl_device *device,
+			struct adreno_recovery_data *rec_data)
+{
+	int ret = 0, i;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
 	struct kgsl_context *context;
@@ -1655,103 +1723,117 @@
 	 * hang */
 	adreno_ringbuffer_extract(rb, rec_data);
 
-	/* restart device */
-	ret = adreno_stop(device);
-	if (ret) {
-		KGSL_DRV_ERR(device, "Device stop failed in recovery\n");
-		goto done;
-	}
+	/* Do not try the bad commands if the hang is due to a fault */
+	if (rec_data->fault)
+		goto play_good_cmds;
 
-	ret = adreno_start(device, true);
-	if (ret) {
-		KGSL_DRV_ERR(device, "Device start failed in recovery\n");
-		goto done;
-	}
+	if (rec_data->step == FT_REPLAY_BAD_CTXT_CMDS) {
 
-	if (context)
-		kgsl_mmu_setstate(&device->mmu, adreno_context->pagetable,
-			KGSL_MEMSTORE_GLOBAL);
+		ret = _adreno_recovery_resubmit(device, rb, context, rec_data,
+				rec_data->bad_rb_buffer, rec_data->bad_rb_size);
 
-	/* If iommu is used then we need to make sure that the iommu clocks
-	 * are on since there could be commands in pipeline that touch iommu */
-	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
-		ret = kgsl_mmu_enable_clk(&device->mmu,
-			KGSL_IOMMU_CONTEXT_USER);
 		if (ret)
-			goto done;
+			rec_data->step = FT_NOT_IB_BAD_CTXT_CMDS;
+		else
+			goto play_good_cmds;
+
 	}
 
-	/* Do not try the bad commands if recovery has failed bad commands
-	 * once already or if hang is due to a fault */
-	if (!try_bad_commands || rec_data->fault)
-		rec_data->bad_rb_size = 0;
+	if (rec_data->step == FT_NOT_IB_BAD_CTXT_CMDS) {
 
-	if (rec_data->bad_rb_size) {
-		int idle_ret;
-		/* submit the bad and good context commands and wait for
-		 * them to pass */
-		adreno_ringbuffer_restore(rb, rec_data->bad_rb_buffer,
-					rec_data->bad_rb_size);
-		idle_ret = adreno_idle(device);
-		if (idle_ret) {
-			ret = adreno_stop(device);
-			if (ret) {
-				KGSL_DRV_ERR(device,
-				"Device stop failed in recovery\n");
-				goto done;
-			}
-			ret = adreno_start(device, true);
-			if (ret) {
-				KGSL_DRV_ERR(device,
-				"Device start failed in recovery\n");
-				goto done;
-			}
-			if (context)
-				kgsl_mmu_setstate(&device->mmu,
-						adreno_context->pagetable,
-						KGSL_MEMSTORE_GLOBAL);
+		for (i = 0; i < rec_data->bad_rb_size; i++) {
+			if ((rec_data->bad_rb_buffer[i] ==
+				CP_HDR_INDIRECT_BUFFER_PFD) &&
+				(rec_data->bad_rb_buffer[i+1] ==
+				rec_data->ib1)) {
 
-			if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
-				ret = kgsl_mmu_enable_clk(&device->mmu,
-						KGSL_IOMMU_CONTEXT_USER);
-				if (ret)
-					goto done;
+				rec_data->bad_rb_buffer[i] = cp_nop_packet(2);
+				rec_data->bad_rb_buffer[i+1] =
+							KGSL_NOP_IB_IDENTIFIER;
+				rec_data->bad_rb_buffer[i+2] =
+							KGSL_NOP_IB_IDENTIFIER;
+				break;
 			}
-
-			ret = idle_ret;
-			KGSL_DRV_ERR(device,
-			"Bad context commands hung in recovery\n");
-		} else {
-			KGSL_DRV_ERR(device,
-			"Bad context commands succeeded in recovery\n");
-			if (adreno_context)
-				adreno_context->flags = (adreno_context->flags &
-					~CTXT_FLAGS_GPU_HANG) |
-					CTXT_FLAGS_GPU_HANG_RECOVERED;
-			adreno_dev->drawctxt_active = last_active_ctx;
 		}
-	}
-	/* If either the bad command sequence failed or we did not play it */
-	if (ret || !rec_data->bad_rb_size) {
-		adreno_ringbuffer_restore(rb, rec_data->rb_buffer,
-				rec_data->rb_size);
-		ret = adreno_idle(device);
+
+		if ((i == (rec_data->bad_rb_size)) || (!rec_data->ib1)) {
+			KGSL_DRV_ERR(device, "Bad IB to NOP not found\n");
+			rec_data->step = FT_FAIL_BAD_CTXT_CMDS;
+			goto play_good_cmds;
+		}
+
+		ret = _adreno_recovery_resubmit(device, rb, context, rec_data,
+				rec_data->bad_rb_buffer, rec_data->bad_rb_size);
+
 		if (ret) {
-			/* If we fail here we can try to invalidate another
-			 * context and try recovering again */
-			ret = -EAGAIN;
-			goto done;
-		}
-		/* ringbuffer now has data from the last valid context id,
-		 * so restore the active_ctx to the last valid context */
-		if (rec_data->last_valid_ctx_id) {
-			struct kgsl_context *last_ctx =
-					idr_find(&device->context_idr,
-					rec_data->last_valid_ctx_id);
-			if (last_ctx)
-				adreno_dev->drawctxt_active = last_ctx->devctxt;
-		}
+			KGSL_DRV_ERR(device, "NOP faulty IB unsuccessful\n");
+			rec_data->step = FT_SKIP_EOF_BAD_CTXT_CMDS;
+		} else
+			goto play_good_cmds;
 	}
+
+	if (rec_data->step == FT_SKIP_EOF_BAD_CTXT_CMDS) {
+
+		for (i = 0; i < rec_data->bad_rb_size; i++) {
+			if (rec_data->bad_rb_buffer[i] ==
+				KGSL_END_OF_FRAME_IDENTIFIER) {
+				rec_data->bad_rb_buffer[0] = cp_nop_packet(i);
+				break;
+			}
+		}
+
+		/* EOF not found in RB, discard till EOF in
+		 * next IB submission */
+		if (i == rec_data->bad_rb_size) {
+			adreno_context->flags |= CTXT_FLAGS_SKIP_EOF;
+			rec_data->bad_rb_buffer[0] = cp_nop_packet(i);
+		}
+
+		ret = _adreno_recovery_resubmit(device, rb, context, rec_data,
+				rec_data->bad_rb_buffer, rec_data->bad_rb_size);
+
+		if (ret) {
+			KGSL_DRV_ERR(device, "Skip EOF unsuccessful\n");
+			rec_data->step = FT_FAIL_BAD_CTXT_CMDS;
+		} else
+			goto play_good_cmds;
+	}
+
+play_good_cmds:
+
+	if (rec_data->step == FT_FAIL_BAD_CTXT_CMDS) {
+		KGSL_DRV_ERR(device, "Bad context commands failed\n");
+	} else {
+		if (adreno_context) {
+			adreno_context->flags = (adreno_context->flags &
+				~CTXT_FLAGS_GPU_HANG) |
+				CTXT_FLAGS_GPU_HANG_RECOVERED;
+		}
+		adreno_dev->drawctxt_active = last_active_ctx;
+	}
+
+	ret = _adreno_recovery_resubmit(device, rb, context, rec_data,
+			rec_data->good_rb_buffer, rec_data->good_rb_size);
+
+	if (ret) {
+		/* If we fail here we can try to invalidate another
+		 * context and try fault tolerance again */
+		ret = -EAGAIN;
+		KGSL_DRV_ERR(device, "Playing good commands unsuccessful\n");
+		goto done;
+	}
+
+
+	/* ringbuffer now has data from the last valid context id,
+	 * so restore the active_ctx to the last valid context */
+	if (rec_data->last_valid_ctx_id) {
+		struct kgsl_context *last_ctx =
+				idr_find(&device->context_idr,
+				rec_data->last_valid_ctx_id);
+		if (last_ctx)
+			adreno_dev->drawctxt_active = last_ctx->devctxt;
+	}
+
 done:
 	/* Turn off iommu clocks */
 	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
@@ -1779,10 +1861,8 @@
 	/* We may need to replay commands multiple times based on whether
 	 * multiple contexts hang the GPU */
 	while (true) {
-		if (!ret)
-			ret = _adreno_recover_hang(device, rec_data, true);
-		else
-			ret = _adreno_recover_hang(device, rec_data, false);
+
+		ret = _adreno_recover_hang(device, rec_data);
 
 		if (-EAGAIN == ret) {
 			/* setup new recovery parameters and retry, this
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 514c86c..03c82bf 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -34,12 +34,15 @@
 #define KGSL_CMD_FLAGS_NONE             0x00000000
 #define KGSL_CMD_FLAGS_PMODE		0x00000001
 #define KGSL_CMD_FLAGS_INTERNAL_ISSUE	0x00000002
+#define KGSL_CMD_FLAGS_EOF		0x00000100
 
 /* Command identifiers */
 #define KGSL_CONTEXT_TO_MEM_IDENTIFIER	0x2EADBEEF
 #define KGSL_CMD_IDENTIFIER		0x2EEDFACE
 #define KGSL_START_OF_IB_IDENTIFIER	0x2EADEABE
 #define KGSL_END_OF_IB_IDENTIFIER	0x2ABEDEAD
+#define KGSL_END_OF_FRAME_IDENTIFIER	0x2E0F2E0F
+#define KGSL_NOP_IB_IDENTIFIER		0x20F20F20
 
 #ifdef CONFIG_MSM_SCM
 #define ADRENO_DEFAULT_PWRSCALE_POLICY  (&kgsl_pwrscale_policy_tz)
@@ -99,6 +102,7 @@
 	unsigned int instruction_size;
 	unsigned int ib_check_level;
 	unsigned int fast_hang_detect;
+	unsigned int ft_policy;
 	unsigned int gpulist_index;
 	struct ocmem_buf *ocmem_hdl;
 	unsigned int ocmem_base;
@@ -139,8 +143,12 @@
  * @rb_size - Number of valid dwords in rb_buffer
  * @bad_rb_buffer - Buffer that holds commands from the hanging context
  * bad_rb_size - Number of valid dwords in bad_rb_buffer
+ * @good_rb_buffer - Buffer that holds commands from good contexts
+ * @good_rb_size - Number of valid dwords in good_rb_buffer
  * @last_valid_ctx_id - The last context from which commands were placed in
  * ringbuffer before the GPU hung
+ * @step - Current recovery step being executed
+ * @err_code - Recovery error code
  * @fault - Indicates whether the hang was caused due to a pagefault
  * @start_of_replay_cmds - Offset in ringbuffer from where commands can be
  * replayed during recovery
@@ -155,12 +163,25 @@
 	unsigned int rb_size;
 	unsigned int *bad_rb_buffer;
 	unsigned int bad_rb_size;
+	unsigned int *good_rb_buffer;
+	unsigned int good_rb_size;
 	unsigned int last_valid_ctx_id;
+	unsigned int step;
+	unsigned int err_code;
 	int fault;
 	unsigned int start_of_replay_cmds;
 	unsigned int replay_for_snapshot;
 };
 
+enum ft_steps {
+	FT_REPLAY_BAD_CTXT_CMDS = 0,
+	FT_NOT_IB_BAD_CTXT_CMDS,
+	FT_SKIP_EOF_BAD_CTXT_CMDS,
+	FT_FAIL_BAD_CTXT_CMDS,
+	FT_PLAY_GOOD_CTXT_CMDS
+};
+
+
 extern struct adreno_gpudev adreno_a2xx_gpudev;
 extern struct adreno_gpudev adreno_a3xx_gpudev;
 
diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c
index 1989ff5..1f96f8c 100644
--- a/drivers/gpu/msm/adreno_debugfs.c
+++ b/drivers/gpu/msm/adreno_debugfs.c
@@ -64,5 +64,16 @@
 	adreno_dev->fast_hang_detect = 1;
 	debugfs_create_u32("fast_hang_detect", 0644, device->d_debugfs,
 			   &adreno_dev->fast_hang_detect);
+	/*
+	 * FT policy can be set to any of the options below.
+	 * FT_REPLAY_BAD_CTXT_CMDS -> try replay, NOP IB and skip to EOF
+	 * of bad cmds
+	 * FT_NOT_IB_BAD_CTXT_CMDS -> try NOP IB and skip to EOF of bad cmds
+	 * FT_SKIP_EOF_BAD_CTXT_CMDS -> try skip to EOF of bad cmds
+	 * By default the FT policy is set to FT_REPLAY_BAD_CTXT_CMDS.
+	 */
+	adreno_dev->ft_policy = FT_REPLAY_BAD_CTXT_CMDS;
+	debugfs_create_u32("fault_tolerance_policy", 0644, device->d_debugfs,
+			   &adreno_dev->ft_policy);
 
 }
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index 65dbd4c..133f92f 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -50,6 +50,8 @@
 #define CTXT_FLAGS_BEING_DESTROYED	BIT(13)
 /* User mode generated timestamps enabled */
 #define CTXT_FLAGS_USER_GENERATED_TS    BIT(14)
+/* Context skip till EOF */
+#define CTXT_FLAGS_SKIP_EOF             BIT(15)
 
 struct kgsl_device;
 struct adreno_device;
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 5573bdf..6a8bffb 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -711,6 +711,11 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
 	}
 
+	if (flags & KGSL_CMD_FLAGS_EOF) {
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_END_OF_FRAME_IDENTIFIER);
+	}
+
 	adreno_ringbuffer_submit(rb);
 
 	return timestamp;
@@ -977,12 +982,22 @@
 	drawctxt = context->devctxt;
 
 	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) {
-		KGSL_CTXT_WARN(device, "Context %p caused a gpu hang.."
+		KGSL_CTXT_ERR(device, "Context %p caused a gpu hang.."
 			" will not accept commands for context %d\n",
 			drawctxt, drawctxt->id);
 		return -EDEADLK;
 	}
 
+	if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
+		KGSL_CTXT_ERR(device,
+			"Context %p caused a gpu hang.."
+			" skipping commands for context till EOF %d\n",
+			drawctxt, drawctxt->id);
+		if (flags & KGSL_CMD_FLAGS_EOF)
+			drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF;
+		numibs = 0;
+	}
+
 	cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4),
 				GFP_KERNEL);
 	if (!link) {
@@ -1032,7 +1047,7 @@
 
 	*timestamp = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
 					drawctxt,
-					0,
+					(flags & KGSL_CMD_FLAGS_EOF),
 					&link[0], (cmds - link), *timestamp);
 
 	KGSL_CMD_INFO(device, "ctxt %d g %08x numibs %d ts %d\n",
@@ -1048,13 +1063,13 @@
 	 */
 	adreno_idle(device);
 #endif
+
 	/* If context hung and recovered then return error so that the
 	 * application may handle it */
 	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_RECOVERED)
-		return -EDEADLK;
+		return -EAGAIN;
 	else
 		return 0;
-
 }
 
 static void _turn_preamble_on_for_ib_seq(struct adreno_ringbuffer *rb,
@@ -1106,20 +1121,21 @@
 {
 	struct kgsl_device *device = rb->device;
 	unsigned int rb_rptr = rec_data->start_of_replay_cmds;
+	unsigned int good_rb_idx = 0, bad_rb_idx = 0, temp_rb_idx = 0;
+	unsigned int last_good_cmd_end_idx = 0, last_bad_cmd_end_idx = 0;
+	unsigned int cmd_start_idx = 0;
+	unsigned int val1 = 0;
+	int copy_rb_contents = 0;
+	unsigned int temp_rb_rptr;
+	struct kgsl_context *k_ctxt;
+	struct adreno_context *a_ctxt;
+	unsigned int size = rb->buffer_desc.size;
 	unsigned int *temp_rb_buffer = rec_data->rb_buffer;
 	int *rb_size = &rec_data->rb_size;
 	unsigned int *bad_rb_buffer = rec_data->bad_rb_buffer;
 	int *bad_rb_size = &rec_data->bad_rb_size;
-
-	unsigned int good_rb_idx = 0, cmd_start_idx = 0;
-	unsigned int val1 = 0;
-	struct kgsl_context *k_ctxt;
-	struct adreno_context *a_ctxt;
-	unsigned int bad_rb_idx = 0;
-	int copy_rb_contents = 0;
-	unsigned int temp_rb_rptr;
-	unsigned int size = rb->buffer_desc.size;
-	unsigned int good_cmd_start_idx = 0;
+	unsigned int *good_rb_buffer = rec_data->good_rb_buffer;
+	int *good_rb_size = &rec_data->good_rb_size;
 
 	/*
 	 * If the start index from where commands need to be copied is invalid
@@ -1144,9 +1160,11 @@
 		if (KGSL_CMD_IDENTIFIER == val1) {
 			/* Start is the NOP dword that comes before
 			 * KGSL_CMD_IDENTIFIER */
-			cmd_start_idx = bad_rb_idx - 1;
-			if (copy_rb_contents)
-				good_cmd_start_idx = good_rb_idx - 1;
+			cmd_start_idx = temp_rb_idx - 1;
+			if ((copy_rb_contents) && (good_rb_idx))
+				last_good_cmd_end_idx = good_rb_idx - 1;
+			if ((!copy_rb_contents) && (bad_rb_idx))
+				last_bad_cmd_end_idx = bad_rb_idx - 1;
 		}
 
 		/* check for context switch indicator */
@@ -1172,33 +1190,48 @@
 				!(a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) ||
 				!k_ctxt)) {
 				for (temp_idx = cmd_start_idx;
-					temp_idx < bad_rb_idx;
+					temp_idx < temp_rb_idx;
 					temp_idx++)
-					temp_rb_buffer[good_rb_idx++] =
-						bad_rb_buffer[temp_idx];
+					good_rb_buffer[good_rb_idx++] =
+						temp_rb_buffer[temp_idx];
 				rec_data->last_valid_ctx_id = val2;
 				copy_rb_contents = 1;
+				/* remove the good commands from bad buffer */
+				bad_rb_idx = last_bad_cmd_end_idx;
 			} else if (copy_rb_contents && k_ctxt &&
 				(a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) {
-				/* If we are changing to bad context then remove
-				 * the dwords we copied for this sequence from
-				 * the good buffer */
-				good_rb_idx = good_cmd_start_idx;
+
+				/* If we are switching back to a bad context
+				 * from a good one, the current command
+				 * sequence belongs to the bad context, so
+				 * copy it over to the bad buffer */
+				for (temp_idx = cmd_start_idx;
+					temp_idx < temp_rb_idx;
+					temp_idx++)
+					bad_rb_buffer[bad_rb_idx++] =
+						temp_rb_buffer[temp_idx];
+				/* If we are changing to bad context then
+				 * remove the dwords we copied for this
+				 * sequence from the good buffer */
+				good_rb_idx = last_good_cmd_end_idx;
 				copy_rb_contents = 0;
 			}
 			}
 		}
 
 		if (copy_rb_contents)
-			temp_rb_buffer[good_rb_idx++] = val1;
-		/* Copy both good and bad commands for replay to the bad
-		 * buffer */
-		bad_rb_buffer[bad_rb_idx++] = val1;
+			good_rb_buffer[good_rb_idx++] = val1;
+		else
+			bad_rb_buffer[bad_rb_idx++] = val1;
+
+		/* Copy both good and bad commands to temp buffer */
+		temp_rb_buffer[temp_rb_idx++] = val1;
 
 		rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, size);
 	}
-	*rb_size = good_rb_idx;
+	*good_rb_size = good_rb_idx;
 	*bad_rb_size = bad_rb_idx;
+	*rb_size = temp_rb_idx;
 }
 
 void