msm: kgsl: Introduce recovery data structure for hang recovery

Create a new data structure that holds all the information required
for recovering from hangs. This data structure is initialized as soon
as hang is detected because if the GPU is under DOS attack then by
the time we start recovery the recovery data may have changed.

Change-Id: I862787267c582b9ca95bb191fb3503ceba1faa8c
Signed-off-by: Shubhraprakash Das <sadas@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index c0c7adb..7a6fc24 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -797,65 +797,59 @@
 }
 
 static int
-adreno_recover_hang(struct kgsl_device *device)
+adreno_recover_hang(struct kgsl_device *device,
+			struct adreno_recovery_data *rec_data)
 {
 	int ret;
-	unsigned int *rb_buffer;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
 	unsigned int timestamp;
-	unsigned int num_rb_contents;
 	unsigned int reftimestamp;
 	unsigned int enable_ts;
 	unsigned int soptimestamp;
 	unsigned int eoptimestamp;
-	unsigned int context_id;
 	struct kgsl_context *context;
 	struct adreno_context *adreno_context;
 	int next = 0;
 
-	KGSL_DRV_ERR(device, "Starting recovery from 3D GPU hang....\n");
-	rb_buffer = vmalloc(rb->buffer_desc.size);
-	if (!rb_buffer) {
-		KGSL_MEM_ERR(device,
-			"Failed to allocate memory for recovery: %x\n",
-			rb->buffer_desc.size);
-		return -ENOMEM;
-	}
+	KGSL_DRV_ERR(device,
+	"Starting recovery from 3D GPU hang. Recovery parameters: IB1: 0x%X, "
+	"Bad context_id: %u, global_eop: 0x%x\n", rec_data->ib1,
+	rec_data->context_id, rec_data->global_eop);
+
 	/* Extract valid contents from rb which can stil be executed after
 	 * hang */
-	ret = adreno_ringbuffer_extract(rb, rb_buffer, &num_rb_contents);
+	ret = adreno_ringbuffer_extract(rb, rec_data);
 	if (ret)
 		goto done;
-	kgsl_sharedmem_readl(&device->memstore, &context_id,
-				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-					current_context));
-	context = idr_find(&device->context_idr, context_id);
+
+	context = idr_find(&device->context_idr, rec_data->context_id);
 	if (context == NULL) {
 		KGSL_DRV_ERR(device, "Last context unknown id:%d\n",
-				context_id);
-		context_id = KGSL_MEMSTORE_GLOBAL;
+				rec_data->context_id);
+		rec_data->context_id = KGSL_MEMSTORE_GLOBAL;
 	}
 
 	timestamp = rb->timestamp[KGSL_MEMSTORE_GLOBAL];
 	KGSL_DRV_ERR(device, "Last issued global timestamp: %x\n", timestamp);
 
 	kgsl_sharedmem_readl(&device->memstore, &reftimestamp,
-				KGSL_MEMSTORE_OFFSET(context_id,
+				KGSL_MEMSTORE_OFFSET(rec_data->context_id,
 					ref_wait_ts));
 	kgsl_sharedmem_readl(&device->memstore, &enable_ts,
-				KGSL_MEMSTORE_OFFSET(context_id,
+				KGSL_MEMSTORE_OFFSET(rec_data->context_id,
 					ts_cmp_enable));
 	kgsl_sharedmem_readl(&device->memstore, &soptimestamp,
-				KGSL_MEMSTORE_OFFSET(context_id,
+				KGSL_MEMSTORE_OFFSET(rec_data->context_id,
 					soptimestamp));
 	kgsl_sharedmem_readl(&device->memstore, &eoptimestamp,
-				KGSL_MEMSTORE_OFFSET(context_id,
+				KGSL_MEMSTORE_OFFSET(rec_data->context_id,
 					eoptimestamp));
 	/* Make sure memory is synchronized before restarting the GPU */
 	mb();
 	KGSL_CTXT_ERR(device,
-		"Context id that caused a GPU hang: %d\n", context_id);
+		"Context id that caused a GPU hang: %d\n",
+		rec_data->context_id);
 	/* restart device */
 	ret = adreno_stop(device);
 	if (ret)
@@ -866,19 +860,19 @@
 	KGSL_DRV_ERR(device, "Device has been restarted after hang\n");
 	/* Restore timestamp states */
 	kgsl_sharedmem_writel(&device->memstore,
-			KGSL_MEMSTORE_OFFSET(context_id, soptimestamp),
-			soptimestamp);
+			KGSL_MEMSTORE_OFFSET(rec_data->context_id,
+			soptimestamp), soptimestamp);
 	kgsl_sharedmem_writel(&device->memstore,
-			KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp),
-			eoptimestamp);
+			KGSL_MEMSTORE_OFFSET(rec_data->context_id,
+			eoptimestamp), eoptimestamp);
 
-	if (num_rb_contents) {
+	if (rec_data->rb_size) {
 		kgsl_sharedmem_writel(&device->memstore,
-			KGSL_MEMSTORE_OFFSET(context_id, ref_wait_ts),
-			reftimestamp);
+			KGSL_MEMSTORE_OFFSET(rec_data->context_id,
+			ref_wait_ts), reftimestamp);
 		kgsl_sharedmem_writel(&device->memstore,
-			KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable),
-			enable_ts);
+			KGSL_MEMSTORE_OFFSET(rec_data->context_id,
+			ts_cmp_enable), enable_ts);
 	}
 	/* Make sure all writes are posted before the GPU reads them */
 	wmb();
@@ -900,7 +894,7 @@
 	while ((context = idr_get_next(&device->context_idr, &next))) {
 		if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT !=
 			context->reset_status) {
-			if (context->id != context_id)
+			if (context->id != rec_data->context_id)
 				context->reset_status =
 				KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT;
 			else
@@ -911,18 +905,71 @@
 	}
 
 	/* Restore valid commands in ringbuffer */
-	adreno_ringbuffer_restore(rb, rb_buffer, num_rb_contents);
+	adreno_ringbuffer_restore(rb, rec_data->rb_buffer, rec_data->rb_size);
 	rb->timestamp[KGSL_MEMSTORE_GLOBAL] = timestamp;
 	/* wait for idle */
 	ret = adreno_idle(device, KGSL_TIMEOUT_DEFAULT);
 done:
-	vfree(rb_buffer);
+	return ret;
+}
+
+static void adreno_destroy_recovery_data(struct adreno_recovery_data *rec_data)
+{
+	vfree(rec_data->rb_buffer);
+	vfree(rec_data->bad_rb_buffer);
+}
+
+static int adreno_setup_recovery_data(struct kgsl_device *device,
+					struct adreno_recovery_data *rec_data)
+{
+	int ret = 0;
+	unsigned int ib1_sz, ib2_sz;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
+
+	memset(rec_data, 0, sizeof(*rec_data));
+
+	adreno_regread(device, REG_CP_IB1_BUFSZ, &ib1_sz);
+	adreno_regread(device, REG_CP_IB2_BUFSZ, &ib2_sz);
+	if (ib1_sz || ib2_sz)
+		adreno_regread(device, REG_CP_IB1_BASE, &rec_data->ib1);
+
+	kgsl_sharedmem_readl(&device->memstore, &rec_data->context_id,
+			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+			current_context));
+
+	kgsl_sharedmem_readl(&device->memstore,
+				&rec_data->global_eop,
+				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+				eoptimestamp));
+
+	rec_data->rb_buffer = vmalloc(rb->buffer_desc.size);
+	if (!rec_data->rb_buffer) {
+		KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
+				rb->buffer_desc.size);
+		return -ENOMEM;
+	}
+
+	rec_data->bad_rb_buffer = vmalloc(rb->buffer_desc.size);
+	if (!rec_data->bad_rb_buffer) {
+		KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
+				rb->buffer_desc.size);
+		ret = -ENOMEM;
+		goto done;
+	}
+
+done:
+	if (ret) {
+		vfree(rec_data->rb_buffer);
+		vfree(rec_data->bad_rb_buffer);
+	}
 	return ret;
 }
 
 int adreno_dump_and_recover(struct kgsl_device *device)
 {
 	int result = -ETIMEDOUT;
+	struct adreno_recovery_data rec_data;
 
 	if (device->state == KGSL_STATE_HUNG)
 		goto done;
@@ -937,7 +984,8 @@
 		INIT_COMPLETION(device->recovery_gate);
 		/* Detected a hang */
 
-
+		/* Get the recovery data as soon as hang is detected */
+		result = adreno_setup_recovery_data(device, &rec_data);
 		/*
 		 * Trigger an automatic dump of the state to
 		 * the console
@@ -950,7 +998,8 @@
 		 */
 		kgsl_device_snapshot(device, 1);
 
-		result = adreno_recover_hang(device);
+		result = adreno_recover_hang(device, &rec_data);
+		adreno_destroy_recovery_data(&rec_data);
 		if (result) {
 			kgsl_pwrctrl_set_state(device, KGSL_STATE_HUNG);
 		} else {