msm: kgsl: Renaming recovery to fault tolerance

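Rename the GPU hang "recovery" code to "fault tolerance" (FT) and update
the affected functions, structures, device state and log messages
accordingly. Fault tolerance messages now use a dedicated KGSL_FT_* log
category whose level is set through the new log_level_ft debugfs file.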

Change-Id: I5f249806026ac514c4aff7da45c3a4e8cc2f8c34
Signed-off-by: Tarun Karra <tkarra@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 88756c0..893a6ed 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -1199,7 +1199,7 @@
 	int status = -EINVAL;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
-	if (KGSL_STATE_DUMP_AND_RECOVER != device->state)
+	if (KGSL_STATE_DUMP_AND_FT != device->state)
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
 
 	/* Power up the device */
@@ -1284,9 +1284,9 @@
 
 	status = adreno_ringbuffer_start(&adreno_dev->ringbuffer, init_ram);
 	if (status == 0) {
-		/* While recovery is on we do not want timer to
+		/* While fault tolerance is on we do not want timer to
 		 * fire and attempt to change any device state */
-		if (KGSL_STATE_DUMP_AND_RECOVER != device->state)
+		if (KGSL_STATE_DUMP_AND_FT != device->state)
 			mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
 		return 0;
 	}
@@ -1325,26 +1325,26 @@
 }
 
 static void adreno_mark_context_status(struct kgsl_device *device,
-					int recovery_status)
+					int ft_status)
 {
 	struct kgsl_context *context;
 	int next = 0;
 	/*
 	 * Set the reset status of all contexts to
 	 * INNOCENT_CONTEXT_RESET_EXT except for the bad context
-	 * since thats the guilty party, if recovery failed then
+	 * since thats the guilty party, if fault tolerance failed then
 	 * mark all as guilty
 	 */
 	while ((context = idr_get_next(&device->context_idr, &next))) {
 		struct adreno_context *adreno_context = context->devctxt;
-		if (recovery_status) {
+		if (ft_status) {
 			context->reset_status =
 					KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
 			adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
 		} else if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT !=
 			context->reset_status) {
 			if (adreno_context->flags & (CTXT_FLAGS_GPU_HANG |
-				CTXT_FLAGS_GPU_HANG_RECOVERED))
+				CTXT_FLAGS_GPU_HANG_FT))
 				context->reset_status =
 				KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
 			else
@@ -1379,11 +1379,11 @@
 	}
 }
 
-static void adreno_destroy_recovery_data(struct adreno_recovery_data *rec_data)
+static void adreno_destroy_ft_data(struct adreno_ft_data *ft_data)
 {
-	vfree(rec_data->rb_buffer);
-	vfree(rec_data->bad_rb_buffer);
-	vfree(rec_data->good_rb_buffer);
+	vfree(ft_data->rb_buffer);
+	vfree(ft_data->bad_rb_buffer);
+	vfree(ft_data->good_rb_buffer);
 }
 
 static int _find_start_of_cmd_seq(struct adreno_ringbuffer *rb,
@@ -1479,13 +1479,13 @@
 		status = _find_start_of_cmd_seq(rb, &temp_rb_rptr, false);
 		if (!status) {
 			*rb_rptr = temp_rb_rptr;
-			KGSL_DRV_ERR(rb->device,
+			KGSL_FT_INFO(rb->device,
 			"Offset of cmd sequence after eop timestamp: 0x%x\n",
 			temp_rb_rptr / sizeof(unsigned int));
 		}
 	}
 	if (status)
-		KGSL_DRV_ERR(rb->device,
+		KGSL_FT_ERR(rb->device,
 		"Failed to find the command sequence after eop timestamp\n");
 	return status;
 }
@@ -1512,7 +1512,8 @@
 				/* go till start of command sequence */
 				status = _find_start_of_cmd_seq(rb,
 						&temp_rb_rptr, false);
-				KGSL_DRV_INFO(rb->device,
+
+				KGSL_FT_INFO(rb->device,
 				"Found the hanging IB at offset 0x%x\n",
 				temp_rb_rptr / sizeof(unsigned int));
 				break;
@@ -1526,7 +1527,7 @@
 		 * can point to the context switch */
 		if (val[i] == KGSL_CONTEXT_TO_MEM_IDENTIFIER) {
 			if (ctx_switch) {
-				KGSL_DRV_ERR(rb->device,
+				KGSL_FT_ERR(rb->device,
 				"Context switch encountered before bad "
 				"IB found\n");
 				break;
@@ -1544,8 +1545,8 @@
 	return status;
 }
 
-static int adreno_setup_recovery_data(struct kgsl_device *device,
-					struct adreno_recovery_data *rec_data)
+static int adreno_setup_ft_data(struct kgsl_device *device,
+					struct adreno_ft_data *ft_data)
 {
 	int ret = 0;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
@@ -1554,104 +1555,105 @@
 	struct adreno_context *adreno_context;
 	unsigned int rb_rptr = rb->wptr * sizeof(unsigned int);
 
-	memset(rec_data, 0, sizeof(*rec_data));
-	rec_data->start_of_replay_cmds = 0xFFFFFFFF;
-	rec_data->replay_for_snapshot = 0xFFFFFFFF;
+	memset(ft_data, 0, sizeof(*ft_data));
+	ft_data->start_of_replay_cmds = 0xFFFFFFFF;
+	ft_data->replay_for_snapshot = 0xFFFFFFFF;
 
-	adreno_regread(device, REG_CP_IB1_BASE, &rec_data->ib1);
+	adreno_regread(device, REG_CP_IB1_BASE, &ft_data->ib1);
 
-	kgsl_sharedmem_readl(&device->memstore, &rec_data->context_id,
+	kgsl_sharedmem_readl(&device->memstore, &ft_data->context_id,
 			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
 			current_context));
 
 	kgsl_sharedmem_readl(&device->memstore,
-			&rec_data->global_eop,
+			&ft_data->global_eop,
 			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
 			eoptimestamp));
 
-	rec_data->rb_buffer = vmalloc(rb->buffer_desc.size);
-	if (!rec_data->rb_buffer) {
+	ft_data->rb_buffer = vmalloc(rb->buffer_desc.size);
+	if (!ft_data->rb_buffer) {
 		KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
 				rb->buffer_desc.size);
 		return -ENOMEM;
 	}
 
-	rec_data->bad_rb_buffer = vmalloc(rb->buffer_desc.size);
-	if (!rec_data->bad_rb_buffer) {
+	ft_data->bad_rb_buffer = vmalloc(rb->buffer_desc.size);
+	if (!ft_data->bad_rb_buffer) {
 		KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
 				rb->buffer_desc.size);
 		ret = -ENOMEM;
 		goto done;
 	}
 
-	rec_data->good_rb_buffer = vmalloc(rb->buffer_desc.size);
-	if (!rec_data->good_rb_buffer) {
+	ft_data->good_rb_buffer = vmalloc(rb->buffer_desc.size);
+	if (!ft_data->good_rb_buffer) {
 		KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
 				rb->buffer_desc.size);
 		ret = -ENOMEM;
 		goto done;
 	}
-	rec_data->fault = device->mmu.fault;
-	rec_data->step =  adreno_dev->ft_policy;
+	ft_data->fault = device->mmu.fault;
+	ft_data->step =  adreno_dev->ft_policy;
 
 	/* find the start of bad command sequence in rb */
-	context = idr_find(&device->context_idr, rec_data->context_id);
+	context = idr_find(&device->context_idr, ft_data->context_id);
 	/* Look for the command stream that is right after the global eop */
 
 	if (!context) {
 		/*
-		 * If there is no context then recovery does not need to
+		 * If there is no context then fault tolerance does not need to
 		 * replay anything, just reset GPU and thats it
 		 */
 		goto done;
 	}
 	ret = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
-					rec_data->global_eop + 1, false);
+					ft_data->global_eop + 1, false);
 	if (ret)
 		goto done;
 
-	rec_data->start_of_replay_cmds = rb_rptr;
+	ft_data->start_of_replay_cmds = rb_rptr;
 
 	adreno_context = context->devctxt;
 	if (adreno_context->flags & CTXT_FLAGS_PREAMBLE) {
-		if (rec_data->ib1) {
+		if (ft_data->ib1) {
 			ret = _find_hanging_ib_sequence(rb,
-					&rb_rptr, rec_data->ib1);
+					&rb_rptr, ft_data->ib1);
 			if (ret) {
-				KGSL_DRV_ERR(device,
+				KGSL_FT_ERR(device,
 				"Start not found for replay IB sequence\n");
 				ret = 0;
 				goto done;
 			}
-			rec_data->start_of_replay_cmds = rb_rptr;
-			rec_data->replay_for_snapshot = rb_rptr;
+			ft_data->start_of_replay_cmds = rb_rptr;
+			ft_data->replay_for_snapshot = rb_rptr;
 		}
 	}
 
 done:
 	if (ret) {
-		vfree(rec_data->rb_buffer);
-		vfree(rec_data->bad_rb_buffer);
-		vfree(rec_data->good_rb_buffer);
+		vfree(ft_data->rb_buffer);
+		vfree(ft_data->bad_rb_buffer);
+		vfree(ft_data->good_rb_buffer);
 	}
 	return ret;
 }
 
 static int
-_adreno_restart_device(struct kgsl_device *device,
-					   struct kgsl_context *context)
+_adreno_ft_restart_device(struct kgsl_device *device,
+					   struct kgsl_context *context,
+					   struct adreno_ft_data *ft_data)
 {
 
 	struct adreno_context *adreno_context = context->devctxt;
 
 	/* restart device */
 	if (adreno_stop(device)) {
-		KGSL_DRV_ERR(device, "Device stop failed in recovery\n");
+		KGSL_FT_ERR(device, "Device stop failed\n");
 		return 1;
 	}
 
 	if (adreno_start(device, true)) {
-		KGSL_DRV_ERR(device, "Device start failed in recovery\n");
+		KGSL_FT_ERR(device, "Device start failed\n");
 		return 1;
 	}
 
@@ -1671,15 +1673,15 @@
 }
 
 static int
-_adreno_recovery_resubmit(struct kgsl_device *device,
+_adreno_ft_resubmit_rb(struct kgsl_device *device,
 			struct adreno_ringbuffer *rb,
 			struct kgsl_context *context,
-			struct adreno_recovery_data *rec_data,
+			struct adreno_ft_data *ft_data,
 			unsigned int *buff, unsigned int size)
 {
 	unsigned int ret = 0;
 
-	if (_adreno_restart_device(device, context))
+	if (_adreno_ft_restart_device(device, context, ft_data))
 		return 1;
 
 	if (size) {
@@ -1695,8 +1697,8 @@
 
 
 static int
-_adreno_recover_hang(struct kgsl_device *device,
-			struct adreno_recovery_data *rec_data)
+_adreno_ft(struct kgsl_device *device,
+			struct adreno_ft_data *ft_data)
 {
 	int ret = 0, i;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
@@ -1705,10 +1707,10 @@
 	struct adreno_context *adreno_context = NULL;
 	struct adreno_context *last_active_ctx = adreno_dev->drawctxt_active;
 
-	context = idr_find(&device->context_idr, rec_data->context_id);
+	context = idr_find(&device->context_idr, ft_data->context_id);
 	if (context == NULL) {
-		KGSL_DRV_ERR(device, "Last context unknown id:%d\n",
-			rec_data->context_id);
+		KGSL_FT_CRIT(device, "Last context unknown id:%d\n",
+			ft_data->context_id);
 	} else {
 		adreno_context = context->devctxt;
 		adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
@@ -1717,119 +1719,124 @@
 		 * detected a hang for it
 		 */
 		context->wait_on_invalid_ts = false;
+
+		KGSL_FT_INFO(device, "Context found\n");
 	}
 
 	/* Extract valid contents from rb which can still be executed after
 	 * hang */
-	adreno_ringbuffer_extract(rb, rec_data);
+	adreno_ringbuffer_extract(rb, ft_data);
 
 	/* Do not try the bad commands if  hang is due to a fault */
-	if (rec_data->fault)
+	if (ft_data->fault) {
+		KGSL_FT_ERR(device, "Page fault no FT for bad context\n");
 		goto play_good_cmds;
+	}
 
-	if (rec_data->step == FT_REPLAY_BAD_CTXT_CMDS) {
+	if (ft_data->step == FT_REPLAY_BAD_CTXT_CMDS) {
 
-		ret = _adreno_recovery_resubmit(device, rb, context, rec_data,
-				rec_data->bad_rb_buffer, rec_data->bad_rb_size);
+		ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
+				ft_data->bad_rb_buffer, ft_data->bad_rb_size);
 
-		if (ret)
-			rec_data->step = FT_NOT_IB_BAD_CTXT_CMDS;
-		else
+		if (ret) {
+			KGSL_FT_INFO(device, "Replay unsuccessful\n");
+			ft_data->step = FT_NOP_IB_BAD_CTXT_CMDS;
+		} else
 			goto play_good_cmds;
 
 	}
 
-	if (rec_data->step == FT_NOT_IB_BAD_CTXT_CMDS) {
+	if (ft_data->step == FT_NOP_IB_BAD_CTXT_CMDS) {
 
-		for (i = 0; i < rec_data->bad_rb_size; i++) {
-			if ((rec_data->bad_rb_buffer[i] ==
-				CP_HDR_INDIRECT_BUFFER_PFD) &&
-				(rec_data->bad_rb_buffer[i+1] ==
-				rec_data->ib1)) {
+		for (i = 0; i < ft_data->bad_rb_size; i++) {
+			if ((ft_data->bad_rb_buffer[i] ==
+					CP_HDR_INDIRECT_BUFFER_PFD) &&
+				(ft_data->bad_rb_buffer[i+1] == ft_data->ib1)) {
 
-				rec_data->bad_rb_buffer[i] = cp_nop_packet(2);
-				rec_data->bad_rb_buffer[i+1] =
+				ft_data->bad_rb_buffer[i] = cp_nop_packet(2);
+				ft_data->bad_rb_buffer[i+1] =
 							KGSL_NOP_IB_IDENTIFIER;
-				rec_data->bad_rb_buffer[i+2] =
+				ft_data->bad_rb_buffer[i+2] =
 							KGSL_NOP_IB_IDENTIFIER;
 				break;
 			}
 		}
 
-		if ((i == (rec_data->bad_rb_size)) || (!rec_data->ib1)) {
-			KGSL_DRV_ERR(device, "Bad IB to NOP not found\n");
-			rec_data->step = FT_FAIL_BAD_CTXT_CMDS;
+		if ((i == (ft_data->bad_rb_size)) || (!ft_data->ib1)) {
+			KGSL_FT_ERR(device, "Bad IB to NOP not found\n");
+			ft_data->step = FT_FAIL_BAD_CTXT_CMDS;
 			goto play_good_cmds;
 		}
 
-		ret = _adreno_recovery_resubmit(device, rb, context, rec_data,
-				rec_data->bad_rb_buffer, rec_data->bad_rb_size);
+		ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
+				ft_data->bad_rb_buffer, ft_data->bad_rb_size);
 
 		if (ret) {
-			KGSL_DRV_ERR(device, "NOP faulty IB unsuccessful\n");
-			rec_data->step = FT_SKIP_EOF_BAD_CTXT_CMDS;
+			KGSL_FT_INFO(device, "NOP faulty IB unsuccessful\n");
+			ft_data->step = FT_SKIP_EOF_BAD_CTXT_CMDS;
 		} else
 			goto play_good_cmds;
 	}
 
-	if (rec_data->step == FT_SKIP_EOF_BAD_CTXT_CMDS) {
+	if (ft_data->step == FT_SKIP_EOF_BAD_CTXT_CMDS) {
 
-		for (i = 0; i < rec_data->bad_rb_size; i++) {
-			if (rec_data->bad_rb_buffer[i] ==
-				KGSL_END_OF_FRAME_IDENTIFIER) {
-				rec_data->bad_rb_buffer[0] = cp_nop_packet(i);
+		for (i = 0; i < ft_data->bad_rb_size; i++) {
+			if (ft_data->bad_rb_buffer[i] ==
+					KGSL_END_OF_FRAME_IDENTIFIER) {
+				ft_data->bad_rb_buffer[0] = cp_nop_packet(i);
 				break;
 			}
 		}
 
 		/* EOF not found in RB, discard till EOF in
 		   next IB submission */
-		if (i == rec_data->bad_rb_size) {
+		if (i == ft_data->bad_rb_size) {
 			adreno_context->flags |= CTXT_FLAGS_SKIP_EOF;
-			rec_data->bad_rb_buffer[0] = cp_nop_packet(i);
+			ft_data->bad_rb_buffer[0] = cp_nop_packet(i);
 		}
 
-		ret = _adreno_recovery_resubmit(device, rb, context, rec_data,
-				rec_data->bad_rb_buffer, rec_data->bad_rb_size);
+		ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
+				ft_data->bad_rb_buffer, ft_data->bad_rb_size);
 
 		if (ret) {
-			KGSL_DRV_ERR(device, "Skip EOF unsuccessful\n");
-			rec_data->step = FT_FAIL_BAD_CTXT_CMDS;
+			KGSL_FT_INFO(device, "Skip EOF unsuccessful\n");
+			ft_data->step = FT_FAIL_BAD_CTXT_CMDS;
 		} else
 			goto play_good_cmds;
 	}
 
 play_good_cmds:
 
-	if (rec_data->step == FT_FAIL_BAD_CTXT_CMDS)
-		KGSL_DRV_ERR(device, "Bad context commands failed\n");
+	if (ft_data->step == FT_FAIL_BAD_CTXT_CMDS)
+		KGSL_FT_ERR(device, "Bad context commands failed\n");
 	else {
+		KGSL_FT_INFO(device, "Bad context commands success\n");
 
 		if (adreno_context) {
 			adreno_context->flags = (adreno_context->flags &
-			~CTXT_FLAGS_GPU_HANG) | CTXT_FLAGS_GPU_HANG_RECOVERED;
+				~CTXT_FLAGS_GPU_HANG) | CTXT_FLAGS_GPU_HANG_FT;
 		}
 		adreno_dev->drawctxt_active = last_active_ctx;
 	}
 
-	ret = _adreno_recovery_resubmit(device, rb, context, rec_data,
-			rec_data->good_rb_buffer, rec_data->good_rb_size);
+	ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
+			ft_data->good_rb_buffer, ft_data->good_rb_size);
 
 	if (ret) {
 		/* If we fail here we can try to invalidate another
 		 * context and try fault tolerance again */
 		ret = -EAGAIN;
-		KGSL_DRV_ERR(device, "Playing good commands unsuccessful\n");
+		KGSL_FT_ERR(device, "Playing good commands unsuccessful\n");
 		goto done;
-	}
-
+	} else
+		KGSL_FT_INFO(device, "Playing good commands successful\n");
 
 	/* ringbuffer now has data from the last valid context id,
 	 * so restore the active_ctx to the last valid context */
-	if (rec_data->last_valid_ctx_id) {
+	if (ft_data->last_valid_ctx_id) {
 		struct kgsl_context *last_ctx =
 				idr_find(&device->context_idr,
-				rec_data->last_valid_ctx_id);
+				ft_data->last_valid_ctx_id);
 		if (last_ctx)
 			adreno_dev->drawctxt_active = last_ctx->devctxt;
 	}
@@ -1842,40 +1849,42 @@
 }
 
 static int
-adreno_recover_hang(struct kgsl_device *device,
-			struct adreno_recovery_data *rec_data)
+adreno_ft(struct kgsl_device *device,
+			struct adreno_ft_data *ft_data)
 {
 	int ret = 0;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
 	unsigned int timestamp;
 
-	KGSL_DRV_ERR(device,
-	"Starting recovery from 3D GPU hang. Recovery parameters: IB1: 0x%X, "
+	KGSL_FT_INFO(device,
+	"Start Parameters: IB1: 0x%X, "
 	"Bad context_id: %u, global_eop: 0x%x\n",
-	rec_data->ib1, rec_data->context_id, rec_data->global_eop);
+	ft_data->ib1, ft_data->context_id, ft_data->global_eop);
 
 	timestamp = rb->timestamp[KGSL_MEMSTORE_GLOBAL];
-	KGSL_DRV_ERR(device, "Last issued global timestamp: %x\n", timestamp);
+	KGSL_FT_INFO(device, "Last issued global timestamp: %x\n", timestamp);
 
 	/* We may need to replay commands multiple times based on whether
 	 * multiple contexts hang the GPU */
 	while (true) {
 
-		ret = _adreno_recover_hang(device, rec_data);
+		ret = _adreno_ft(device, ft_data);
+
+		KGSL_FT_CRIT(device, "POLICY: 0x%X\n", ft_data->step);
 
 		if (-EAGAIN == ret) {
-			/* setup new recovery parameters and retry, this
+			/* setup new fault tolerance parameters and retry, this
 			 * means more than 1 contexts are causing hang */
-			adreno_destroy_recovery_data(rec_data);
-			ret = adreno_setup_recovery_data(device, rec_data);
+			adreno_destroy_ft_data(ft_data);
+			ret = adreno_setup_ft_data(device, ft_data);
 			if (ret)
 				goto done;
-			KGSL_DRV_ERR(device,
-			"Retry recovery from 3D GPU hang. Recovery parameters: "
+			KGSL_FT_INFO(device,
+			"Retry. Parameters: "
 			"IB1: 0x%X, Bad context_id: %u, global_eop: 0x%x\n",
-			rec_data->ib1, rec_data->context_id,
-			rec_data->global_eop);
+			ft_data->ib1, ft_data->context_id,
+			ft_data->global_eop);
 		} else {
 			break;
 		}
@@ -1884,7 +1893,7 @@
 	if (ret)
 		goto done;
 
-	/* Restore correct states after recovery */
+	/* Restore correct states after fault tolerance */
 	if (adreno_dev->drawctxt_active)
 		device->mmu.hwpagetable =
 			adreno_dev->drawctxt_active->pagetable;
@@ -1903,34 +1912,32 @@
 done:
 	adreno_set_max_ts_for_bad_ctxs(device);
 	adreno_mark_context_status(device, ret);
-	if (!ret)
-		KGSL_DRV_ERR(device, "Recovery succeeded\n");
-	else
-		KGSL_DRV_ERR(device, "Recovery failed\n");
+	if (ret)
+		KGSL_FT_ERR(device, "Fault Tolerance failed\n");
 	return ret;
 }
 
 int
-adreno_dump_and_recover(struct kgsl_device *device)
+adreno_dump_and_exec_ft(struct kgsl_device *device)
 {
 	int result = -ETIMEDOUT;
-	struct adreno_recovery_data rec_data;
+	struct adreno_ft_data ft_data;
 
 	if (device->state == KGSL_STATE_HUNG)
 		goto done;
-	if (device->state == KGSL_STATE_DUMP_AND_RECOVER) {
+	if (device->state == KGSL_STATE_DUMP_AND_FT) {
 		mutex_unlock(&device->mutex);
-		wait_for_completion(&device->recovery_gate);
+		wait_for_completion(&device->ft_gate);
 		mutex_lock(&device->mutex);
 		if (device->state != KGSL_STATE_HUNG)
 			result = 0;
 	} else {
-		kgsl_pwrctrl_set_state(device, KGSL_STATE_DUMP_AND_RECOVER);
-		INIT_COMPLETION(device->recovery_gate);
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_DUMP_AND_FT);
+		INIT_COMPLETION(device->ft_gate);
 		/* Detected a hang */
 
-		/* Get the recovery data as soon as hang is detected */
-		result = adreno_setup_recovery_data(device, &rec_data);
+		/* Get the fault tolerance data as soon as hang is detected */
+		result = adreno_setup_ft_data(device, &ft_data);
 		/*
 		 * Trigger an automatic dump of the state to
 		 * the console
@@ -1944,8 +1951,8 @@
 		kgsl_device_snapshot(device, 1);
 
 		if (!result) {
-			result = adreno_recover_hang(device, &rec_data);
-			adreno_destroy_recovery_data(&rec_data);
+			result = adreno_ft(device, &ft_data);
+			adreno_destroy_ft_data(&ft_data);
 		}
 		if (result) {
 			kgsl_pwrctrl_set_state(device, KGSL_STATE_HUNG);
@@ -1953,12 +1960,12 @@
 			kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
 			mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
 		}
-		complete_all(&device->recovery_gate);
+		complete_all(&device->ft_gate);
 	}
 done:
 	return result;
 }
-EXPORT_SYMBOL(adreno_dump_and_recover);
+EXPORT_SYMBOL(adreno_dump_and_exec_ft);
 
 static int adreno_getproperty(struct kgsl_device *device,
 				enum kgsl_property_type type,
@@ -2190,9 +2197,9 @@
 
 err:
 	KGSL_DRV_ERR(device, "spun too long waiting for RB to idle\n");
-	if (KGSL_STATE_DUMP_AND_RECOVER != device->state &&
-		!adreno_dump_and_recover(device)) {
+	if (KGSL_STATE_DUMP_AND_FT != device->state &&
+		!adreno_dump_and_exec_ft(device)) {
 		wait_time = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
 		goto retry;
 	}
 	return -ETIMEDOUT;
@@ -2593,7 +2600,8 @@
  * @context - pointer to the active KGSL context
  * @timestamp - the timestamp that the process was waiting for
  *
- * Process a possible GPU hang and try to recover from it cleanly
+ * Process a possible GPU hang and attempt to recover from it cleanly
+ * using fault tolerance
  */
 static int adreno_handle_hang(struct kgsl_device *device,
 	struct kgsl_context *context, unsigned int timestamp)
@@ -2621,8 +2629,8 @@
 			KGSL_TIMESTAMP_RETIRED),
 		      adreno_dev->ringbuffer.wptr, rptr);
 
-	/* Return 0 after a successful recovery */
-	if (!adreno_dump_and_recover(device))
+	/* Return 0 if fault tolerance succeeded */
+	if (!adreno_dump_and_exec_ft(device))
 		return 0;
 
 	return -ETIMEDOUT;
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 03c82bf..23fce2f 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -134,8 +134,8 @@
 };
 
 /*
- * struct adreno_recovery_data - Structure that contains all information to
- * perform gpu recovery from hangs
+ * struct adreno_ft_data - Structure that contains all information to
+ * perform gpu fault tolerance
  * @ib1 - IB1 that the GPU was executing when hang happened
  * @context_id - Context which caused the hang
  * @global_eop - eoptimestamp at time of hang
@@ -147,15 +147,15 @@
  * good_rb_size - Number of valid dwords in good_rb_buffer
  * @last_valid_ctx_id - The last context from which commands were placed in
  * ringbuffer before the GPU hung
- * @step - Current recovery step being executed
- * @err_code - Recovery error code
+ * @step - Current fault tolerance step being executed
+ * @err_code - Fault tolerance error code
  * @fault - Indicates whether the hang was caused due to a pagefault
  * @start_of_replay_cmds - Offset in ringbuffer from where commands can be
- * replayed during recovery
+ * replayed during fault tolerance
  * @replay_for_snapshot - Offset in ringbuffer where IB's can be saved for
  * replaying with snapshot
  */
-struct adreno_recovery_data {
+struct adreno_ft_data {
 	unsigned int ib1;
 	unsigned int context_id;
 	unsigned int global_eop;
@@ -167,7 +167,6 @@
 	unsigned int good_rb_size;
 	unsigned int last_valid_ctx_id;
 	unsigned int step;
-	unsigned int err_code;
 	int fault;
 	unsigned int start_of_replay_cmds;
 	unsigned int replay_for_snapshot;
@@ -175,13 +174,12 @@
 
 enum ft_steps {
 	FT_REPLAY_BAD_CTXT_CMDS = 0,
-	FT_NOT_IB_BAD_CTXT_CMDS,
+	FT_NOP_IB_BAD_CTXT_CMDS,
 	FT_SKIP_EOF_BAD_CTXT_CMDS,
 	FT_FAIL_BAD_CTXT_CMDS,
 	FT_PLAY_GOOD_CTXT_CMDS
 };
 
-
 extern struct adreno_gpudev adreno_a2xx_gpudev;
 extern struct adreno_gpudev adreno_a3xx_gpudev;
 
@@ -229,7 +227,10 @@
 void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain,
 		int hang);
 
-int adreno_dump_and_recover(struct kgsl_device *device);
+int adreno_dump_and_exec_ft(struct kgsl_device *device);
+
+void adreno_dump_rb(struct kgsl_device *device, const void *buf,
+			 size_t len, int start, int size);
 
 unsigned int adreno_hang_detect(struct kgsl_device *device,
 						unsigned int *prev_reg_val);
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index 6276a59..0778ccb 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -70,7 +70,7 @@
 const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
 
 /* Removed the following HLSQ register ranges from being read during
- * recovery since reading the registers may cause the device to hang:
+ * fault tolerance since reading the registers may cause the device to hang:
  */
 const unsigned int a3xx_hlsq_registers[] = {
 	0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23,
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index 133f92f..2e8a0c1 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -44,8 +44,8 @@
 #define CTXT_FLAGS_TRASHSTATE		BIT(10)
 /* per context timestamps enabled */
 #define CTXT_FLAGS_PER_CONTEXT_TS	BIT(11)
-/* Context has caused a GPU hang and recovered properly */
-#define CTXT_FLAGS_GPU_HANG_RECOVERED	BIT(12)
+/* Context has caused a GPU hang and fault tolerance succeeded */
+#define CTXT_FLAGS_GPU_HANG_FT	BIT(12)
 /* Context is being destroyed so dont save it */
 #define CTXT_FLAGS_BEING_DESTROYED	BIT(13)
 /* User mode generated timestamps enabled */
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index 2367bb9..164b607 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -268,7 +268,7 @@
 #endif
 }
 
-static void adreno_dump_rb(struct kgsl_device *device, const void *buf,
+void adreno_dump_rb(struct kgsl_device *device, const void *buf,
 			 size_t len, int start, int size)
 {
 	const uint32_t *ptr = buf;
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 6a8bffb..6fda86d 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -129,7 +129,7 @@
 		continue;
 
 err:
-		if (!adreno_dump_and_recover(rb->device)) {
+		if (!adreno_dump_and_exec_ft(rb->device)) {
 			if (context && context->flags & CTXT_FLAGS_GPU_HANG) {
 				KGSL_CTXT_WARN(rb->device,
 				"Context %p caused a gpu hang. Will not accept commands for context %d\n",
@@ -138,7 +138,7 @@
 			}
 			wait_time = jiffies + wait_timeout;
 		} else {
-			/* GPU is hung and we cannot recover */
+			/* GPU is hung and fault tolerance failed */
 			BUG();
 		}
 	}
@@ -572,7 +572,7 @@
 	if (adreno_is_a3xx(adreno_dev))
 		total_sizedwords += 7;
 
-	total_sizedwords += 2; /* scratchpad ts for recovery */
+	total_sizedwords += 2; /* scratchpad ts for fault tolerance */
 	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS &&
 			!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
 		total_sizedwords += 3; /* sop timestamp */
@@ -580,7 +580,7 @@
 		total_sizedwords += 3; /* global timestamp without cache
 					* flush for non-zero context */
 	} else {
-		total_sizedwords += 4; /* global timestamp for recovery*/
+		total_sizedwords += 4; /* global timestamp for fault tolerance*/
 	}
 
 	ringcmds = adreno_ringbuffer_allocspace(rb, context, total_sizedwords);
@@ -632,7 +632,7 @@
 	}
 	timestamp = rb->timestamp[context_id];
 
-	/* scratchpad ts for recovery */
+	/* scratchpad ts for fault tolerance */
 	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
 	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
 
@@ -982,7 +982,7 @@
 	drawctxt = context->devctxt;
 
 	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) {
-		KGSL_CTXT_ERR(device, "Context %p caused a gpu hang.."
+		KGSL_CTXT_ERR(device, "Context %p failed fault tolerance"
 			" will not accept commands for context %d\n",
 			drawctxt, drawctxt->id);
 		return -EDEADLK;
@@ -990,7 +990,7 @@
 
 	if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
 		KGSL_CTXT_ERR(device,
-			"Context %p caused a gpu hang.."
+			"Context %p triggered fault tolerance"
 			" skipping commands for context till EOF %d\n",
 			drawctxt, drawctxt->id);
 		if (flags & KGSL_CMD_FLAGS_EOF)
@@ -1064,11 +1064,14 @@
 	adreno_idle(device);
 #endif
 
-	/* If context hung and recovered then return error so that the
-	 * application may handle it */
-	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_RECOVERED)
-		return -EAGAIN;
-	else
+	/*
+	 * If context hung and recovered then return error so that the
+	 * application may handle it
+	 */
+	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_FT) {
+		drawctxt->flags &= ~CTXT_FLAGS_GPU_HANG_FT;
+		return -EPROTO;
+	} else
 		return 0;
 }
 
@@ -1094,7 +1097,7 @@
 				kgsl_sharedmem_writel(&rb->buffer_desc,
 					temp_rb_rptr, cp_nop_packet(1));
 			}
-			KGSL_DRV_ERR(rb->device,
+			KGSL_FT_INFO(rb->device,
 			"Turned preamble on at offset 0x%x\n",
 			temp_rb_rptr / 4);
 			break;
@@ -1117,10 +1120,10 @@
 }
 
 void adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
-				struct adreno_recovery_data *rec_data)
+				struct adreno_ft_data *ft_data)
 {
 	struct kgsl_device *device = rb->device;
-	unsigned int rb_rptr = rec_data->start_of_replay_cmds;
+	unsigned int rb_rptr = ft_data->start_of_replay_cmds;
 	unsigned int good_rb_idx = 0, bad_rb_idx = 0, temp_rb_idx = 0;
 	unsigned int last_good_cmd_end_idx = 0, last_bad_cmd_end_idx = 0;
 	unsigned int cmd_start_idx = 0;
@@ -1130,21 +1133,21 @@
 	struct kgsl_context *k_ctxt;
 	struct adreno_context *a_ctxt;
 	unsigned int size = rb->buffer_desc.size;
-	unsigned int *temp_rb_buffer = rec_data->rb_buffer;
-	int *rb_size = &rec_data->rb_size;
-	unsigned int *bad_rb_buffer = rec_data->bad_rb_buffer;
-	int *bad_rb_size = &rec_data->bad_rb_size;
-	unsigned int *good_rb_buffer = rec_data->good_rb_buffer;
-	int *good_rb_size = &rec_data->good_rb_size;
+	unsigned int *temp_rb_buffer = ft_data->rb_buffer;
+	int *rb_size = &ft_data->rb_size;
+	unsigned int *bad_rb_buffer = ft_data->bad_rb_buffer;
+	int *bad_rb_size = &ft_data->bad_rb_size;
+	unsigned int *good_rb_buffer = ft_data->good_rb_buffer;
+	int *good_rb_size = &ft_data->good_rb_size;
 
 	/*
 	 * If the start index from where commands need to be copied is invalid
 	 * then no need to save off any commands
 	 */
-	if (0xFFFFFFFF == rec_data->start_of_replay_cmds)
+	if (0xFFFFFFFF == ft_data->start_of_replay_cmds)
 		return;
 
-	k_ctxt = idr_find(&device->context_idr, rec_data->context_id);
+	k_ctxt = idr_find(&device->context_idr, ft_data->context_id);
 	if (k_ctxt) {
 		a_ctxt = k_ctxt->devctxt;
 		if (a_ctxt->flags & CTXT_FLAGS_PREAMBLE)
@@ -1194,7 +1197,7 @@
 					temp_idx++)
 					good_rb_buffer[good_rb_idx++] =
 						temp_rb_buffer[temp_idx];
-				rec_data->last_valid_ctx_id = val2;
+				ft_data->last_valid_ctx_id = val2;
 				copy_rb_contents = 1;
 				/* remove the good commands from bad buffer */
 				bad_rb_idx = last_bad_cmd_end_idx;
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index ebbeb65..d65b91f 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -27,7 +27,7 @@
 
 struct kgsl_device;
 struct kgsl_device_private;
-struct adreno_recovery_data;
+struct adreno_ft_data;
 
 #define GSL_RB_MEMPTRS_SCRATCH_COUNT	 8
 struct kgsl_rbmemptrs {
@@ -120,7 +120,7 @@
 void kgsl_cp_intrcallback(struct kgsl_device *device);
 
 void adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
-				struct adreno_recovery_data *rec_data);
+				struct adreno_ft_data *ft_data);
 
 void
 adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff,
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index c61da62..1d32302 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -2631,7 +2631,7 @@
 	/* On a manual trigger, turn on the interrupts and put
 	   the clocks to sleep.  They will recover themselves
 	   on the next event.  For a hang, leave things as they
-	   are until recovery kicks in. */
+	   are until fault tolerance kicks in. */
 
 	if (manual) {
 		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c
index d4721bb..991570b 100644
--- a/drivers/gpu/msm/kgsl_debugfs.c
+++ b/drivers/gpu/msm/kgsl_debugfs.c
@@ -105,6 +105,7 @@
 KGSL_DEBUGFS_LOG(ctxt_log);
 KGSL_DEBUGFS_LOG(mem_log);
 KGSL_DEBUGFS_LOG(pwr_log);
+KGSL_DEBUGFS_LOG(ft_log);
 
 void kgsl_device_debugfs_init(struct kgsl_device *device)
 {
@@ -120,6 +121,7 @@
 	device->drv_log = KGSL_LOG_LEVEL_DEFAULT;
 	device->mem_log = KGSL_LOG_LEVEL_DEFAULT;
 	device->pwr_log = KGSL_LOG_LEVEL_DEFAULT;
+	device->ft_log = KGSL_LOG_LEVEL_DEFAULT;
 
 	debugfs_create_file("log_level_cmd", 0644, device->d_debugfs, device,
 			    &cmd_log_fops);
@@ -131,6 +133,8 @@
 				&mem_log_fops);
 	debugfs_create_file("log_level_pwr", 0644, device->d_debugfs, device,
 				&pwr_log_fops);
+	debugfs_create_file("log_level_ft", 0644, device->d_debugfs, device,
+				&ft_log_fops);
 
 	/* Create postmortem dump control files */
 
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index 322ad08..805e54b 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -46,7 +46,7 @@
 #define KGSL_STATE_SLEEP	0x00000008
 #define KGSL_STATE_SUSPEND	0x00000010
 #define KGSL_STATE_HUNG		0x00000020
-#define KGSL_STATE_DUMP_AND_RECOVER	0x00000040
+#define KGSL_STATE_DUMP_AND_FT	0x00000040
 #define KGSL_STATE_SLUMBER	0x00000080
 
 #define KGSL_GRAPHICS_MEMORY_LOW_WATERMARK  0x1000000
@@ -169,7 +169,7 @@
 	wait_queue_head_t wait_queue;
 	struct workqueue_struct *work_queue;
 	struct device *parentdev;
-	struct completion recovery_gate;
+	struct completion ft_gate;
 	struct dentry *d_debugfs;
 	struct idr context_idr;
 	struct early_suspend display_off;
@@ -195,6 +195,7 @@
 	int drv_log;
 	int mem_log;
 	int pwr_log;
+	int ft_log;
 	struct kgsl_pwrscale pwrscale;
 	struct kobject pwrscale_kobj;
 	struct pm_qos_request pm_qos_req_dma;
@@ -212,7 +213,7 @@
 #define KGSL_DEVICE_COMMON_INIT(_dev) \
 	.hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\
 	.suspend_gate = COMPLETION_INITIALIZER((_dev).suspend_gate),\
-	.recovery_gate = COMPLETION_INITIALIZER((_dev).recovery_gate),\
+	.ft_gate = COMPLETION_INITIALIZER((_dev).ft_gate),\
 	.ts_notifier_list = ATOMIC_NOTIFIER_INIT((_dev).ts_notifier_list),\
 	.idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\
 			kgsl_idle_check),\
diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h
index 81a35e0..83d14f7 100644
--- a/drivers/gpu/msm/kgsl_log.h
+++ b/drivers/gpu/msm/kgsl_log.h
@@ -103,6 +103,15 @@
 #define KGSL_PWR_CRIT(_dev, fmt, args...) \
 KGSL_LOG_CRIT(_dev->dev, _dev->pwr_log, fmt, ##args)
 
+#define KGSL_FT_INFO(_dev, fmt, args...) \
+KGSL_LOG_INFO(_dev->dev, _dev->ft_log, fmt, ##args)
+#define KGSL_FT_WARN(_dev, fmt, args...) \
+KGSL_LOG_WARN(_dev->dev, _dev->ft_log, fmt, ##args)
+#define KGSL_FT_ERR(_dev, fmt, args...) \
+KGSL_LOG_ERR(_dev->dev, _dev->ft_log, fmt, ##args)
+#define KGSL_FT_CRIT(_dev, fmt, args...) \
+KGSL_LOG_CRIT(_dev->dev, _dev->ft_log, fmt, ##args)
+
 /* Core error messages - these are for core KGSL functions that have
    no device associated with them (such as memory) */
 
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
index 10737c9..d489119 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -1024,7 +1024,7 @@
 			}
 		}
 	} else if (device->state & (KGSL_STATE_HUNG |
-					KGSL_STATE_DUMP_AND_RECOVER)) {
+					KGSL_STATE_DUMP_AND_FT)) {
 		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
 	}
 
@@ -1063,7 +1063,7 @@
 		break;
 	case KGSL_STATE_INIT:
 	case KGSL_STATE_HUNG:
-	case KGSL_STATE_DUMP_AND_RECOVER:
+	case KGSL_STATE_DUMP_AND_FT:
 		if (test_bit(KGSL_PWRFLAGS_CLK_ON,
 					 &device->pwrctrl.power_flags))
 			break;
@@ -1087,9 +1087,9 @@
 		mutex_unlock(&device->mutex);
 		wait_for_completion(&device->hwaccess_gate);
 		mutex_lock(&device->mutex);
-	} else if (device->state == KGSL_STATE_DUMP_AND_RECOVER) {
+	} else if (device->state == KGSL_STATE_DUMP_AND_FT) {
 		mutex_unlock(&device->mutex);
-		wait_for_completion(&device->recovery_gate);
+		wait_for_completion(&device->ft_gate);
 		mutex_lock(&device->mutex);
 	} else if (device->state == KGSL_STATE_SLUMBER)
 		kgsl_pwrctrl_wake(device);
@@ -1312,7 +1312,7 @@
 		return "SUSPEND";
 	case KGSL_STATE_HUNG:
 		return "HUNG";
-	case KGSL_STATE_DUMP_AND_RECOVER:
+	case KGSL_STATE_DUMP_AND_FT:
 		return "DNR";
 	case KGSL_STATE_SLUMBER:
 		return "SLUMBER";
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
index 1adcf55..d0fd54c 100644
--- a/drivers/gpu/msm/kgsl_snapshot.c
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -539,7 +539,7 @@
 	/* Freeze the snapshot on a hang until it gets read */
 	device->snapshot_frozen = (hang) ? 1 : 0;
 
-	/* log buffer info to aid in ramdump recovery */
+	/* log buffer info to help locate the snapshot in a ramdump */
 	KGSL_DRV_ERR(device, "snapshot created at va %p pa %lx size %d\n",
 			device->snapshot, __pa(device->snapshot),
 			device->snapshot_size);