msm: kgsl: Prevent fault tolerance memory free twice
Fault tolerance data can be freed twice if fault tolerance fails.
Do not free the data when the data allocation fails; instead, make
the decision to free the data only at the end of fault tolerance.
CRs-Fixed: 469807
Change-Id: Ifcc9a75e70440be6471b32d8413f4faa01a9ee47
Signed-off-by: Tarun Karra <tkarra@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 5578b12..b49261c 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -18,6 +18,7 @@
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/msm_kgsl.h>
+#include <linux/delay.h>
#include <mach/socinfo.h>
#include <mach/msm_bus_board.h>
@@ -1559,7 +1560,7 @@
return status;
}
-static int adreno_setup_ft_data(struct kgsl_device *device,
+static void adreno_setup_ft_data(struct kgsl_device *device,
struct adreno_ft_data *ft_data)
{
int ret = 0;
@@ -1591,26 +1592,24 @@
if (!ft_data->rb_buffer) {
KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
rb->buffer_desc.size);
- return -ENOMEM;
+ return;
}
ft_data->bad_rb_buffer = vmalloc(rb->buffer_desc.size);
if (!ft_data->bad_rb_buffer) {
KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
rb->buffer_desc.size);
- ret = -ENOMEM;
- goto done;
+ return;
}
ft_data->good_rb_buffer = vmalloc(rb->buffer_desc.size);
if (!ft_data->good_rb_buffer) {
KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
rb->buffer_desc.size);
- ret = -ENOMEM;
- goto done;
+ return;
}
- ft_data->status = 0;
+ ft_data->status = 0;
/* find the start of bad command sequence in rb */
context = idr_find(&device->context_idr, ft_data->context_id);
@@ -1621,20 +1620,23 @@
* If there is no context then fault tolerance does not need to
* replay anything, just reset GPU and thats it
*/
- goto done;
+ return;
}
- ret = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
- ft_data->global_eop + 1, false);
- if (ret)
- goto done;
- ft_data->start_of_replay_cmds = rb_rptr;
+ ft_data->ft_policy = adreno_dev->ft_policy;
if (!adreno_dev->ft_policy)
adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
- ft_data->ft_policy = adreno_dev->ft_policy;
+ ret = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
+ ft_data->global_eop + 1, false);
+ if (ret) {
+ ft_data->ft_policy |= KGSL_FT_TEMP_DISABLE;
+ return;
+ } else
+ ft_data->ft_policy &= ~KGSL_FT_TEMP_DISABLE;
+ ft_data->start_of_replay_cmds = rb_rptr;
adreno_context = context->devctxt;
if (adreno_context->flags & CTXT_FLAGS_PREAMBLE) {
@@ -1644,21 +1646,12 @@
if (ret) {
KGSL_FT_ERR(device,
"Start not found for replay IB sequence\n");
- ret = 0;
- goto done;
+ return;
}
ft_data->start_of_replay_cmds = rb_rptr;
ft_data->replay_for_snapshot = rb_rptr;
}
}
-
-done:
- if (ret) {
- vfree(ft_data->rb_buffer);
- vfree(ft_data->bad_rb_buffer);
- vfree(ft_data->good_rb_buffer);
- }
- return ret;
}
static int
@@ -1691,8 +1684,7 @@
static int
_adreno_ft_restart_device(struct kgsl_device *device,
- struct kgsl_context *context,
- struct adreno_ft_data *ft_data)
+ struct kgsl_context *context)
{
struct adreno_context *adreno_context = context->devctxt;
@@ -1762,11 +1754,30 @@
unsigned int *buff, unsigned int size)
{
unsigned int ret = 0;
+ unsigned int retry_num = 0;
_adreno_debug_ft_info(device, ft_data);
- if (_adreno_ft_restart_device(device, context, ft_data))
- return 1;
+ do {
+ ret = _adreno_ft_restart_device(device, context);
+ if (ret == 0)
+ break;
+ /*
+ * If device restart fails sleep for 20ms before
+ * attempting restart. This allows GPU HW to settle
+ * and improve the chances of next restart to be
+ * successful.
+ */
+ msleep(20);
+ KGSL_FT_ERR(device, "Retry device restart %d\n", retry_num);
+ retry_num++;
+ } while (retry_num < 4);
+
+ if (ret) {
+ KGSL_FT_ERR(device, "Device restart failed\n");
+ BUG_ON(1);
+ goto done;
+ }
if (size) {
@@ -1776,10 +1787,10 @@
ret = adreno_idle(device);
}
+done:
return ret;
}
-
static int
_adreno_ft(struct kgsl_device *device,
struct adreno_ft_data *ft_data)
@@ -1790,11 +1801,13 @@
struct kgsl_context *context;
struct adreno_context *adreno_context = NULL;
struct adreno_context *last_active_ctx = adreno_dev->drawctxt_active;
+ unsigned int long_ib = 0;
context = idr_find(&device->context_idr, ft_data->context_id);
if (context == NULL) {
- KGSL_FT_CRIT(device, "Last context unknown id:%d\n",
+ KGSL_FT_ERR(device, "Last context unknown id:%d\n",
ft_data->context_id);
+ goto play_good_cmds;
} else {
adreno_context = context->devctxt;
adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
@@ -1804,6 +1817,12 @@
*/
context->wait_on_invalid_ts = false;
+ if (!(adreno_context->flags & CTXT_FLAGS_PER_CONTEXT_TS)) {
+ ft_data->status = 1;
+ KGSL_FT_ERR(device, "Fault tolerance not supported\n");
+ goto play_good_cmds;
+ }
+
/*
* This flag will be set by userspace for contexts
* that do not want to be fault tolerant (ex: OPENCL)
@@ -1817,23 +1836,34 @@
}
+ /* Check if we detected a long running IB,
+ * if true do not attempt replay of bad cmds */
+ if (adreno_dev->long_ib) {
+ long_ib = _adreno_check_long_ib(device);
+ if (!long_ib) {
+ adreno_context->flags &= ~CTXT_FLAGS_GPU_HANG;
+ return 0;
+ }
+ }
+
/*
* Extract valid contents from rb which can still be executed after
* hang
*/
adreno_ringbuffer_extract(rb, ft_data);
- /* Check if we detected a long running IB,
- * if true do not attempt replay of bad cmds */
- if (adreno_dev->long_ib) {
- if (_adreno_check_long_ib(device)) {
- ft_data->status = 1;
- _adreno_debug_ft_info(device, ft_data);
- goto play_good_cmds;
- } else {
- adreno_context->flags &= ~CTXT_FLAGS_GPU_HANG;
- return 0;
- }
+ /* If long IB detected do not attempt replay of bad cmds */
+ if (long_ib) {
+ ft_data->status = 1;
+ _adreno_debug_ft_info(device, ft_data);
+ goto play_good_cmds;
+ }
+
+ if ((ft_data->ft_policy & KGSL_FT_DISABLE) ||
+ (ft_data->ft_policy & KGSL_FT_TEMP_DISABLE)) {
+ KGSL_FT_ERR(device, "NO FT policy play only good cmds\n");
+ ft_data->status = 1;
+ goto play_good_cmds;
}
/* Do not try the reply if hang is due to a pagefault */
@@ -1847,26 +1877,18 @@
adreno_context->pagefault = 0;
}
- if (ft_data->ft_policy & KGSL_FT_DISABLE) {
- KGSL_FT_ERR(device, "NO FT policy play only good cmds\n");
- ft_data->status = 1;
- goto play_good_cmds;
- }
-
if (ft_data->ft_policy & KGSL_FT_REPLAY) {
-
ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
ft_data->bad_rb_buffer, ft_data->bad_rb_size);
if (ret) {
- KGSL_FT_ERR(device, "Replay unsuccessful\n");
+ KGSL_FT_ERR(device, "Replay status: 1\n");
ft_data->status = 1;
} else
goto play_good_cmds;
}
if (ft_data->ft_policy & KGSL_FT_SKIPIB) {
-
for (i = 0; i < ft_data->bad_rb_size; i++) {
if ((ft_data->bad_rb_buffer[i] ==
CP_HDR_INDIRECT_BUFFER_PFD) &&
@@ -1891,7 +1913,7 @@
ft_data->bad_rb_buffer, ft_data->bad_rb_size);
if (ret) {
- KGSL_FT_ERR(device, "NOP faulty IB unsuccessful\n");
+ KGSL_FT_ERR(device, "NOP faulty IB status: 1\n");
ft_data->status = 1;
} else {
ft_data->status = 0;
@@ -1900,7 +1922,6 @@
}
if (ft_data->ft_policy & KGSL_FT_SKIPFRAME) {
-
for (i = 0; i < ft_data->bad_rb_size; i++) {
if (ft_data->bad_rb_buffer[i] ==
KGSL_END_OF_FRAME_IDENTIFIER) {
@@ -1922,7 +1943,7 @@
ft_data->bad_rb_buffer, ft_data->bad_rb_size);
if (ret) {
- KGSL_FT_ERR(device, "Skip EOF unsuccessful\n");
+ KGSL_FT_ERR(device, "Skip EOF status: 1\n");
ft_data->status = 1;
} else {
ft_data->status = 0;
@@ -2000,9 +2021,7 @@
/* setup new fault tolerance parameters and retry, this
* means more than 1 contexts are causing hang */
adreno_destroy_ft_data(ft_data);
- ret = adreno_setup_ft_data(device, ft_data);
- if (ret)
- goto done;
+ adreno_setup_ft_data(device, ft_data);
KGSL_FT_INFO(device,
"Retry. Parameters: "
"IB1: 0x%X, Bad context_id: %u, global_eop: 0x%x\n",
@@ -2067,7 +2086,7 @@
kgsl_pwrctrl_pwrlevel_change(device, pwr->max_pwrlevel);
/* Get the fault tolerance data as soon as hang is detected */
- result = adreno_setup_ft_data(device, &ft_data);
+ adreno_setup_ft_data(device, &ft_data);
/*
* If long ib is detected, do not attempt postmortem or
@@ -2089,10 +2108,8 @@
kgsl_device_snapshot(device, 1);
}
- if (!result) {
- result = adreno_ft(device, &ft_data);
- adreno_destroy_ft_data(&ft_data);
- }
+ result = adreno_ft(device, &ft_data);
+ adreno_destroy_ft_data(&ft_data);
/* restore power level */
kgsl_pwrctrl_pwrlevel_change(device, curr_pwrlevel);
@@ -2245,39 +2262,6 @@
status = 0;
}
break;
- case KGSL_PROP_FAULT_TOLERANCE: {
- struct kgsl_ft_config ftd;
-
- if (adreno_dev->ft_user_control == 0)
- break;
-
- if (sizebytes != sizeof(ftd))
- break;
-
- if (copy_from_user(&ftd, (void __user *) value,
- sizeof(ftd))) {
- status = -EFAULT;
- break;
- }
-
- if (ftd.ft_policy)
- adreno_dev->ft_policy = ftd.ft_policy;
- else
- adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
-
- if (ftd.ft_pf_policy)
- adreno_dev->ft_pf_policy = ftd.ft_policy;
- else
- adreno_dev->ft_pf_policy =
- KGSL_FT_PAGEFAULT_DEFAULT_POLICY;
-
- if (ftd.ft_pm_dump)
- device->pm_dump_enable = 1;
- else
- device->pm_dump_enable = 0;
-
- }
- break;
default:
break;
}
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index d319c98..8d16bfa 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -179,6 +179,22 @@
unsigned int replay_for_snapshot;
};
+/* Fault Tolerance policy flags */
+#define KGSL_FT_DISABLE BIT(0)
+#define KGSL_FT_REPLAY BIT(1)
+#define KGSL_FT_SKIPIB BIT(2)
+#define KGSL_FT_SKIPFRAME BIT(3)
+#define KGSL_FT_TEMP_DISABLE BIT(4)
+#define KGSL_FT_DEFAULT_POLICY (KGSL_FT_REPLAY + KGSL_FT_SKIPIB)
+
+/* Pagefault policy flags */
+#define KGSL_FT_PAGEFAULT_INT_ENABLE 0x00000001
+#define KGSL_FT_PAGEFAULT_GPUHALT_ENABLE 0x00000002
+#define KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE 0x00000004
+#define KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT 0x00000008
+#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY (KGSL_FT_PAGEFAULT_INT_ENABLE + \
+ KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)
+
extern struct adreno_gpudev adreno_a2xx_gpudev;
extern struct adreno_gpudev adreno_a3xx_gpudev;
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index cf1cf90..5fdcf19 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -728,7 +728,7 @@
if (!device->pm_dump_enable) {
KGSL_LOG_DUMP(device,
- "RBBM STATUS %08X | IB1:%08X/%08X | IB2: %08X/%08X"
+ "STATUS %08X | IB1:%08X/%08X | IB2: %08X/%08X"
" | RPTR: %04X | WPTR: %04X\n",
rbbm_status, cp_ib1_base, cp_ib1_bufsz, cp_ib2_base,
cp_ib2_bufsz, cp_rb_rptr, cp_rb_wptr);