msm: kgsl: On pagefault stall GPU so that it hangs
Prevent the iommu fault handler from clearing the stalled status
of GPU on page fault. This will cause a GPU hang and print out
the snapshot that will help in fault analysis.
Change-Id: I9dcab83a098a988f86a0c03c46b0dbe6624de937
Signed-off-by: Shubhraprakash Das <sadas@codeaurora.org>
Signed-off-by: Rajeev Kulkarni <krajeev@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index f394422..6a8bde8 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -1464,6 +1464,7 @@
ret = -ENOMEM;
goto done;
}
+ rec_data->fault = device->mmu.fault;
done:
if (ret) {
@@ -1526,9 +1527,9 @@
goto done;
}
- /* Do not try the bad caommands if recovery has failed bad commands
- * once already */
- if (!try_bad_commands)
+ /* Do not try the bad commands if recovery has failed bad commands
+ * once already or if hang is due to a fault */
+ if (!try_bad_commands || rec_data->fault)
rec_data->bad_rb_size = 0;
if (rec_data->bad_rb_size) {
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 66402fe..75c52e2 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -138,6 +138,7 @@
* bad_rb_size - Number of valid dwords in bad_rb_buffer
* @last_valid_ctx_id - The last context from which commands were placed in
* ringbuffer before the GPU hung
+ * @fault - Indicates whether the hang was caused due to a pagefault
*/
struct adreno_recovery_data {
unsigned int ib1;
@@ -148,6 +149,7 @@
unsigned int *bad_rb_buffer;
unsigned int bad_rb_size;
unsigned int last_valid_ctx_id;
+ int fault;
};
extern struct adreno_gpudev adreno_a2xx_gpudev;
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index 87e8746..5eabc4d 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -34,6 +34,7 @@
{ 0x14, 0x0003FFFF, 14 }, /* TTBR1 */
{ 0x20, 0, 0 }, /* FSR */
{ 0x800, 0, 0 }, /* TLBIALL */
+ { 0x820, 0, 0 }, /* RESUME */
};
static struct kgsl_iommu_register_list kgsl_iommuv2_reg[KGSL_IOMMU_REG_MAX] = {
@@ -41,7 +42,8 @@
{ 0x20, 0x00FFFFFF, 14 }, /* TTBR0 */
{ 0x28, 0x00FFFFFF, 14 }, /* TTBR1 */
{ 0x58, 0, 0 }, /* FSR */
- { 0x618, 0, 0 } /* TLBIALL */
+ { 0x618, 0, 0 }, /* TLBIALL */
+ { 0x008, 0, 0 } /* RESUME */
};
static int get_iommu_unit(struct device *dev, struct kgsl_mmu **mmu_out,
@@ -124,9 +126,19 @@
KGSL_MEM_CRIT(iommu_dev->kgsldev, "context = %d FSR = %X\n",
iommu_dev->ctx_id, fsr);
+ mmu->fault = 1;
+ iommu_dev->fault = 1;
+
trace_kgsl_mmu_pagefault(iommu_dev->kgsldev, addr,
kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase), 0);
+ /*
+ * We do not want the h/w to resume fetching data from an iommu unit
+ * that has faulted, this is better for debugging as it will stall
+ * the GPU and trigger a snapshot. To stall the transaction return
+ * EBUSY error.
+ */
+ ret = -EBUSY;
done:
return ret;
}
@@ -859,12 +871,13 @@
*/
for (i = 0; i < iommu->unit_count; i++) {
struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
- for (j = 0; j < iommu_unit->dev_count; j++)
+ for (j = 0; j < iommu_unit->dev_count; j++) {
iommu_unit->dev[j].pt_lsb = KGSL_IOMMMU_PT_LSB(iommu,
KGSL_IOMMU_GET_CTX_REG(iommu,
iommu_unit,
iommu_unit->dev[j].ctx_id,
TTBR0));
+ }
}
kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
@@ -945,6 +958,7 @@
static void kgsl_iommu_stop(struct kgsl_mmu *mmu)
{
struct kgsl_iommu *iommu = mmu->priv;
+ int i, j;
/*
* stop device mmu
*
@@ -957,8 +971,25 @@
mmu->hwpagetable = NULL;
mmu->flags &= ~KGSL_FLAGS_STARTED;
- }
+ if (mmu->fault) {
+ for (i = 0; i < iommu->unit_count; i++) {
+ struct kgsl_iommu_unit *iommu_unit =
+ &iommu->iommu_units[i];
+ for (j = 0; j < iommu_unit->dev_count; j++) {
+ if (iommu_unit->dev[j].fault) {
+ kgsl_iommu_enable_clk(mmu, j);
+ KGSL_IOMMU_SET_CTX_REG(iommu,
+ iommu_unit,
+ iommu_unit->dev[j].ctx_id,
+ RESUME, 1);
+ iommu_unit->dev[j].fault = 0;
+ }
+ }
+ }
+ mmu->fault = 0;
+ }
+ }
/* switch off MMU clocks and cancel any events it has queued */
iommu->clk_event_queued = false;
kgsl_cancel_events(mmu->device, mmu);
diff --git a/drivers/gpu/msm/kgsl_iommu.h b/drivers/gpu/msm/kgsl_iommu.h
index eafba7b..661b4f0 100644
--- a/drivers/gpu/msm/kgsl_iommu.h
+++ b/drivers/gpu/msm/kgsl_iommu.h
@@ -25,6 +25,7 @@
KGSL_IOMMU_CTX_TTBR1,
KGSL_IOMMU_CTX_FSR,
KGSL_IOMMU_CTX_TLBIALL,
+ KGSL_IOMMU_CTX_RESUME,
KGSL_IOMMU_REG_MAX
};
@@ -77,6 +78,8 @@
* @ctx_id: This iommu units context id. It can be either 0 or 1
* @clk_enabled: If set indicates that iommu clocks of this iommu context
* are on, else the clocks are off
+ * fault: Flag when set indicates that this iommu device has caused a page
+ * fault
*/
struct kgsl_iommu_device {
struct device *dev;
@@ -85,6 +88,7 @@
enum kgsl_iommu_context_id ctx_id;
bool clk_enabled;
struct kgsl_device *kgsldev;
+ int fault;
};
/*
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
index 234629b..b8b9149 100644
--- a/drivers/gpu/msm/kgsl_mmu.h
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -175,6 +175,7 @@
struct kgsl_pagetable *hwpagetable;
const struct kgsl_mmu_ops *mmu_ops;
void *priv;
+ int fault;
};
#include "kgsl_gpummu.h"