msm: kgsl: Capture more interesting GPU buffers from a hang
Freeze more essential GPU buffers and push them into the snapshot
dump. Add visibility stream buffers, VBOs, index buffers and general
purpose GPU buffers to the list of frozen buffers. Parse the captured
IBs for type0 and type3 writes that indicate a buffer of interest
and add them to the dump list.
Change-Id: Ic0dedbad1ee978b951abedacb0c70481a1a7e38f
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h
index 7e684c0..970f377 100644
--- a/drivers/gpu/msm/a3xx_reg.h
+++ b/drivers/gpu/msm/a3xx_reg.h
@@ -197,8 +197,10 @@
#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A
#define A3XX_VFD_CONTROL_0 0x2240
#define A3XX_VFD_INDEX_MIN 0x2242
+#define A3XX_VFD_INDEX_MAX 0x2243
#define A3XX_VFD_FETCH_INSTR_0_0 0x2246
#define A3XX_VFD_FETCH_INSTR_0_4 0x224E
+#define A3XX_VFD_FETCH_INSTR_1_F 0x2265
#define A3XX_VFD_DECODE_INSTR_0 0x2266
#define A3XX_VFD_VS_THREADING_THRESHOLD 0x227E
#define A3XX_VPC_ATTR 0x2280
@@ -210,11 +212,13 @@
#define A3XX_SP_VS_OUT_REG_7 0x22CE
#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0
#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7
#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8
#define A3XX_SP_VS_LENGTH_REG 0x22DF
#define A3XX_SP_FS_CTRL_REG0 0x22E0
#define A3XX_SP_FS_CTRL_REG1 0x22E1
#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5
#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6
#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8
#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index 420a941..6e85ec6 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -58,6 +58,9 @@
/* register read/modify/write */
#define CP_REG_RMW 0x21
+/* Set binning configuration registers */
+#define CP_SET_BIN_DATA 0x2f
+
/* reads register in chip and writes to memory */
#define CP_REG_TO_MEM 0x3e
@@ -197,7 +200,13 @@
#define cp_nop_packet(cnt) \
(CP_TYPE3_PKT | (((cnt)-1) << 16) | (CP_NOP << 8))
-#define pkt_is_type3(pkt) ((pkt) & CP_TYPE3_PKT)
+/* True when the two most significant header bits match CP_TYPE0_PKT */
+#define pkt_is_type0(pkt) (((pkt) & 0xC0000000) == CP_TYPE0_PKT)
+
+/* Header bits [29:16] hold the number of dwords after the header, minus one */
+#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
+/* Header bits [14:0]: offset of the first register written by the packet */
+#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF)
+
+/*
+ * Compare both type bits against CP_TYPE3_PKT instead of only testing for a
+ * non-zero overlap, so other packet types are not mistaken for type3
+ */
+#define pkt_is_type3(pkt) (((pkt) & 0xC0000000) == CP_TYPE3_PKT)
+
#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF)
#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
index 082df4b..855db6d 100644
--- a/drivers/gpu/msm/adreno_snapshot.c
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -17,6 +17,7 @@
#include "adreno.h"
#include "adreno_pm4types.h"
#include "a2xx_reg.h"
+#include "a3xx_reg.h"
/* Number of dwords of ringbuffer history to record */
#define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100
@@ -107,6 +108,59 @@
return 0;
}
+/*
+ * This structure keeps track of type0 writes to VSC_PIPE_DATA_ADDRESS_x and
+ * VSC_PIPE_DATA_LENGTH_x. When a draw initiator is called these registers
+ * point to buffers that we need to freeze for a snapshot
+ */
+
+static struct {
+ unsigned int base; /* last value written to VSC_PIPE_DATA_ADDRESS_x */
+ unsigned int size; /* last value written to VSC_PIPE_DATA_LENGTH_x */
+} vsc_pipe[8];
+
+/*
+ * This is the cached value of type0 writes to the VSC_SIZE_ADDRESS which
+ * contains the buffer address of the visibility stream size buffer during a
+ * binning pass
+ */
+
+static unsigned int vsc_size_address;
+
+/*
+ * This struct keeps track of type0 writes to VFD_FETCH_INSTR_0_X and
+ * VFD_FETCH_INSTR_1_X registers. When a draw initiator is called the addresses
+ * and sizes in these registers point to VBOs that we need to freeze for a
+ * snapshot
+ */
+
+static struct {
+ unsigned int base; /* last value written to VFD_FETCH_INSTR_1_X */
+ unsigned int stride; /* bits [15:7] of the VFD_FETCH_INSTR_0_X value */
+} vbo[16];
+
+/*
+ * This is the cached value of type0 writes to VFD_INDEX_MAX. This will be used
+ * to calculate the size of the VBOs when the draw initiator is called
+ */
+
+static unsigned int vfd_index_max;
+
+/*
+ * This is the cached value of type0 writes to VFD_CONTROL_0 which tells us how
+ * many VBOs are active when the draw initiator is called. The count is read
+ * from bits [31:27].
+ */
+
+static unsigned int vfd_control_0;
+
+/*
+ * Cached value of type0 writes to SP_VS_PVT_MEM_ADDR and SP_FS_PVT_MEM_ADDR.
+ * This is a buffer that contains private stack information for the shader.
+ *
+ * NOTE(review): the cache reset added elsewhere in this patch zeroes the other
+ * cached values but not these two - confirm that a stale address cannot carry
+ * over into a later snapshot.
+ */
+
+static unsigned int sp_vs_pvt_mem_addr;
+static unsigned int sp_fs_pvt_mem_addr;
+
static void ib_parse_load_state(struct kgsl_device *device, unsigned int *pkt,
unsigned int ptbase)
{
@@ -153,8 +207,146 @@
}
/*
+ * This opcode sets the base addresses for the visibility stream buffer and
+ * the visibility stream size buffer.
+ */
+
+static void ib_parse_set_bin_data(struct kgsl_device *device, unsigned int *pkt,
+	unsigned int ptbase)
+{
+	int frozen;
+
+	/* Both buffer addresses have to be present in the payload */
+	if (type3_pkt_size(pkt[0]) >= 2) {
+		/* pkt[1]: visibility stream buffer, requested with size 0 */
+		frozen = kgsl_snapshot_get_object(device, ptbase, pkt[1], 0,
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += frozen;
+
+		/* pkt[2]: visibility stream size buffer, fixed at 8 dwords */
+		frozen = kgsl_snapshot_get_object(device, ptbase, pkt[2], 32,
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += frozen;
+	}
+}
+
+/*
+ * CP_MEM_WRITE stores data into GPU memory. A buffer that the command stream
+ * writes to is likely to be worth having in the snapshot, so every buffer that
+ * is the target of such a write gets frozen
+ */
+
+static void ib_parse_mem_write(struct kgsl_device *device, unsigned int *pkt,
+	unsigned int ptbase)
+{
+	unsigned int target;
+	int frozen;
+
+	/* Only continue when the packet carries a destination address */
+	if (type3_pkt_size(pkt[0]) >= 1) {
+		/*
+		 * pkt[1] is the destination of the write (low two bits
+		 * masked off). It may point somewhere inside a larger buffer,
+		 * so ask kgsl_snapshot_get_object for a size of 0, which
+		 * captures the entire buffer rather than just the part that
+		 * is written.
+		 */
+		target = pkt[1] & 0xFFFFFFFC;
+
+		frozen = kgsl_snapshot_get_object(device, ptbase, target, 0,
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += frozen;
+	}
+}
+
+/*
+ * The DRAW_INDX opcode sends a draw initiator which starts a draw operation in
+ * the GPU, so this is the point where all the registers and buffers become
+ * "valid". The DRAW_INDX may also have an index buffer pointer that should be
+ * frozen with the others.
+ *
+ * @pkt points at the type3 header of the DRAW_INDX packet. Everything that is
+ * frozen here is added to snapshot_frozen_objsize.
+ */
+
+static void ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt,
+	unsigned int ptbase)
+{
+	int ret, i;
+
+	if (type3_pkt_size(pkt[0]) < 3)
+		return;
+
+	/*
+	 * DRAW_INDX may have an index buffer pointer. The address and size are
+	 * read from pkt[4] and pkt[5], so only look at them when the payload
+	 * is at least 5 dwords long and both really belong to this packet.
+	 */
+
+	if (type3_pkt_size(pkt[0]) >= 5) {
+		ret = kgsl_snapshot_get_object(device, ptbase, pkt[4], pkt[5],
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += ret;
+	}
+
+	/*
+	 * All of the type0 writes are valid at a draw initiator, so freeze
+	 * the various buffers that we are tracking
+	 */
+
+	/* First up the visibility stream buffer */
+
+	for (i = 0; i < ARRAY_SIZE(vsc_pipe); i++) {
+		if (vsc_pipe[i].base != 0 && vsc_pipe[i].size != 0) {
+			ret = kgsl_snapshot_get_object(device, ptbase,
+				vsc_pipe[i].base, vsc_pipe[i].size,
+				SNAPSHOT_GPU_OBJECT_GENERIC);
+			snapshot_frozen_objsize += ret;
+		}
+	}
+
+	/* Next the visibility stream size buffer */
+
+	if (vsc_size_address) {
+		ret = kgsl_snapshot_get_object(device, ptbase,
+			vsc_size_address, 32,
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += ret;
+	}
+
+	/* Next private shader buffer memory */
+	if (sp_vs_pvt_mem_addr) {
+		ret = kgsl_snapshot_get_object(device, ptbase,
+			sp_vs_pvt_mem_addr, 8192,
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+
+		snapshot_frozen_objsize += ret;
+	}
+
+	if (sp_fs_pvt_mem_addr) {
+		ret = kgsl_snapshot_get_object(device, ptbase,
+			sp_fs_pvt_mem_addr, 8192,
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += ret;
+	}
+
+	/* Finally: VBOs */
+
+	/*
+	 * The number of active VBOs is stored in VFD_CONTROL_0[31:27]. That
+	 * field can hold values up to 31 but only ARRAY_SIZE(vbo) entries are
+	 * tracked, so never walk past the end of the array.
+	 */
+	for (i = 0; i < ARRAY_SIZE(vbo) && i < (vfd_control_0 >> 27); i++) {
+		/*
+		 * The base of the VBO is stored in VFD_FETCH_INSTR_1_X. The
+		 * cached VFD_FETCH_INSTR_0_X.BUFSTRIDE and VFD_INDEX_MAX are
+		 * not used for the request; a size of 0 is passed, as for the
+		 * other whole-buffer captures in this file.
+		 */
+
+		if (vbo[i].base != 0) {
+			ret = kgsl_snapshot_get_object(device, ptbase,
+				vbo[i].base,
+				0, SNAPSHOT_GPU_OBJECT_GENERIC);
+			snapshot_frozen_objsize += ret;
+		}
+	}
+}
+
+/*
* Parse all the type3 opcode packets that may contain important information,
- * such as additional GPU buffers to grab
+ * such as additional GPU buffers to grab or a draw initiator
*/
static void ib_parse_type3(struct kgsl_device *device, unsigned int *ptr,
@@ -164,6 +356,96 @@
case CP_LOAD_STATE:
ib_parse_load_state(device, ptr, ptbase);
break;
+ case CP_SET_BIN_DATA:
+ ib_parse_set_bin_data(device, ptr, ptbase);
+ break;
+ case CP_MEM_WRITE:
+ ib_parse_mem_write(device, ptr, ptbase);
+ break;
+ case CP_DRAW_INDX:
+ ib_parse_draw_indx(device, ptr, ptbase);
+ break;
+ }
+}
+
+/*
+ * Parse type0 packets found in the stream. Some of the registers that are
+ * written are clues for GPU buffers that we need to freeze. Register writes
+ * are considered valid when a draw initiator is called, so just cache the
+ * values here and freeze them when a CP_DRAW_INDX is seen. This protects
+ * against needlessly caching buffers that won't be used during a draw call.
+ *
+ * @ptr points at the type0 header; the register values are read from the
+ * dwords that follow it. @device and @ptbase are not used by this function.
+ */
+
+static void ib_parse_type0(struct kgsl_device *device, unsigned int *ptr,
+	unsigned int ptbase)
+{
+	int size = type0_pkt_size(*ptr);
+	int offset = type0_pkt_offset(*ptr);
+	int i;
+
+	/*
+	 * type0_pkt_size() is the number of register values that follow the
+	 * header, so the payload is ptr[1] .. ptr[size]. Reading any further
+	 * would treat the header of the next packet as register data.
+	 */
+
+	for (i = 0; i < size; i++, offset++) {
+
+		/* Visibility stream buffer */
+
+		if (offset >= A3XX_VSC_PIPE_DATA_ADDRESS_0 &&
+			offset <= A3XX_VSC_PIPE_DATA_LENGTH_7) {
+			int index = offset - A3XX_VSC_PIPE_DATA_ADDRESS_0;
+
+			/* Each bank of address and length registers are
+			 * interleaved with an empty register:
+			 *
+			 * address 0
+			 * length 0
+			 * empty
+			 * address 1
+			 * length 1
+			 * empty
+			 * ...
+			 */
+
+			if (index % 3 == 0)
+				vsc_pipe[index / 3].base = ptr[i + 1];
+			else if (index % 3 == 1)
+				vsc_pipe[index / 3].size = ptr[i + 1];
+		} else if ((offset >= A3XX_VFD_FETCH_INSTR_0_0) &&
+			(offset <= A3XX_VFD_FETCH_INSTR_1_F)) {
+			int index = offset - A3XX_VFD_FETCH_INSTR_0_0;
+
+			/*
+			 * FETCH_INSTR_0_X and FETCH_INSTR_1_X banks are
+			 * interleaved as above but without the empty register
+			 * in between
+			 */
+
+			if (index % 2 == 0)
+				vbo[index >> 1].stride =
+					(ptr[i + 1] >> 7) & 0x1FF;
+			else
+				vbo[index >> 1].base = ptr[i + 1];
+		} else {
+			/*
+			 * Cache various support registers for calculating
+			 * buffer sizes
+			 */
+
+			switch (offset) {
+			case A3XX_VFD_CONTROL_0:
+				vfd_control_0 = ptr[i + 1];
+				break;
+			case A3XX_VFD_INDEX_MAX:
+				vfd_index_max = ptr[i + 1];
+				break;
+			case A3XX_VSC_SIZE_ADDRESS:
+				vsc_size_address = ptr[i + 1];
+				break;
+			case A3XX_SP_VS_PVT_MEM_ADDR_REG:
+				sp_vs_pvt_mem_addr = ptr[i + 1];
+				break;
+			case A3XX_SP_FS_PVT_MEM_ADDR_REG:
+				sp_fs_pvt_mem_addr = ptr[i + 1];
+				break;
+			}
+		}
}
}
@@ -172,24 +454,33 @@
static void ib_add_gpu_object(struct kgsl_device *device, unsigned int ptbase,
unsigned int gpuaddr, unsigned int dwords)
{
- int i, ret;
+ int i = 0, ret;
unsigned int *src = (unsigned int *) adreno_convertaddr(device, ptbase,
gpuaddr, dwords << 2);
if (src == NULL)
return;
- for (i = 0; i < dwords; i++) {
+ while (i < dwords) {
+
if (pkt_is_type3(src[i])) {
if ((dwords - i) < type3_pkt_size(src[i]) + 1)
- continue;
+ goto skip;
if (adreno_cmd_is_ib(src[i]))
ib_add_gpu_object(device, ptbase,
src[i + 1], src[i + 2]);
else
ib_parse_type3(device, &src[i], ptbase);
+
+ i += type3_pkt_size(src[i]);
+ } else if (pkt_is_type0(src[i])) {
+ ib_parse_type0(device, &src[i], ptbase);
+ i += type0_pkt_size(src[i]);
}
+
+skip:
+ i++;
}
ret = kgsl_snapshot_get_object(device, ptbase, gpuaddr, dwords << 2,
@@ -388,6 +679,7 @@
return size + sizeof(*header);
}
+/* Snapshot the memory for an indirect buffer */
static int snapshot_ib(struct kgsl_device *device, void *snapshot,
int remain, void *priv)
{
@@ -469,6 +761,15 @@
snapshot_frozen_objsize = 0;
+ /* Clear the caches for the visibility stream and VBO parsing */
+
+ vfd_control_0 = 0;
+ vfd_index_max = 0;
+ vsc_size_address = 0;
+
+ memset(vsc_pipe, 0, sizeof(vsc_pipe));
+ memset(vbo, 0, sizeof(vbo));
+
/* Get the physical address of the MMU pagetable */
ptbase = kgsl_mmu_get_current_ptbase(device);
diff --git a/drivers/gpu/msm/kgsl_snapshot.h b/drivers/gpu/msm/kgsl_snapshot.h
index bd5be74..304f4bb 100644
--- a/drivers/gpu/msm/kgsl_snapshot.h
+++ b/drivers/gpu/msm/kgsl_snapshot.h
@@ -151,8 +151,9 @@
int count; /* Number of dwords in the dump */
} __packed;
-#define SNAPSHOT_GPU_OBJECT_SHADER 1
-#define SNAPSHOT_GPU_OBJECT_IB 2
+#define SNAPSHOT_GPU_OBJECT_SHADER 1
+#define SNAPSHOT_GPU_OBJECT_IB 2
+#define SNAPSHOT_GPU_OBJECT_GENERIC 3
struct kgsl_snapshot_gpu_object {
int type; /* Type of GPU object */