msm: kgsl: Capture more interesting GPU buffers from a hang

Freeze more essential GPU buffers and push them into the snapshot
dump.  Add visibility stream buffers, VBOs, index buffers and general
purpose GPU buffers to the list of frozen buffers. Parse the captured
IBs for type0 and type3 writes that indicate a buffer of interest
and add them to the dump list.

Change-Id: Ic0dedbad1ee978b951abedacb0c70481a1a7e38f
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
index 082df4b..855db6d 100644
--- a/drivers/gpu/msm/adreno_snapshot.c
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -17,6 +17,7 @@
 #include "adreno.h"
 #include "adreno_pm4types.h"
 #include "a2xx_reg.h"
+#include "a3xx_reg.h"
 
 /* Number of dwords of ringbuffer history to record */
 #define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100
@@ -107,6 +108,59 @@
 	return 0;
 }
 
+/*
+ * This structure keeps track of type0 writes to VSC_PIPE_DATA_ADDRESS_x and
+ * VSC_PIPE_DATA_LENGTH_x. When a draw initiator is called these registers
+ * point to buffers that we need to freeze for a snapshot
+ */
+
+static struct {
+	unsigned int base;
+	unsigned int size;
+} vsc_pipe[8];
+
+/*
+ * This is the cached value of type0 writes to the VSC_SIZE_ADDRESS which
+ * contains the buffer address of the visibility stream size buffer during a
+ * binning pass
+ */
+
+static unsigned int vsc_size_address;
+
+/*
+ * This struct keeps track of type0 writes to VFD_FETCH_INSTR_0_X and
+ * VFD_FETCH_INSTR_1_X registers. When a draw initiator is called the addresses
+ * and sizes in these registers point to VBOs that we need to freeze for a
+ * snapshot
+ */
+
+static struct {
+	unsigned int base;
+	unsigned int stride;
+} vbo[16];
+
+/*
+ * This is the cached value of type0 writes to VFD_INDEX_MAX.  This will be used
+ * to calculate the size of the VBOs when the draw initiator is called
+ */
+
+static unsigned int vfd_index_max;
+
+/*
+ * This is the cached value of type0 writes to VFD_CONTROL_0 which tells us how
+ * many VBOs are active when the draw initiator is called
+ */
+
+static unsigned int vfd_control_0;
+
+/*
+ * Cached value of type0 writes to SP_VS_PVT_MEM_ADDR and SP_FS_PVT_MEM_ADDR.
+ * This is a buffer that contains private stack information for the shader.
+ * NOTE(review): confirm these are reset along with the other cached values
+ */
+static unsigned int sp_vs_pvt_mem_addr;
+static unsigned int sp_fs_pvt_mem_addr;
+
 static void ib_parse_load_state(struct kgsl_device *device, unsigned int *pkt,
 	unsigned int ptbase)
 {
@@ -153,8 +207,146 @@
 }
 
 /*
+ * This opcode sets the base addresses for the visibility stream buffer
+ * (pkt[1]) and the visibility stream size buffer (pkt[2]).
+ */
+
+static void ib_parse_set_bin_data(struct kgsl_device *device, unsigned int *pkt,
+	unsigned int ptbase)
+{
+	int ret;
+
+	if (type3_pkt_size(pkt[0]) < 2)
+		return;
+
+	/* Visibility stream buffer */
+	ret = kgsl_snapshot_get_object(device, ptbase, pkt[1], 0,
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+	snapshot_frozen_objsize += ret;
+
+	/* Visibility stream size buffer (fixed size 8 dwords) */
+	ret = kgsl_snapshot_get_object(device, ptbase, pkt[2], 32,
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+	snapshot_frozen_objsize += ret;
+}
+
+/*
+ * The CP_MEM_WRITE opcode writes to GPU memory - if the buffer is written to,
+ * there is a good chance that it would be valuable to capture in the snapshot,
+ * so mark all buffers that are written to as frozen
+ */
+
+static void ib_parse_mem_write(struct kgsl_device *device, unsigned int *pkt,
+	unsigned int ptbase)
+{
+	int ret;
+
+	if (type3_pkt_size(pkt[0]) < 1)
+		return;
+
+	/*
+	 * The address is where the data in the rest of this packet is written
+	 * to, but since that might be an offset into the larger buffer we need
+	 * to get the whole thing. Pass a size of 0 to kgsl_snapshot_get_object
+	 * to capture the entire buffer. The low two bits of pkt[1] are masked
+	 * off before the lookup.
+	 */
+	ret = kgsl_snapshot_get_object(device, ptbase, pkt[1] & 0xFFFFFFFC, 0,
+		SNAPSHOT_GPU_OBJECT_GENERIC);
+
+	snapshot_frozen_objsize += ret;
+}
+
+/*
+ * The DRAW_INDX opcode sends a draw initiator which starts a draw operation in
+ * the GPU, so this is the point where all the registers and buffers become
+ * "valid".  The DRAW_INDX may also have an index buffer pointer that should be
+ * frozen with the others
+ */
+
+static void ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt,
+	unsigned int ptbase)
+{
+	unsigned int nr_vbos;
+	int ret, i;
+
+	if (type3_pkt_size(pkt[0]) < 3)
+		return;
+
+	/*
+	 * DRAW_INDX may have an index buffer pointer.  NOTE(review): pkt[5]
+	 * is read when only size > 3 is known - confirm 5 dwords are present
+	 */
+	if (type3_pkt_size(pkt[0]) > 3) {
+		ret = kgsl_snapshot_get_object(device, ptbase, pkt[4], pkt[5],
+			SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += ret;
+	}
+
+	/*
+	 * All of the type0 writes are valid at a draw initiator, so freeze
+	 * the various buffers that we are tracking
+	 */
+
+	/* First up the visibility stream buffer */
+	for (i = 0; i < ARRAY_SIZE(vsc_pipe); i++) {
+		if (vsc_pipe[i].base != 0 && vsc_pipe[i].size != 0) {
+			ret = kgsl_snapshot_get_object(device, ptbase,
+				vsc_pipe[i].base, vsc_pipe[i].size,
+				SNAPSHOT_GPU_OBJECT_GENERIC);
+			snapshot_frozen_objsize += ret;
+		}
+	}
+
+	/* Next the visibility stream size buffer */
+	if (vsc_size_address) {
+		ret = kgsl_snapshot_get_object(device, ptbase,
+				vsc_size_address, 32,
+				SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += ret;
+	}
+
+	/* Next private shader buffer memory */
+	if (sp_vs_pvt_mem_addr) {
+		ret = kgsl_snapshot_get_object(device, ptbase,
+				sp_vs_pvt_mem_addr, 8192,
+				SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += ret;
+	}
+
+	if (sp_fs_pvt_mem_addr) {
+		ret = kgsl_snapshot_get_object(device, ptbase,
+				sp_fs_pvt_mem_addr, 8192,
+				SNAPSHOT_GPU_OBJECT_GENERIC);
+		snapshot_frozen_objsize += ret;
+	}
+
+	/*
+	 * Finally: VBOs.  The number of active VBOs is stored in
+	 * VFD_CONTROL_0[31:27].  That 5-bit field can exceed the number of
+	 * vbo[] slots being tracked, so clamp it to stay inside the array
+	 */
+	nr_vbos = vfd_control_0 >> 27;
+	if (nr_vbos > ARRAY_SIZE(vbo))
+		nr_vbos = ARRAY_SIZE(vbo);
+
+	for (i = 0; i < nr_vbos; i++) {
+		/*
+		 * The base of the VBO is stored in VFD_FETCH_INSTR_1_X.  A
+		 * size of 0 is passed so that the entire buffer is captured
+		 */
+		if (vbo[i].base != 0) {
+			ret = kgsl_snapshot_get_object(device, ptbase,
+				vbo[i].base,
+				0, SNAPSHOT_GPU_OBJECT_GENERIC);
+			snapshot_frozen_objsize += ret;
+		}
+	}
+}
+
+/*
  * Parse all the type3 opcode packets that may contain important information,
- * such as additional GPU buffers to grab
+ * such as additional GPU buffers to grab or a draw initiator
  */
 
 static void ib_parse_type3(struct kgsl_device *device, unsigned int *ptr,
@@ -164,6 +356,96 @@
 	case CP_LOAD_STATE:
 		ib_parse_load_state(device, ptr, ptbase);
 		break;
+	case CP_SET_BIN_DATA:
+		ib_parse_set_bin_data(device, ptr, ptbase);
+		break;
+	case CP_MEM_WRITE:
+		ib_parse_mem_write(device, ptr, ptbase);
+		break;
+	case CP_DRAW_INDX:
+		ib_parse_draw_indx(device, ptr, ptbase);
+		break;
+	}
+}
+
+/*
+ * Parse type0 packets found in the stream.  Some of the registers that are
+ * written are clues for GPU buffers that we need to freeze.  Register writes
+ * are considered valid when a draw initiator is called, so just cache the
+ * values here and freeze them when a CP_DRAW_INDX is seen.  This protects
+ * against needlessly caching buffers that won't be used during a draw call
+ */
+
+static void ib_parse_type0(struct kgsl_device *device, unsigned int *ptr,
+	unsigned int ptbase)
+{
+	int size = type0_pkt_size(*ptr);
+	int offset = type0_pkt_offset(*ptr);
+	int i;
+
+	/* 'size' payload dwords follow the header: ptr[1]..ptr[size] */
+	for (i = 0; i < size; i++, offset++) {
+
+		/* Visibility stream buffer */
+		if (offset >= A3XX_VSC_PIPE_DATA_ADDRESS_0 &&
+			offset <= A3XX_VSC_PIPE_DATA_LENGTH_7) {
+			int index = offset - A3XX_VSC_PIPE_DATA_ADDRESS_0;
+
+			/* Each bank of address and length registers are
+			 * interleaved with an empty register:
+			 *
+			 * address 0
+			 * length 0
+			 * empty
+			 * address 1
+			 * length 1
+			 * empty
+			 * ...
+			 */
+
+			if (index % 3 == 0)
+				vsc_pipe[index / 3].base = ptr[i + 1];
+			else if (index % 3 == 1)
+				vsc_pipe[index / 3].size = ptr[i + 1];
+		} else if ((offset >= A3XX_VFD_FETCH_INSTR_0_0) &&
+			(offset <= A3XX_VFD_FETCH_INSTR_1_F)) {
+			int index = offset - A3XX_VFD_FETCH_INSTR_0_0;
+
+			/*
+			 * FETCH_INSTR_0_X and FETCH_INSTR_1_X banks are
+			 * interleaved as above but without the empty register
+			 * in between
+			 */
+
+			if (index % 2 == 0)
+				vbo[index >> 1].stride =
+					(ptr[i + 1] >> 7) & 0x1FF;
+			else
+				vbo[index >> 1].base = ptr[i + 1];
+		} else {
+			/*
+			 * Cache various support registers for calculating
+			 * buffer sizes
+			 */
+
+			switch (offset) {
+			case A3XX_VFD_CONTROL_0:
+				vfd_control_0 = ptr[i + 1];
+				break;
+			case A3XX_VFD_INDEX_MAX:
+				vfd_index_max = ptr[i + 1];
+				break;
+			case A3XX_VSC_SIZE_ADDRESS:
+				vsc_size_address = ptr[i + 1];
+				break;
+			case A3XX_SP_VS_PVT_MEM_ADDR_REG:
+				sp_vs_pvt_mem_addr = ptr[i + 1];
+				break;
+			case A3XX_SP_FS_PVT_MEM_ADDR_REG:
+				sp_fs_pvt_mem_addr = ptr[i + 1];
+				break;
+			}
+		}
 	}
 }
 
@@ -172,24 +454,33 @@
 static void ib_add_gpu_object(struct kgsl_device *device, unsigned int ptbase,
 		unsigned int gpuaddr, unsigned int dwords)
 {
-	int i, ret;
+	int i = 0, ret;
 	unsigned int *src = (unsigned int *) adreno_convertaddr(device, ptbase,
 		gpuaddr, dwords << 2);
 
 	if (src == NULL)
 		return;
 
-	for (i = 0; i < dwords; i++) {
+	while (i < dwords) {
+
 		if (pkt_is_type3(src[i])) {
 			if ((dwords - i) < type3_pkt_size(src[i]) + 1)
-				continue;
+				goto skip;
 
 			if (adreno_cmd_is_ib(src[i]))
 				ib_add_gpu_object(device, ptbase,
 					src[i + 1], src[i + 2]);
 			else
 				ib_parse_type3(device, &src[i], ptbase);
+
+			i += type3_pkt_size(src[i]);
+		} else if (pkt_is_type0(src[i])) {
+			ib_parse_type0(device, &src[i], ptbase);
+			i += type0_pkt_size(src[i]);
 		}
+
+skip:
+		i++;
 	}
 
 	ret = kgsl_snapshot_get_object(device, ptbase, gpuaddr, dwords << 2,
@@ -388,6 +679,7 @@
 	return size + sizeof(*header);
 }
 
+/* Snapshot the memory for an indirect buffer */
 static int snapshot_ib(struct kgsl_device *device, void *snapshot,
 	int remain, void *priv)
 {
@@ -469,6 +761,15 @@
 
 	snapshot_frozen_objsize = 0;
 
+	/* Clear the caches for the visibility stream and VBO parsing */
+
+	vfd_control_0 = 0;
+	vfd_index_max = 0;
+	vsc_size_address = 0;
+
+	memset(vsc_pipe, 0, sizeof(vsc_pipe));
+	memset(vbo, 0, sizeof(vbo));
+
 	/* Get the physical address of the MMU pagetable */
 	ptbase = kgsl_mmu_get_current_ptbase(device);