msm: kgsl: rework ib checking

Separate ib parse checking from cffdump as it is useful
in other situations. This is controlled by a new debugfs
file, ib_check. All ib checking is off (0) by default,
because parsing and mem_entry lookup can have a performance
impact on some benchmarks. Level 1 checking verifies the
IB1's. Level 2 checking also verifies the IB2.

Change-Id: Ibf3c6d1e0d7522e75b41e1a6dbb92020ae9ace8d
Signed-off-by: Jeremy Gebben <jgebben@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 356df0d..fb672e4 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -113,6 +113,7 @@
 	.pfp_fw = NULL,
 	.pm4_fw = NULL,
 	.wait_timeout = 10000, /* in milliseconds */
+	.ib_check_level = 0,
 };
 
 
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index c370190..4885312 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -77,6 +77,7 @@
 	unsigned int istore_size;
 	unsigned int pix_shader_start;
 	unsigned int instruction_size;
+	unsigned int ib_check_level;
 };
 
 struct adreno_gpudev {
diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c
index 3461316..9e022b9 100644
--- a/drivers/gpu/msm/adreno_debugfs.c
+++ b/drivers/gpu/msm/adreno_debugfs.c
@@ -113,6 +113,8 @@
 			    &kgsl_cff_dump_enable_fops);
 	debugfs_create_u32("wait_timeout", 0644, device->d_debugfs,
 		&adreno_dev->wait_timeout);
+	debugfs_create_u32("ib_check", 0644, device->d_debugfs,
+			   &adreno_dev->ib_check_level);
 
 	/* Create post mortem control files */
 
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index 6e85ec6..fb44b25 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -29,11 +29,6 @@
 /* skip N 32-bit words to get to the next packet */
 #define CP_NOP			0x10
 
-/* indirect buffer dispatch.  prefetch parser uses this packet type to determine
-*  whether to pre-fetch the IB
-*/
-#define CP_INDIRECT_BUFFER	0x3f
-
 /* indirect buffer dispatch.  same as IB, but init is pipelined */
 #define CP_INDIRECT_BUFFER_PFD	0x37
 
@@ -120,6 +115,9 @@
 /* load constants from a location in memory */
 #define CP_LOAD_CONSTANT_CONTEXT 0x2e
 
+/* (A2x) sets binning configuration registers */
+#define CP_SET_BIN_DATA             0x2f
+
 /* selective invalidation of state pointers */
 #define CP_INVALIDATE_STATE	0x3b
 
@@ -213,7 +211,7 @@
 /* packet headers */
 #define CP_HDR_ME_INIT	cp_type3_packet(CP_ME_INIT, 18)
 #define CP_HDR_INDIRECT_BUFFER_PFD cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)
-#define CP_HDR_INDIRECT_BUFFER	cp_type3_packet(CP_INDIRECT_BUFFER, 2)
+#define CP_HDR_INDIRECT_BUFFER_PFE cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2)
 
 /* dword base address of the GFX decode space */
 #define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000)))
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index 5b197b4..d97659c 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -54,7 +54,7 @@
 	{CP_IM_LOAD,			"IN__LOAD"},
 	{CP_IM_LOAD_IMMEDIATE,		"IM_LOADI"},
 	{CP_IM_STORE,			"IM_STORE"},
-	{CP_INDIRECT_BUFFER,		"IND_BUF_"},
+	{CP_INDIRECT_BUFFER_PFE,	"IND_BUF_"},
 	{CP_INDIRECT_BUFFER_PFD,	"IND_BUFP"},
 	{CP_INTERRUPT,			"PM4_INTR"},
 	{CP_INVALIDATE_STATE,		"INV_STAT"},
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 9542dfc..b9c0a28 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -22,6 +22,7 @@
 #include "adreno.h"
 #include "adreno_pm4types.h"
 #include "adreno_ringbuffer.h"
+#include "adreno_debugfs.h"
 
 #include "a2xx_reg.h"
 #include "a3xx_reg.h"
@@ -545,6 +546,198 @@
 	adreno_ringbuffer_addcmds(rb, flags, cmds, sizedwords);
 }
 
+static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr,
+			   int sizedwords);
+
+static bool
+_handle_type3(struct kgsl_device_private *dev_priv, uint *hostaddr)
+{
+	unsigned int opcode = cp_type3_opcode(*hostaddr);
+	switch (opcode) {
+	case CP_INDIRECT_BUFFER_PFD:
+	case CP_INDIRECT_BUFFER_PFE:
+	case CP_COND_INDIRECT_BUFFER_PFE:
+	case CP_COND_INDIRECT_BUFFER_PFD:
+		return _parse_ibs(dev_priv, hostaddr[1], hostaddr[2]);
+	case CP_NOP:
+	case CP_WAIT_FOR_IDLE:
+	case CP_WAIT_REG_MEM:
+	case CP_WAIT_REG_EQ:
+	case CP_WAT_REG_GTE:
+	case CP_WAIT_UNTIL_READ:
+	case CP_WAIT_IB_PFD_COMPLETE:
+	case CP_REG_RMW:
+	case CP_REG_TO_MEM:
+	case CP_MEM_WRITE:
+	case CP_MEM_WRITE_CNTR:
+	case CP_COND_EXEC:
+	case CP_COND_WRITE:
+	case CP_EVENT_WRITE:
+	case CP_EVENT_WRITE_SHD:
+	case CP_EVENT_WRITE_CFL:
+	case CP_EVENT_WRITE_ZPD:
+	case CP_DRAW_INDX:
+	case CP_DRAW_INDX_2:
+	case CP_DRAW_INDX_BIN:
+	case CP_DRAW_INDX_2_BIN:
+	case CP_VIZ_QUERY:
+	case CP_SET_STATE:
+	case CP_SET_CONSTANT:
+	case CP_IM_LOAD:
+	case CP_IM_LOAD_IMMEDIATE:
+	case CP_LOAD_CONSTANT_CONTEXT:
+	case CP_INVALIDATE_STATE:
+	case CP_SET_SHADER_BASES:
+	case CP_SET_BIN_MASK:
+	case CP_SET_BIN_SELECT:
+	case CP_SET_BIN_BASE_OFFSET:
+	case CP_SET_BIN_DATA:
+	case CP_CONTEXT_UPDATE:
+	case CP_INTERRUPT:
+	case CP_IM_STORE:
+	case CP_LOAD_STATE:
+		break;
+	/* these shouldn't come from userspace */
+	case CP_ME_INIT:
+	case CP_SET_PROTECTED_MODE:
+	default:
+		KGSL_CMD_ERR(dev_priv->device, "bad CP opcode %0x\n", opcode);
+		return false;
+		break;
+	}
+
+	return true;
+}
+
+static bool
+_handle_type0(struct kgsl_device_private *dev_priv, uint *hostaddr)
+{
+	unsigned int reg = type0_pkt_offset(*hostaddr);
+	unsigned int cnt = type0_pkt_size(*hostaddr);
+	if (reg < 0x0192 || (reg + cnt) >= 0x8000) {
+		KGSL_CMD_ERR(dev_priv->device, "bad type0 reg: 0x%0x cnt: %d\n",
+			     reg, cnt);
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Traverse IBs and dump them to test vector. Detect swap by inspecting
+ * register writes, keeping note of the current state, and dump
+ * framebuffer config to test vector
+ */
+static bool _parse_ibs(struct kgsl_device_private *dev_priv,
+			   uint gpuaddr, int sizedwords)
+{
+	static uint level; /* recursion level */
+	bool ret = false;
+	uint *hostaddr, *hoststart;
+	int dwords_left = sizedwords; /* dwords left in the current command
+					 buffer */
+	struct kgsl_mem_entry *entry;
+
+	spin_lock(&dev_priv->process_priv->mem_lock);
+	entry = kgsl_sharedmem_find_region(dev_priv->process_priv,
+					   gpuaddr, sizedwords * sizeof(uint));
+	spin_unlock(&dev_priv->process_priv->mem_lock);
+	if (entry == NULL) {
+		KGSL_CMD_ERR(dev_priv->device,
+			     "no mapping for gpuaddr: 0x%08x\n", gpuaddr);
+		return false;
+	}
+
+	hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr);
+	if (hostaddr == NULL) {
+		KGSL_CMD_ERR(dev_priv->device,
+			     "no mapping for gpuaddr: 0x%08x\n", gpuaddr);
+		return false;
+	}
+
+	hoststart = hostaddr;
+
+	level++;
+
+	KGSL_CMD_INFO(dev_priv->device, "ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n",
+		gpuaddr, sizedwords, hostaddr);
+
+	mb();
+	while (dwords_left > 0) {
+		bool cur_ret = true;
+		int count = 0; /* dword count including packet header */
+
+		switch (*hostaddr >> 30) {
+		case 0x0: /* type-0 */
+			count = (*hostaddr >> 16)+2;
+			cur_ret = _handle_type0(dev_priv, hostaddr);
+			break;
+		case 0x1: /* type-1 */
+			count = 2;
+			break;
+		case 0x3: /* type-3 */
+			count = ((*hostaddr >> 16) & 0x3fff) + 2;
+			cur_ret = _handle_type3(dev_priv, hostaddr);
+			break;
+		default:
+			KGSL_CMD_ERR(dev_priv->device, "unexpected type: "
+				"type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n",
+				*hostaddr >> 30, *hostaddr, hostaddr,
+				gpuaddr+4*(sizedwords-dwords_left));
+			cur_ret = false;
+			count = dwords_left;
+			break;
+		}
+
+		if (!cur_ret) {
+			KGSL_CMD_ERR(dev_priv->device,
+				"bad sub-type: #:%d/%d, v:0x%08x"
+				" @ 0x%p[gb:0x%08x], level:%d\n",
+				sizedwords-dwords_left, sizedwords, *hostaddr,
+				hostaddr, gpuaddr+4*(sizedwords-dwords_left),
+				level);
+
+			if (ADRENO_DEVICE(dev_priv->device)->ib_check_level
+				>= 2)
+				print_hex_dump(KERN_ERR,
+					level == 1 ? "IB1:" : "IB2:",
+					DUMP_PREFIX_OFFSET, 32, 4, hoststart,
+					sizedwords*4, 0);
+			goto done;
+		}
+
+		/* jump to next packet */
+		dwords_left -= count;
+		hostaddr += count;
+		if (dwords_left < 0) {
+			KGSL_CMD_ERR(dev_priv->device,
+				"bad count: c:%d, #:%d/%d, "
+				"v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n",
+				count, sizedwords-(dwords_left+count),
+				sizedwords, *(hostaddr-count), hostaddr-count,
+				gpuaddr+4*(sizedwords-(dwords_left+count)),
+				level);
+			if (ADRENO_DEVICE(dev_priv->device)->ib_check_level
+				>= 2)
+				print_hex_dump(KERN_ERR,
+					level == 1 ? "IB1:" : "IB2:",
+					DUMP_PREFIX_OFFSET, 32, 4, hoststart,
+					sizedwords*4, 0);
+			goto done;
+		}
+	}
+
+	ret = true;
+done:
+	if (!ret)
+		KGSL_DRV_ERR(dev_priv->device,
+			"parsing failed: gpuaddr:0x%08x, "
+			"host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords);
+
+	level--;
+
+	return ret;
+}
+
 int
 adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
 				struct kgsl_context *context,
@@ -603,9 +796,12 @@
 		*cmds++ = ibdesc[0].sizedwords;
 	}
 	for (i = start_index; i < numibs; i++) {
-		(void)kgsl_cffdump_parse_ibs(dev_priv, NULL,
-			ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false);
-
+		if (unlikely(adreno_dev->ib_check_level >= 1 &&
+		    !_parse_ibs(dev_priv, ibdesc[i].gpuaddr,
+				ibdesc[i].sizedwords))) {
+			kfree(link);
+			return -EINVAL;
+		}
 		*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
 		*cmds++ = ibdesc[i].gpuaddr;
 		*cmds++ = ibdesc[i].sizedwords;
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 547727f..8ac575c 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -938,40 +938,6 @@
 
 	return result;
 }
-static bool check_ibdesc(struct kgsl_device_private *dev_priv,
-			 struct kgsl_ibdesc *ibdesc, unsigned int numibs,
-			 bool parse)
-{
-	bool result = true;
-	unsigned int i;
-	for (i = 0; i < numibs; i++) {
-		struct kgsl_mem_entry *entry;
-		spin_lock(&dev_priv->process_priv->mem_lock);
-		entry = kgsl_sharedmem_find_region(dev_priv->process_priv,
-			ibdesc[i].gpuaddr, ibdesc[i].sizedwords * sizeof(uint));
-		spin_unlock(&dev_priv->process_priv->mem_lock);
-		if (entry == NULL) {
-			KGSL_DRV_ERR(dev_priv->device,
-				"invalid cmd buffer gpuaddr %08x " \
-				"sizedwords %d\n", ibdesc[i].gpuaddr,
-				ibdesc[i].sizedwords);
-			result = false;
-			break;
-		}
-
-		if (parse && !kgsl_cffdump_parse_ibs(dev_priv, &entry->memdesc,
-			ibdesc[i].gpuaddr, ibdesc[i].sizedwords, true)) {
-			KGSL_DRV_ERR(dev_priv->device,
-				"invalid cmd buffer gpuaddr %08x " \
-				"sizedwords %d numibs %d/%d\n",
-				ibdesc[i].gpuaddr,
-				ibdesc[i].sizedwords, i+1, numibs);
-			result = false;
-			break;
-		}
-	}
-	return result;
-}
 
 static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv,
 				      unsigned int cmd, void *data)
@@ -1037,12 +1003,6 @@
 		param->numibs = 1;
 	}
 
-	if (!check_ibdesc(dev_priv, ibdesc, param->numibs, true)) {
-		KGSL_DRV_ERR(dev_priv->device, "bad ibdesc");
-		result = -EINVAL;
-		goto free_ibdesc;
-	}
-
 	result = dev_priv->device->ftbl->issueibcmds(dev_priv,
 					     context,
 					     ibdesc,
@@ -1052,18 +1012,6 @@
 
 	trace_kgsl_issueibcmds(dev_priv->device, param, result);
 
-	if (result != 0)
-		goto free_ibdesc;
-
-	/* this is a check to try to detect if a command buffer was freed
-	 * during issueibcmds().
-	 */
-	if (!check_ibdesc(dev_priv, ibdesc, param->numibs, false)) {
-		KGSL_DRV_ERR(dev_priv->device, "bad ibdesc AFTER issue");
-		result = -EINVAL;
-		goto free_ibdesc;
-	}
-
 free_ibdesc:
 	kfree(ibdesc);
 done:
diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c
index a972f69..4e354d0 100644
--- a/drivers/gpu/msm/kgsl_cffdump.c
+++ b/drivers/gpu/msm/kgsl_cffdump.c
@@ -497,190 +497,6 @@
 }
 EXPORT_SYMBOL(kgsl_cffdump_waitirq);
 
-#define ADDRESS_STACK_SIZE 256
-#define GET_PM4_TYPE3_OPCODE(x) ((*(x) >> 8) & 0xFF)
-static unsigned int kgsl_cffdump_addr_count;
-
-static bool kgsl_cffdump_handle_type3(struct kgsl_device_private *dev_priv,
-	uint *hostaddr, bool check_only)
-{
-	static uint addr_stack[ADDRESS_STACK_SIZE];
-	static uint size_stack[ADDRESS_STACK_SIZE];
-
-	switch (GET_PM4_TYPE3_OPCODE(hostaddr)) {
-	case CP_INDIRECT_BUFFER_PFD:
-	case CP_INDIRECT_BUFFER:
-	{
-		/* traverse indirect buffers */
-		int i;
-		uint ibaddr = hostaddr[1];
-		uint ibsize = hostaddr[2];
-
-		/* is this address already in encountered? */
-		for (i = 0;
-			i < kgsl_cffdump_addr_count && addr_stack[i] != ibaddr;
-			++i)
-			;
-
-		if (kgsl_cffdump_addr_count == i) {
-			addr_stack[kgsl_cffdump_addr_count] = ibaddr;
-			size_stack[kgsl_cffdump_addr_count++] = ibsize;
-
-			if (kgsl_cffdump_addr_count >= ADDRESS_STACK_SIZE) {
-				KGSL_CORE_ERR("stack overflow\n");
-				return false;
-			}
-
-			return kgsl_cffdump_parse_ibs(dev_priv, NULL,
-				ibaddr, ibsize, check_only);
-		} else if (size_stack[i] != ibsize) {
-			KGSL_CORE_ERR("gpuaddr: 0x%08x, "
-				"wc: %u, with size wc: %u already on the "
-				"stack\n", ibaddr, ibsize, size_stack[i]);
-			return false;
-		}
-	}
-	break;
-	}
-
-	return true;
-}
-
-/*
- * Traverse IBs and dump them to test vector. Detect swap by inspecting
- * register writes, keeping note of the current state, and dump
- * framebuffer config to test vector
- */
-bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv,
-	const struct kgsl_memdesc *memdesc, uint gpuaddr, int sizedwords,
-	bool check_only)
-{
-	static uint level; /* recursion level */
-	bool ret = true;
-	uint *hostaddr, *hoststart;
-	int dwords_left = sizedwords; /* dwords left in the current command
-					 buffer */
-
-	if (level == 0)
-		kgsl_cffdump_addr_count = 0;
-
-	if (memdesc == NULL) {
-		struct kgsl_mem_entry *entry;
-		spin_lock(&dev_priv->process_priv->mem_lock);
-		entry = kgsl_sharedmem_find_region(dev_priv->process_priv,
-			gpuaddr, sizedwords * sizeof(uint));
-		spin_unlock(&dev_priv->process_priv->mem_lock);
-		if (entry == NULL) {
-			KGSL_CORE_ERR("did not find mapping "
-				"for gpuaddr: 0x%08x\n", gpuaddr);
-			return true;
-		}
-		memdesc = &entry->memdesc;
-	}
-	hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr);
-	if (hostaddr == NULL) {
-		KGSL_CORE_ERR("no kernel mapping for "
-			"gpuaddr: 0x%08x\n", gpuaddr);
-		return true;
-	}
-
-	hoststart = hostaddr;
-
-	level++;
-
-	mb();
-	kgsl_cache_range_op((struct kgsl_memdesc *)memdesc,
-				KGSL_CACHE_OP_INV);
-#ifdef DEBUG
-	pr_info("kgsl: cffdump: ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n",
-		gpuaddr, sizedwords, hostaddr);
-#endif
-
-	while (dwords_left > 0) {
-		int count = 0; /* dword count including packet header */
-		bool cur_ret = true;
-
-		switch (*hostaddr >> 30) {
-		case 0x0: /* type-0 */
-			count = (*hostaddr >> 16)+2;
-			break;
-		case 0x1: /* type-1 */
-			count = 2;
-			break;
-		case 0x3: /* type-3 */
-			count = ((*hostaddr >> 16) & 0x3fff) + 2;
-			cur_ret = kgsl_cffdump_handle_type3(dev_priv,
-				hostaddr, check_only);
-			break;
-		default:
-			pr_warn("kgsl: cffdump: parse-ib: unexpected type: "
-				"type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n",
-				*hostaddr >> 30, *hostaddr, hostaddr,
-				gpuaddr+4*(sizedwords-dwords_left));
-			cur_ret = false;
-			count = dwords_left;
-			break;
-		}
-
-#ifdef DEBUG
-		if (!cur_ret) {
-			pr_info("kgsl: cffdump: bad sub-type: #:%d/%d, v:0x%08x"
-				" @ 0x%p[gb:0x%08x], level:%d\n",
-				sizedwords-dwords_left, sizedwords, *hostaddr,
-				hostaddr, gpuaddr+4*(sizedwords-dwords_left),
-				level);
-
-			print_hex_dump(KERN_ERR, level == 1 ? "IB1:" : "IB2:",
-				DUMP_PREFIX_OFFSET, 32, 4, hoststart,
-				sizedwords*4, 0);
-		}
-#endif
-		ret = ret && cur_ret;
-
-		/* jump to next packet */
-		dwords_left -= count;
-		hostaddr += count;
-		cur_ret = dwords_left >= 0;
-
-#ifdef DEBUG
-		if (!cur_ret) {
-			pr_info("kgsl: cffdump: bad count: c:%d, #:%d/%d, "
-				"v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n",
-				count, sizedwords-(dwords_left+count),
-				sizedwords, *(hostaddr-count), hostaddr-count,
-				gpuaddr+4*(sizedwords-(dwords_left+count)),
-				level);
-
-			print_hex_dump(KERN_ERR, level == 1 ? "IB1:" : "IB2:",
-				DUMP_PREFIX_OFFSET, 32, 4, hoststart,
-				sizedwords*4, 0);
-		}
-#endif
-
-		ret = ret && cur_ret;
-	}
-
-	if (!ret)
-		pr_info("kgsl: cffdump: parsing failed: gpuaddr:0x%08x, "
-			"host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords);
-
-	if (!check_only) {
-#ifdef DEBUG
-		uint offset = gpuaddr - memdesc->gpuaddr;
-		pr_info("kgsl: cffdump: ib-dump: hostptr:%p, gpuaddr:%08x, "
-			"physaddr:%08x, offset:%d, size:%d", hoststart,
-			gpuaddr, memdesc->physaddr + offset, offset,
-			sizedwords*4);
-#endif
-		kgsl_cffdump_syncmem(dev_priv, memdesc, gpuaddr, sizedwords*4,
-			false);
-	}
-
-	level--;
-
-	return ret;
-}
-
 static int subbuf_start_handler(struct rchan_buf *buf,
 	void *subbuf, void *prev_subbuf, uint prev_padding)
 {