msm: kgsl: rework ib checking
Separate ib parse checking from cffdump as it is useful
in other situations. This is controlled by a new debugfs
file, ib_check. All ib checking is off (0) by default,
because parsing and mem_entry lookup can have a performance
impact on some benchmarks. Level 1 checking parses each IB1 and
any IB2 it references. Level 2 additionally hex-dumps the
offending buffer when parsing fails.
Change-Id: Ibf3c6d1e0d7522e75b41e1a6dbb92020ae9ace8d
Signed-off-by: Jeremy Gebben <jgebben@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 356df0d..fb672e4 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -113,6 +113,7 @@
.pfp_fw = NULL,
.pm4_fw = NULL,
.wait_timeout = 10000, /* in milliseconds */
+ .ib_check_level = 0,
};
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index c370190..4885312 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -77,6 +77,7 @@
unsigned int istore_size;
unsigned int pix_shader_start;
unsigned int instruction_size;
+ unsigned int ib_check_level;
};
struct adreno_gpudev {
diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c
index 3461316..9e022b9 100644
--- a/drivers/gpu/msm/adreno_debugfs.c
+++ b/drivers/gpu/msm/adreno_debugfs.c
@@ -113,6 +113,8 @@
&kgsl_cff_dump_enable_fops);
debugfs_create_u32("wait_timeout", 0644, device->d_debugfs,
&adreno_dev->wait_timeout);
+ debugfs_create_u32("ib_check", 0644, device->d_debugfs,
+ &adreno_dev->ib_check_level);
/* Create post mortem control files */
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index 6e85ec6..fb44b25 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -29,11 +29,6 @@
/* skip N 32-bit words to get to the next packet */
#define CP_NOP 0x10
-/* indirect buffer dispatch. prefetch parser uses this packet type to determine
-* whether to pre-fetch the IB
-*/
-#define CP_INDIRECT_BUFFER 0x3f
-
/* indirect buffer dispatch. same as IB, but init is pipelined */
#define CP_INDIRECT_BUFFER_PFD 0x37
@@ -120,6 +115,9 @@
/* load constants from a location in memory */
#define CP_LOAD_CONSTANT_CONTEXT 0x2e
+/* (A2x) sets binning configuration registers */
+#define CP_SET_BIN_DATA 0x2f
+
/* selective invalidation of state pointers */
#define CP_INVALIDATE_STATE 0x3b
@@ -213,7 +211,7 @@
/* packet headers */
#define CP_HDR_ME_INIT cp_type3_packet(CP_ME_INIT, 18)
#define CP_HDR_INDIRECT_BUFFER_PFD cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)
-#define CP_HDR_INDIRECT_BUFFER cp_type3_packet(CP_INDIRECT_BUFFER, 2)
+#define CP_HDR_INDIRECT_BUFFER_PFE cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2)
/* dword base address of the GFX decode space */
#define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000)))
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index 5b197b4..d97659c 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -54,7 +54,7 @@
{CP_IM_LOAD, "IN__LOAD"},
{CP_IM_LOAD_IMMEDIATE, "IM_LOADI"},
{CP_IM_STORE, "IM_STORE"},
- {CP_INDIRECT_BUFFER, "IND_BUF_"},
+ {CP_INDIRECT_BUFFER_PFE, "IND_BUF_"},
{CP_INDIRECT_BUFFER_PFD, "IND_BUFP"},
{CP_INTERRUPT, "PM4_INTR"},
{CP_INVALIDATE_STATE, "INV_STAT"},
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 9542dfc..b9c0a28 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -22,6 +22,7 @@
#include "adreno.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
+#include "adreno_debugfs.h"
#include "a2xx_reg.h"
#include "a3xx_reg.h"
@@ -545,6 +546,198 @@
adreno_ringbuffer_addcmds(rb, flags, cmds, sizedwords);
}
+static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr,
+ int sizedwords);
+
+static bool
+_handle_type3(struct kgsl_device_private *dev_priv, uint *hostaddr)
+{
+ unsigned int opcode = cp_type3_opcode(*hostaddr);
+ switch (opcode) {
+ case CP_INDIRECT_BUFFER_PFD:
+ case CP_INDIRECT_BUFFER_PFE:
+ case CP_COND_INDIRECT_BUFFER_PFE:
+ case CP_COND_INDIRECT_BUFFER_PFD:
+ return _parse_ibs(dev_priv, hostaddr[1], hostaddr[2]);
+ case CP_NOP:
+ case CP_WAIT_FOR_IDLE:
+ case CP_WAIT_REG_MEM:
+ case CP_WAIT_REG_EQ:
+ case CP_WAT_REG_GTE:
+ case CP_WAIT_UNTIL_READ:
+ case CP_WAIT_IB_PFD_COMPLETE:
+ case CP_REG_RMW:
+ case CP_REG_TO_MEM:
+ case CP_MEM_WRITE:
+ case CP_MEM_WRITE_CNTR:
+ case CP_COND_EXEC:
+ case CP_COND_WRITE:
+ case CP_EVENT_WRITE:
+ case CP_EVENT_WRITE_SHD:
+ case CP_EVENT_WRITE_CFL:
+ case CP_EVENT_WRITE_ZPD:
+ case CP_DRAW_INDX:
+ case CP_DRAW_INDX_2:
+ case CP_DRAW_INDX_BIN:
+ case CP_DRAW_INDX_2_BIN:
+ case CP_VIZ_QUERY:
+ case CP_SET_STATE:
+ case CP_SET_CONSTANT:
+ case CP_IM_LOAD:
+ case CP_IM_LOAD_IMMEDIATE:
+ case CP_LOAD_CONSTANT_CONTEXT:
+ case CP_INVALIDATE_STATE:
+ case CP_SET_SHADER_BASES:
+ case CP_SET_BIN_MASK:
+ case CP_SET_BIN_SELECT:
+ case CP_SET_BIN_BASE_OFFSET:
+ case CP_SET_BIN_DATA:
+ case CP_CONTEXT_UPDATE:
+ case CP_INTERRUPT:
+ case CP_IM_STORE:
+ case CP_LOAD_STATE:
+ break;
+ /* these shouldn't come from userspace */
+ case CP_ME_INIT:
+ case CP_SET_PROTECTED_MODE:
+ default:
+ KGSL_CMD_ERR(dev_priv->device, "bad CP opcode %0x\n", opcode);
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+static bool
+_handle_type0(struct kgsl_device_private *dev_priv, uint *hostaddr)
+{
+ unsigned int reg = type0_pkt_offset(*hostaddr);
+ unsigned int cnt = type0_pkt_size(*hostaddr);
+ if (reg < 0x0192 || (reg + cnt) >= 0x8000) {
+ KGSL_CMD_ERR(dev_priv->device, "bad type0 reg: 0x%0x cnt: %d\n",
+ reg, cnt);
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Walk the packets of the IB at gpuaddr, validating each type-0 and
+ * type-3 packet and recursing into any nested IBs. Returns true if the
+ * whole buffer parsed cleanly, false otherwise (details are logged).
+ */
+static bool _parse_ibs(struct kgsl_device_private *dev_priv,
+ uint gpuaddr, int sizedwords)
+{
+ static uint level; /* recursion level */
+ bool ret = false;
+ uint *hostaddr, *hoststart;
+ int dwords_left = sizedwords; /* dwords left in the current command
+ buffer */
+ struct kgsl_mem_entry *entry;
+
+ spin_lock(&dev_priv->process_priv->mem_lock);
+ entry = kgsl_sharedmem_find_region(dev_priv->process_priv,
+ gpuaddr, sizedwords * sizeof(uint));
+ spin_unlock(&dev_priv->process_priv->mem_lock);
+ if (entry == NULL) {
+ KGSL_CMD_ERR(dev_priv->device,
+ "no mapping for gpuaddr: 0x%08x\n", gpuaddr);
+ return false;
+ }
+
+ hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr);
+ if (hostaddr == NULL) {
+ KGSL_CMD_ERR(dev_priv->device,
+ "no mapping for gpuaddr: 0x%08x\n", gpuaddr);
+ return false;
+ }
+
+ hoststart = hostaddr;
+
+ level++;
+
+ KGSL_CMD_INFO(dev_priv->device, "ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n",
+ gpuaddr, sizedwords, hostaddr);
+
+ mb();
+ while (dwords_left > 0) {
+ bool cur_ret = true;
+ int count = 0; /* dword count including packet header */
+
+ switch (*hostaddr >> 30) {
+ case 0x0: /* type-0 */
+ count = (*hostaddr >> 16)+2;
+ cur_ret = _handle_type0(dev_priv, hostaddr);
+ break;
+ case 0x1: /* type-1 */
+ count = 2;
+ break;
+ case 0x3: /* type-3 */
+ count = ((*hostaddr >> 16) & 0x3fff) + 2;
+ cur_ret = _handle_type3(dev_priv, hostaddr);
+ break;
+ default:
+ KGSL_CMD_ERR(dev_priv->device, "unexpected type: "
+ "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n",
+ *hostaddr >> 30, *hostaddr, hostaddr,
+ gpuaddr+4*(sizedwords-dwords_left));
+ cur_ret = false;
+ count = dwords_left;
+ break;
+ }
+
+ if (!cur_ret) {
+ KGSL_CMD_ERR(dev_priv->device,
+ "bad sub-type: #:%d/%d, v:0x%08x"
+ " @ 0x%p[gb:0x%08x], level:%d\n",
+ sizedwords-dwords_left, sizedwords, *hostaddr,
+ hostaddr, gpuaddr+4*(sizedwords-dwords_left),
+ level);
+
+ if (ADRENO_DEVICE(dev_priv->device)->ib_check_level
+ >= 2)
+ print_hex_dump(KERN_ERR,
+ level == 1 ? "IB1:" : "IB2:",
+ DUMP_PREFIX_OFFSET, 32, 4, hoststart,
+ sizedwords*4, 0);
+ goto done;
+ }
+
+ /* jump to next packet */
+ dwords_left -= count;
+ hostaddr += count;
+ if (dwords_left < 0) {
+ KGSL_CMD_ERR(dev_priv->device,
+ "bad count: c:%d, #:%d/%d, "
+ "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n",
+ count, sizedwords-(dwords_left+count),
+ sizedwords, *(hostaddr-count), hostaddr-count,
+ gpuaddr+4*(sizedwords-(dwords_left+count)),
+ level);
+ if (ADRENO_DEVICE(dev_priv->device)->ib_check_level
+ >= 2)
+ print_hex_dump(KERN_ERR,
+ level == 1 ? "IB1:" : "IB2:",
+ DUMP_PREFIX_OFFSET, 32, 4, hoststart,
+ sizedwords*4, 0);
+ goto done;
+ }
+ }
+
+ ret = true;
+done:
+ if (!ret)
+ KGSL_DRV_ERR(dev_priv->device,
+ "parsing failed: gpuaddr:0x%08x, "
+ "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords);
+
+ level--;
+
+ return ret;
+}
+
int
adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
struct kgsl_context *context,
@@ -603,9 +796,12 @@
*cmds++ = ibdesc[0].sizedwords;
}
for (i = start_index; i < numibs; i++) {
- (void)kgsl_cffdump_parse_ibs(dev_priv, NULL,
- ibdesc[i].gpuaddr, ibdesc[i].sizedwords, false);
-
+ if (unlikely(adreno_dev->ib_check_level >= 1 &&
+ !_parse_ibs(dev_priv, ibdesc[i].gpuaddr,
+ ibdesc[i].sizedwords))) {
+ kfree(link);
+ return -EINVAL;
+ }
*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
*cmds++ = ibdesc[i].gpuaddr;
*cmds++ = ibdesc[i].sizedwords;
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 547727f..8ac575c 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -938,40 +938,6 @@
return result;
}
-static bool check_ibdesc(struct kgsl_device_private *dev_priv,
- struct kgsl_ibdesc *ibdesc, unsigned int numibs,
- bool parse)
-{
- bool result = true;
- unsigned int i;
- for (i = 0; i < numibs; i++) {
- struct kgsl_mem_entry *entry;
- spin_lock(&dev_priv->process_priv->mem_lock);
- entry = kgsl_sharedmem_find_region(dev_priv->process_priv,
- ibdesc[i].gpuaddr, ibdesc[i].sizedwords * sizeof(uint));
- spin_unlock(&dev_priv->process_priv->mem_lock);
- if (entry == NULL) {
- KGSL_DRV_ERR(dev_priv->device,
- "invalid cmd buffer gpuaddr %08x " \
- "sizedwords %d\n", ibdesc[i].gpuaddr,
- ibdesc[i].sizedwords);
- result = false;
- break;
- }
-
- if (parse && !kgsl_cffdump_parse_ibs(dev_priv, &entry->memdesc,
- ibdesc[i].gpuaddr, ibdesc[i].sizedwords, true)) {
- KGSL_DRV_ERR(dev_priv->device,
- "invalid cmd buffer gpuaddr %08x " \
- "sizedwords %d numibs %d/%d\n",
- ibdesc[i].gpuaddr,
- ibdesc[i].sizedwords, i+1, numibs);
- result = false;
- break;
- }
- }
- return result;
-}
static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
@@ -1037,12 +1003,6 @@
param->numibs = 1;
}
- if (!check_ibdesc(dev_priv, ibdesc, param->numibs, true)) {
- KGSL_DRV_ERR(dev_priv->device, "bad ibdesc");
- result = -EINVAL;
- goto free_ibdesc;
- }
-
result = dev_priv->device->ftbl->issueibcmds(dev_priv,
context,
ibdesc,
@@ -1052,18 +1012,6 @@
trace_kgsl_issueibcmds(dev_priv->device, param, result);
- if (result != 0)
- goto free_ibdesc;
-
- /* this is a check to try to detect if a command buffer was freed
- * during issueibcmds().
- */
- if (!check_ibdesc(dev_priv, ibdesc, param->numibs, false)) {
- KGSL_DRV_ERR(dev_priv->device, "bad ibdesc AFTER issue");
- result = -EINVAL;
- goto free_ibdesc;
- }
-
free_ibdesc:
kfree(ibdesc);
done:
diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c
index a972f69..4e354d0 100644
--- a/drivers/gpu/msm/kgsl_cffdump.c
+++ b/drivers/gpu/msm/kgsl_cffdump.c
@@ -497,190 +497,6 @@
}
EXPORT_SYMBOL(kgsl_cffdump_waitirq);
-#define ADDRESS_STACK_SIZE 256
-#define GET_PM4_TYPE3_OPCODE(x) ((*(x) >> 8) & 0xFF)
-static unsigned int kgsl_cffdump_addr_count;
-
-static bool kgsl_cffdump_handle_type3(struct kgsl_device_private *dev_priv,
- uint *hostaddr, bool check_only)
-{
- static uint addr_stack[ADDRESS_STACK_SIZE];
- static uint size_stack[ADDRESS_STACK_SIZE];
-
- switch (GET_PM4_TYPE3_OPCODE(hostaddr)) {
- case CP_INDIRECT_BUFFER_PFD:
- case CP_INDIRECT_BUFFER:
- {
- /* traverse indirect buffers */
- int i;
- uint ibaddr = hostaddr[1];
- uint ibsize = hostaddr[2];
-
- /* is this address already in encountered? */
- for (i = 0;
- i < kgsl_cffdump_addr_count && addr_stack[i] != ibaddr;
- ++i)
- ;
-
- if (kgsl_cffdump_addr_count == i) {
- addr_stack[kgsl_cffdump_addr_count] = ibaddr;
- size_stack[kgsl_cffdump_addr_count++] = ibsize;
-
- if (kgsl_cffdump_addr_count >= ADDRESS_STACK_SIZE) {
- KGSL_CORE_ERR("stack overflow\n");
- return false;
- }
-
- return kgsl_cffdump_parse_ibs(dev_priv, NULL,
- ibaddr, ibsize, check_only);
- } else if (size_stack[i] != ibsize) {
- KGSL_CORE_ERR("gpuaddr: 0x%08x, "
- "wc: %u, with size wc: %u already on the "
- "stack\n", ibaddr, ibsize, size_stack[i]);
- return false;
- }
- }
- break;
- }
-
- return true;
-}
-
-/*
- * Traverse IBs and dump them to test vector. Detect swap by inspecting
- * register writes, keeping note of the current state, and dump
- * framebuffer config to test vector
- */
-bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv,
- const struct kgsl_memdesc *memdesc, uint gpuaddr, int sizedwords,
- bool check_only)
-{
- static uint level; /* recursion level */
- bool ret = true;
- uint *hostaddr, *hoststart;
- int dwords_left = sizedwords; /* dwords left in the current command
- buffer */
-
- if (level == 0)
- kgsl_cffdump_addr_count = 0;
-
- if (memdesc == NULL) {
- struct kgsl_mem_entry *entry;
- spin_lock(&dev_priv->process_priv->mem_lock);
- entry = kgsl_sharedmem_find_region(dev_priv->process_priv,
- gpuaddr, sizedwords * sizeof(uint));
- spin_unlock(&dev_priv->process_priv->mem_lock);
- if (entry == NULL) {
- KGSL_CORE_ERR("did not find mapping "
- "for gpuaddr: 0x%08x\n", gpuaddr);
- return true;
- }
- memdesc = &entry->memdesc;
- }
- hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr);
- if (hostaddr == NULL) {
- KGSL_CORE_ERR("no kernel mapping for "
- "gpuaddr: 0x%08x\n", gpuaddr);
- return true;
- }
-
- hoststart = hostaddr;
-
- level++;
-
- mb();
- kgsl_cache_range_op((struct kgsl_memdesc *)memdesc,
- KGSL_CACHE_OP_INV);
-#ifdef DEBUG
- pr_info("kgsl: cffdump: ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n",
- gpuaddr, sizedwords, hostaddr);
-#endif
-
- while (dwords_left > 0) {
- int count = 0; /* dword count including packet header */
- bool cur_ret = true;
-
- switch (*hostaddr >> 30) {
- case 0x0: /* type-0 */
- count = (*hostaddr >> 16)+2;
- break;
- case 0x1: /* type-1 */
- count = 2;
- break;
- case 0x3: /* type-3 */
- count = ((*hostaddr >> 16) & 0x3fff) + 2;
- cur_ret = kgsl_cffdump_handle_type3(dev_priv,
- hostaddr, check_only);
- break;
- default:
- pr_warn("kgsl: cffdump: parse-ib: unexpected type: "
- "type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n",
- *hostaddr >> 30, *hostaddr, hostaddr,
- gpuaddr+4*(sizedwords-dwords_left));
- cur_ret = false;
- count = dwords_left;
- break;
- }
-
-#ifdef DEBUG
- if (!cur_ret) {
- pr_info("kgsl: cffdump: bad sub-type: #:%d/%d, v:0x%08x"
- " @ 0x%p[gb:0x%08x], level:%d\n",
- sizedwords-dwords_left, sizedwords, *hostaddr,
- hostaddr, gpuaddr+4*(sizedwords-dwords_left),
- level);
-
- print_hex_dump(KERN_ERR, level == 1 ? "IB1:" : "IB2:",
- DUMP_PREFIX_OFFSET, 32, 4, hoststart,
- sizedwords*4, 0);
- }
-#endif
- ret = ret && cur_ret;
-
- /* jump to next packet */
- dwords_left -= count;
- hostaddr += count;
- cur_ret = dwords_left >= 0;
-
-#ifdef DEBUG
- if (!cur_ret) {
- pr_info("kgsl: cffdump: bad count: c:%d, #:%d/%d, "
- "v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n",
- count, sizedwords-(dwords_left+count),
- sizedwords, *(hostaddr-count), hostaddr-count,
- gpuaddr+4*(sizedwords-(dwords_left+count)),
- level);
-
- print_hex_dump(KERN_ERR, level == 1 ? "IB1:" : "IB2:",
- DUMP_PREFIX_OFFSET, 32, 4, hoststart,
- sizedwords*4, 0);
- }
-#endif
-
- ret = ret && cur_ret;
- }
-
- if (!ret)
- pr_info("kgsl: cffdump: parsing failed: gpuaddr:0x%08x, "
- "host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords);
-
- if (!check_only) {
-#ifdef DEBUG
- uint offset = gpuaddr - memdesc->gpuaddr;
- pr_info("kgsl: cffdump: ib-dump: hostptr:%p, gpuaddr:%08x, "
- "physaddr:%08x, offset:%d, size:%d", hoststart,
- gpuaddr, memdesc->physaddr + offset, offset,
- sizedwords*4);
-#endif
- kgsl_cffdump_syncmem(dev_priv, memdesc, gpuaddr, sizedwords*4,
- false);
- }
-
- level--;
-
- return ret;
-}
-
static int subbuf_start_handler(struct rchan_buf *buf,
void *subbuf, void *prev_subbuf, uint prev_padding)
{