msm: kgsl: Add a new API to allow sharing of GPU performance counters
Adreno uses programmable performance counters, meaning that while there
are a limited number of physical counters, each counter can be programmed
to count any of a large number of different measurements (we refer to
these as countables). This could cause conflicts if multiple apps want
to use the performance counters, so this API and infrastructure allow
the counters to be safely shared.
The kernel tracks which countable is selected on each of the physical
counters in each counter group (where groups closely match hardware
blocks). If the desired countable is already in use, or there is an
open physical counter, then the process is allowed to use the counter.
The get ioctl reserves a counter and returns the dword offset of the
register associated with that physical counter. The put ioctl
releases the physical counter. The query ioctl returns the countables
currently in use for all of the counters in the group - up to 8 values
can be returned. The read ioctl returns the current hardware value of
the counter.
Change-Id: Ic0dedbadae1dedadba60f8a3e685e2ce7d84fb33
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Carter Cooper <ccooper@codeaurora.org>
diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h
index 9a1f676..ddc6e58 100644
--- a/drivers/gpu/msm/a3xx_reg.h
+++ b/drivers/gpu/msm/a3xx_reg.h
@@ -66,15 +66,103 @@
#define A3XX_RBBM_INT_0_MASK 0x063
#define A3XX_RBBM_INT_0_STATUS 0x064
#define A3XX_RBBM_PERFCTR_CTL 0x80
+#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81
+#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82
+#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84
+#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85
+#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86
+#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87
#define A3XX_RBBM_GPU_BUSY_MASKED 0x88
+#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90
+#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91
+#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92
+#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93
+#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94
+#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95
+#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96
+#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97
+#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98
+#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99
+#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A
+#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B
+#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C
+#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D
+#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E
+#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F
+#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0
+#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1
+#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2
+#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3
+#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4
+#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5
+#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6
+#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7
+#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8
+#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9
+#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA
+#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB
+#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC
+#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD
+#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE
+#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF
+#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0
+#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1
+#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2
+#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3
+#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4
+#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5
+#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6
+#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7
+#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8
+#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9
+#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA
+#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB
+#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC
+#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD
+#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE
+#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF
+#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0
+#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1
+#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2
+#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3
+#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4
+#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5
+#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6
+#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7
+#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8
+#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9
+#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA
+#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB
+#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC
+#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD
+#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE
+#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF
+#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0
+#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1
+#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2
+#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3
+#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4
+#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5
+#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6
+#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7
+#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8
+#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9
+#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA
+#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB
#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC
#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD
#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE
#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF
#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0
#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1
+#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2
+#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3
+#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4
+#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5
+
#define A3XX_RBBM_RBBM_CTL 0x100
-#define A3XX_RBBM_RBBM_CTL 0x100
+#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA
+#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB
#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC
#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED
#define A3XX_RBBM_DEBUG_BUS_CTL 0x111
@@ -90,6 +178,7 @@
#define A3XX_CP_MERCIU_DATA2 0x1D3
#define A3XX_CP_MEQ_ADDR 0x1DA
#define A3XX_CP_MEQ_DATA 0x1DB
+#define A3XX_CP_PERFCOUNTER_SELECT 0x445
#define A3XX_CP_HW_FAULT 0x45C
#define A3XX_CP_AHB_FAULT 0x54D
#define A3XX_CP_PROTECT_CTRL 0x45E
@@ -138,6 +227,14 @@
#define A3XX_VSC_PIPE_CONFIG_7 0xC1B
#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C
#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D
+#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48
+#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49
+#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A
+#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B
+#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88
+#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89
+#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A
+#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B
#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0
#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1
#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2
@@ -163,14 +260,42 @@
#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6
#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7
#define A3XX_RB_GMEM_BASE_ADDR 0xCC0
+#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6
+#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7
+#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00
+#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01
+#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02
+#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03
+#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04
+#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05
#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44
+#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45
#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61
#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62
+#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64
+#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65
#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82
+#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84
+#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85
+#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86
+#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87
+#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88
+#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89
#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0
+#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4
+#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5
+#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6
+#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7
+#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8
#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9
#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA
#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB
+#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04
+#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05
+#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06
+#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07
+#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08
+#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09
#define A3XX_GRAS_CL_CLIP_CNTL 0x2040
#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044
#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048
@@ -271,8 +396,10 @@
#define A3XX_VBIF_OUT_AXI_AOOO 0x305F
/* Bit flags for RBBM_CTL */
-#define RBBM_RBBM_CTL_RESET_PWR_CTR1 (1 << 1)
-#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 (1 << 17)
+#define RBBM_RBBM_CTL_RESET_PWR_CTR0 BIT(0)
+#define RBBM_RBBM_CTL_RESET_PWR_CTR1 BIT(1)
+#define RBBM_RBBM_CTL_ENABLE_PWR_CTR0 BIT(16)
+#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 BIT(17)
/* Various flags used by the context switch code */
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 2d9f05d..5d643ee 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -211,6 +211,313 @@
512, 0, 2, SZ_1M, NO_VER, NO_VER },
};
+/**
+ * adreno_perfcounter_init: Reserve kernel performance counters
+ * @device: device to configure
+ *
+ * The kernel needs/wants a certain group of performance counters for
+ * its own activities. Reserve these performance counters at init time
+ * to ensure that they are always reserved for the kernel. The performance
+ * counters used by the kernel can be obtained by the user, but these
+ * performance counters will remain active as long as the device is alive.
+ */
+
+static void adreno_perfcounter_init(struct kgsl_device *device)
+{
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+ if (adreno_dev->gpudev->perfcounter_init)
+ adreno_dev->gpudev->perfcounter_init(adreno_dev);
+}
+
+/**
+ * adreno_perfcounter_start: Enable performance counters
+ * @adreno_dev: Adreno device to configure
+ *
+ * Ensure that all allocated performance counters are enabled. Since the
+ * device was most likely stopped, we can't trust that the counter
+ * programming is still in place, so reprogram every counter in use.
+ */
+
+static void adreno_perfcounter_start(struct adreno_device *adreno_dev)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+ unsigned int i, j;
+
+ /* group id iter */
+ for (i = 0; i < counters->group_count; i++) {
+ group = &(counters->groups[i]);
+
+ /* countable iter */
+ for (j = 0; j < group->reg_count; j++) {
+ if (group->regs[j].countable ==
+ KGSL_PERFCOUNTER_NOT_USED)
+ continue;
+
+ if (adreno_dev->gpudev->perfcounter_enable)
+ adreno_dev->gpudev->perfcounter_enable(
+ adreno_dev, i, j,
+ group->regs[j].countable);
+ }
+ }
+}
+
+/**
+ * adreno_perfcounter_read_group: Read a list of performance counters
+ * @adreno_dev: Adreno device to configure
+ * @reads: List of kgsl_perfcounter_read_groups
+ * @count: Length of list
+ *
+ * Read the performance counters for the groupid/countable pairs and return
+ * the 64 bit result for each pair
+ */
+
+int adreno_perfcounter_read_group(struct adreno_device *adreno_dev,
+ struct kgsl_perfcounter_read_group *reads, unsigned int count)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+ struct kgsl_perfcounter_read_group *list = NULL;
+ unsigned int i, j;
+ int ret = 0;
+
+ /* perfcounter get/put/query/read not allowed on a2xx */
+ if (adreno_is_a2xx(adreno_dev))
+ return -EINVAL;
+
+ /* sanity check for later */
+ if (!adreno_dev->gpudev->perfcounter_read)
+ return -EINVAL;
+
+ /* sanity check params passed in */
+ if (reads == NULL || count == 0 || count > 100)
+ return -EINVAL;
+
+ list = kmalloc(sizeof(struct kgsl_perfcounter_read_group) * count,
+ GFP_KERNEL);
+ if (!list)
+ return -ENOMEM;
+
+ if (copy_from_user(list, reads,
+ sizeof(struct kgsl_perfcounter_read_group) * count)) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /*
+ * Verify the group ids after copying from userspace - reads is a
+ * user space pointer and can't be dereferenced directly here
+ */
+ for (i = 0; i < count; i++) {
+ if (list[i].groupid >= counters->group_count) {
+ ret = -EINVAL;
+ goto done;
+ }
+ }
+
+ /* list iterator */
+ for (j = 0; j < count; j++) {
+ list[j].value = 0;
+
+ group = &(counters->groups[list[j].groupid]);
+
+ /* group/counter iterator */
+ for (i = 0; i < group->reg_count; i++) {
+ if (group->regs[i].countable == list[j].countable) {
+ list[j].value =
+ adreno_dev->gpudev->perfcounter_read(
+ adreno_dev, list[j].groupid,
+ i, group->regs[i].offset);
+ break;
+ }
+ }
+ }
+
+ /* write the data */
+ if (copy_to_user(reads, list,
+ sizeof(struct kgsl_perfcounter_read_group) *
+ count) != 0)
+ ret = -EFAULT;
+
+done:
+ kfree(list);
+ return ret;
+}
+
+/**
+ * adreno_perfcounter_query_group: Determine which countables are in counters
+ * @adreno_dev: Adreno device to configure
+ * @groupid: Desired performance counter group
+ * @countables: Return list of all countables in the group's counters
+ * @count: Max length of the array
+ * @max_counters: max counters for the groupid
+ *
+ * Query the current state of counters for the group.
+ */
+
+int adreno_perfcounter_query_group(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int *countables, unsigned int count,
+ unsigned int *max_counters)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+ unsigned int i;
+
+ *max_counters = 0;
+
+ /* perfcounter get/put/query not allowed on a2xx */
+ if (adreno_is_a2xx(adreno_dev))
+ return -EINVAL;
+
+ if (groupid >= counters->group_count)
+ return -EINVAL;
+
+ group = &(counters->groups[groupid]);
+ *max_counters = group->reg_count;
+
+ /*
+ * If countables is NULL or count is zero, just return reg_count in
+ * *max_counters and report success
+ */
+ if (countables == NULL || count == 0)
+ return 0;
+
+ /*
+ * Go through all available counters. Write up to count countable
+ * values.
+ */
+ for (i = 0; i < group->reg_count && i < count; i++) {
+ if (copy_to_user(&countables[i], &(group->regs[i].countable),
+ sizeof(unsigned int)) != 0)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * adreno_perfcounter_get: Try to put a countable in an available counter
+ * @adreno_dev: Adreno device to configure
+ * @groupid: Desired performance counter group
+ * @countable: Countable desired to be in a counter
+ * @offset: Return offset of the countable
+ * @flags: Used to setup kernel perf counters
+ *
+ * Try to place a countable in an available counter. If the countable is
+ * already in a counter, reference count the counter/countable pair resource
+ * and return success
+ */
+
+int adreno_perfcounter_get(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int countable, unsigned int *offset,
+ unsigned int flags)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+ unsigned int i, empty = -1;
+
+ /* always clear return variables */
+ if (offset)
+ *offset = 0;
+
+ /* perfcounter get/put/query not allowed on a2xx */
+ if (adreno_is_a2xx(adreno_dev))
+ return -EINVAL;
+
+ if (groupid >= counters->group_count)
+ return -EINVAL;
+
+ group = &(counters->groups[groupid]);
+
+ /*
+ * Check if the countable is already associated with a counter.
+ * Refcount and return the offset, otherwise, try and find an empty
+ * counter and assign the countable to it.
+ */
+ for (i = 0; i < group->reg_count; i++) {
+ if (group->regs[i].countable == countable) {
+ /* Countable already associated with counter */
+ group->regs[i].refcount++;
+ group->regs[i].flags |= flags;
+ if (offset)
+ *offset = group->regs[i].offset;
+ return 0;
+ } else if (group->regs[i].countable ==
+ KGSL_PERFCOUNTER_NOT_USED) {
+ /* keep track of unused counter */
+ empty = i;
+ }
+ }
+
+ /* no available counters, so do nothing else */
+ if (empty == -1)
+ return -EBUSY;
+
+ /* initialize the new counter */
+ group->regs[empty].countable = countable;
+ group->regs[empty].refcount = 1;
+
+ /* enable the new counter */
+ adreno_dev->gpudev->perfcounter_enable(adreno_dev, groupid, empty,
+ countable);
+
+ group->regs[empty].flags = flags;
+
+ if (offset)
+ *offset = group->regs[empty].offset;
+
+ return 0;
+}
+
+/**
+ * adreno_perfcounter_put: Release a countable from counter resource
+ * @adreno_dev: Adreno device to configure
+ * @groupid: Desired performance counter group
+ * @countable: Countable desired to be freed from a counter
+ *
+ * Put a performance counter/countable pair that was previously received. If
+ * no one else is using the countable, free up the counter for others.
+ */
+int adreno_perfcounter_put(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int countable)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+ unsigned int i;
+
+ /* perfcounter get/put/query not allowed on a2xx */
+ if (adreno_is_a2xx(adreno_dev))
+ return -EINVAL;
+
+ if (groupid >= counters->group_count)
+ return -EINVAL;
+
+ group = &(counters->groups[groupid]);
+
+ for (i = 0; i < group->reg_count; i++) {
+ if (group->regs[i].countable == countable) {
+ if (group->regs[i].refcount > 0) {
+ group->regs[i].refcount--;
+
+ /*
+ * Bookkeeping to ensure we never free a
+ * perf counter that is used by the kernel
+ */
+ if ((group->regs[i].flags &
+ PERFCOUNTER_FLAG_KERNEL) &&
+ group->regs[i].refcount == 0)
+ group->regs[i].refcount++;
+
+ /* make available if not used */
+ if (group->regs[i].refcount == 0)
+ group->regs[i].countable =
+ KGSL_PERFCOUNTER_NOT_USED;
+ }
+
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
static irqreturn_t adreno_irq_handler(struct kgsl_device *device)
{
irqreturn_t result;
@@ -1266,6 +1573,12 @@
rb->global_ts = 0;
+ /* Assign the correct RBBM status register to the hang detect regs */
+ ft_detect_regs[0] = adreno_dev->gpudev->reg_rbbm_status;
+
+ adreno_perfcounter_init(device);
+
/* Power down the device */
kgsl_pwrctrl_disable(device);
@@ -1341,6 +1654,8 @@
if (KGSL_STATE_DUMP_AND_FT != device->state)
mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
+ adreno_perfcounter_start(adreno_dev);
+
device->reset_counter++;
return 0;
@@ -3169,30 +3484,58 @@
static long adreno_ioctl(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
+ struct kgsl_device *device = dev_priv->device;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
int result = 0;
- struct kgsl_drawctxt_set_bin_base_offset *binbase;
- struct kgsl_context *context;
switch (cmd) {
- case IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET:
+ case IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET: {
+ struct kgsl_drawctxt_set_bin_base_offset *binbase;
+ struct kgsl_context *context;
+
binbase = data;
context = kgsl_context_get_owner(dev_priv,
binbase->drawctxt_id);
if (context) {
adreno_drawctxt_set_bin_base_offset(
- dev_priv->device, context, binbase->offset);
+ device, context, binbase->offset);
} else {
result = -EINVAL;
- KGSL_DRV_ERR(dev_priv->device,
+ KGSL_DRV_ERR(device,
"invalid drawctxt drawctxt_id %d "
"device_id=%d\n",
- binbase->drawctxt_id, dev_priv->device->id);
+ binbase->drawctxt_id, device->id);
}
kgsl_context_put(context);
break;
-
+ }
+ case IOCTL_KGSL_PERFCOUNTER_GET: {
+ struct kgsl_perfcounter_get *get = data;
+ result = adreno_perfcounter_get(adreno_dev, get->groupid,
+ get->countable, &get->offset, PERFCOUNTER_FLAG_NONE);
+ break;
+ }
+ case IOCTL_KGSL_PERFCOUNTER_PUT: {
+ struct kgsl_perfcounter_put *put = data;
+ result = adreno_perfcounter_put(adreno_dev, put->groupid,
+ put->countable);
+ break;
+ }
+ case IOCTL_KGSL_PERFCOUNTER_QUERY: {
+ struct kgsl_perfcounter_query *query = data;
+ result = adreno_perfcounter_query_group(adreno_dev,
+ query->groupid, query->countables,
+ query->count, &query->max_counters);
+ break;
+ }
+ case IOCTL_KGSL_PERFCOUNTER_READ: {
+ struct kgsl_perfcounter_read *read = data;
+ result = adreno_perfcounter_read_group(adreno_dev,
+ read->reads, read->count);
+ break;
+ }
default:
KGSL_DRV_INFO(dev_priv->device,
"invalid ioctl code %08x\n", cmd);
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 1e22be9..1966078 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -116,6 +116,44 @@
unsigned int gpu_cycles;
};
+#define PERFCOUNTER_FLAG_NONE 0x0
+#define PERFCOUNTER_FLAG_KERNEL 0x1
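+
+/*
+ * Counters taken with PERFCOUNTER_FLAG_KERNEL are reserved by the kernel:
+ * adreno_perfcounter_put() keeps a reference on them so that a userspace
+ * put can never free a counter that the kernel is still using.
+ */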
+
+/* Structs to maintain the list of active performance counters */
+
+/**
+ * struct adreno_perfcount_register: register state
+ * @countable: countable the register holds
+ * @refcount: number of users of the register
+ * @offset: register hardware offset
+ * @flags: flags to mark the counter (e.g. PERFCOUNTER_FLAG_KERNEL)
+ */
+struct adreno_perfcount_register {
+ unsigned int countable;
+ unsigned int refcount;
+ unsigned int offset;
+ unsigned int flags;
+};
+
+/**
+ * struct adreno_perfcount_group: registers for a hardware group
+ * @regs: available registers for this group
+ * @reg_count: total registers for this group
+ */
+struct adreno_perfcount_group {
+ struct adreno_perfcount_register *regs;
+ unsigned int reg_count;
+};
+
+/**
+ * struct adreno_perfcounters: all available perfcounter groups
+ * @groups: available groups for this device
+ * @group_count: total groups for this device
+ */
+struct adreno_perfcounters {
+ struct adreno_perfcount_group *groups;
+ unsigned int group_count;
+};
+
struct adreno_gpudev {
/*
* These registers are in a different location on A3XX, so define
@@ -127,6 +165,8 @@
/* keeps track of when we need to execute the draw workaround code */
int ctx_switches_since_last_draw;
+ struct adreno_perfcounters *perfcounters;
+
/* GPU specific function hooks */
int (*ctxt_create)(struct adreno_device *, struct adreno_context *);
void (*ctxt_save)(struct adreno_device *, struct adreno_context *);
@@ -138,8 +178,14 @@
unsigned int (*irq_pending)(struct adreno_device *);
void * (*snapshot)(struct adreno_device *, void *, int *, int);
int (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *);
+ void (*perfcounter_init)(struct adreno_device *);
void (*start)(struct adreno_device *);
unsigned int (*busy_cycles)(struct adreno_device *);
+ void (*perfcounter_enable)(struct adreno_device *, unsigned int group,
+ unsigned int counter, unsigned int countable);
+ uint64_t (*perfcounter_read)(struct adreno_device *adreno_dev,
+ unsigned int group, unsigned int counter,
+ unsigned int offset);
};
/*
@@ -255,6 +301,13 @@
unsigned int adreno_ft_detect(struct kgsl_device *device,
unsigned int *prev_reg_val);
+int adreno_perfcounter_get(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int countable, unsigned int *offset,
+ unsigned int flags);
+
+int adreno_perfcounter_put(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int countable);
+
static inline int adreno_is_a200(struct adreno_device *adreno_dev)
{
return (adreno_dev->gpurev == ADRENO_REV_A200);
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index 6d29e68..61c8664f 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -2614,6 +2614,213 @@
queue_work(device->work_queue, &device->ts_expired_ws);
}
+/**
+ * struct a3xx_perfcounter_register - Define a performance counter register
+ * @load_bit: the bit to set in RBBM_PERFCTR_LOAD_CMD0/LOAD_CMD1 to force the
+ * RBBM
+ * to load the reset value into the appropriate counter
+ * @select: The dword offset of the register to write the selected
+ * countable into
+ */
+
+struct a3xx_perfcounter_register {
+ unsigned int load_bit;
+ unsigned int select;
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_cp[] = {
+ { 0, A3XX_CP_PERFCOUNTER_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rbbm[] = {
+ { 1, A3XX_RBBM_PERFCOUNTER0_SELECT },
+ { 2, A3XX_RBBM_PERFCOUNTER1_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_pc[] = {
+ { 3, A3XX_PC_PERFCOUNTER0_SELECT },
+ { 4, A3XX_PC_PERFCOUNTER1_SELECT },
+ { 5, A3XX_PC_PERFCOUNTER2_SELECT },
+ { 6, A3XX_PC_PERFCOUNTER3_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vfd[] = {
+ { 7, A3XX_VFD_PERFCOUNTER0_SELECT },
+ { 8, A3XX_VFD_PERFCOUNTER1_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_hlsq[] = {
+ { 9, A3XX_HLSQ_PERFCOUNTER0_SELECT },
+ { 10, A3XX_HLSQ_PERFCOUNTER1_SELECT },
+ { 11, A3XX_HLSQ_PERFCOUNTER2_SELECT },
+ { 12, A3XX_HLSQ_PERFCOUNTER3_SELECT },
+ { 13, A3XX_HLSQ_PERFCOUNTER4_SELECT },
+ { 14, A3XX_HLSQ_PERFCOUNTER5_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vpc[] = {
+ { 15, A3XX_VPC_PERFCOUNTER0_SELECT },
+ { 16, A3XX_VPC_PERFCOUNTER1_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tse[] = {
+ { 17, A3XX_GRAS_PERFCOUNTER0_SELECT },
+ { 18, A3XX_GRAS_PERFCOUNTER1_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_ras[] = {
+ { 19, A3XX_GRAS_PERFCOUNTER2_SELECT },
+ { 20, A3XX_GRAS_PERFCOUNTER3_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_uche[] = {
+ { 21, A3XX_UCHE_PERFCOUNTER0_SELECT },
+ { 22, A3XX_UCHE_PERFCOUNTER1_SELECT },
+ { 23, A3XX_UCHE_PERFCOUNTER2_SELECT },
+ { 24, A3XX_UCHE_PERFCOUNTER3_SELECT },
+ { 25, A3XX_UCHE_PERFCOUNTER4_SELECT },
+ { 26, A3XX_UCHE_PERFCOUNTER5_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tp[] = {
+ { 27, A3XX_TP_PERFCOUNTER0_SELECT },
+ { 28, A3XX_TP_PERFCOUNTER1_SELECT },
+ { 29, A3XX_TP_PERFCOUNTER2_SELECT },
+ { 30, A3XX_TP_PERFCOUNTER3_SELECT },
+ { 31, A3XX_TP_PERFCOUNTER4_SELECT },
+ { 32, A3XX_TP_PERFCOUNTER5_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_sp[] = {
+ { 33, A3XX_SP_PERFCOUNTER0_SELECT },
+ { 34, A3XX_SP_PERFCOUNTER1_SELECT },
+ { 35, A3XX_SP_PERFCOUNTER2_SELECT },
+ { 36, A3XX_SP_PERFCOUNTER3_SELECT },
+ { 37, A3XX_SP_PERFCOUNTER4_SELECT },
+ { 38, A3XX_SP_PERFCOUNTER5_SELECT },
+ { 39, A3XX_SP_PERFCOUNTER6_SELECT },
+ { 40, A3XX_SP_PERFCOUNTER7_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rb[] = {
+ { 41, A3XX_RB_PERFCOUNTER0_SELECT },
+ { 42, A3XX_RB_PERFCOUNTER1_SELECT },
+};
+
+#define REGCOUNTER_GROUP(_x) { (_x), ARRAY_SIZE((_x)) }
+
+static struct {
+ struct a3xx_perfcounter_register *regs;
+ int count;
+} a3xx_perfcounter_reglist[] = {
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_cp),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_rbbm),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_pc),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_vfd),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_hlsq),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_vpc),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_tse),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_ras),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_uche),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_tp),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_sp),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_rb),
+};
+
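+/*
+ * a3xx_perfcounter_enable_pwr - Enable a power counter
+ * @device: The KGSL device
+ * @countable: The power countable (0 or 1) to reset and enable
+ *
+ * The power counters are controlled through RBBM_RBBM_CTL rather than a
+ * select register, so reset and then enable the requested counter directly.
+ */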
+static void a3xx_perfcounter_enable_pwr(struct kgsl_device *device,
+ unsigned int countable)
+{
+ unsigned int in, out;
+
+ adreno_regread(device, A3XX_RBBM_RBBM_CTL, &in);
+
+ if (countable == 0)
+ out = in | RBBM_RBBM_CTL_RESET_PWR_CTR0;
+ else
+ out = in | RBBM_RBBM_CTL_RESET_PWR_CTR1;
+
+ adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, out);
+
+ if (countable == 0)
+ out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR0;
+ else
+ out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
+
+ adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, out);
+}
+
+/*
+ * a3xx_perfcounter_enable - Configure a performance counter for a countable
+ * @adreno_dev - Adreno device to configure
+ * @group - Desired performance counter group
+ * @counter - Desired performance counter in the group
+ * @countable - Desired countable
+ *
+ * Physically set up a counter within a group with the desired countable
+ */
+
+static void a3xx_perfcounter_enable(struct adreno_device *adreno_dev,
+ unsigned int group, unsigned int counter, unsigned int countable)
+{
+ struct kgsl_device *device = &adreno_dev->dev;
+ unsigned int val = 0;
+ struct a3xx_perfcounter_register *reg;
+
+ /* Special case - the power counters are not in the reglist */
+ if (group == KGSL_PERFCOUNTER_GROUP_PWR)
+ return a3xx_perfcounter_enable_pwr(device, countable);
+
+ if (group >= ARRAY_SIZE(a3xx_perfcounter_reglist))
+ return;
+
+ if (counter >= a3xx_perfcounter_reglist[group].count)
+ return;
+
+ reg = &(a3xx_perfcounter_reglist[group].regs[counter]);
+
+ /* Select the desired perfcounter */
+ adreno_regwrite(device, reg->select, countable);
+
+ if (reg->load_bit < 32) {
+ val = 1 << reg->load_bit;
+ adreno_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, val);
+ } else {
+ val = 1 << (reg->load_bit - 32);
+ adreno_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, val);
+ }
+}
+
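+/*
+ * a3xx_perfcounter_read - Read a 64 bit performance counter value
+ * @adreno_dev: Adreno device the counter belongs to
+ * @group: Performance counter group index
+ * @counter: Counter index within the group
+ * @offset: Dword offset of the LO half of the counter register pair
+ *
+ * Briefly freeze the counters, read the LO/HI register pair, then
+ * re-enable counting and return the combined 64 bit value.
+ */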
+static uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev,
+ unsigned int group, unsigned int counter,
+ unsigned int offset)
+{
+ struct kgsl_device *device = &adreno_dev->dev;
+ struct a3xx_perfcounter_register *reg = NULL;
+ unsigned int lo = 0, hi = 0;
+ unsigned int val;
+
+ if (group >= ARRAY_SIZE(a3xx_perfcounter_reglist))
+ return 0;
+
+ if (counter >= a3xx_perfcounter_reglist[group].count)
+ return 0;
+
+ reg = &(a3xx_perfcounter_reglist[group].regs[counter]);
+
+ /* Freeze the counter */
+ adreno_regread(device, A3XX_RBBM_PERFCTR_CTL, &val);
+ val &= ~reg->load_bit;
+ adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
+
+ /* Read the values */
+ adreno_regread(device, offset, &lo);
+ adreno_regread(device, offset + 1, &hi);
+
+ /* Re-Enable the counter */
+ val |= reg->load_bit;
+ adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
+
+ return (((uint64_t) hi) << 32) | lo;
+}
+
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }
#define A3XX_INT_MASK \
@@ -2839,12 +3046,45 @@
{ adreno_is_a330, a330_vbif },
};
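+
+/*
+ * a3xx_perfcounter_init - Reserve the perf counters used by the kernel
+ * @adreno_dev: Adreno device to configure
+ *
+ * Reserve the SP countables used by fault tolerance hang detection and
+ * the PWR countable used for busy statistics with PERFCOUNTER_FLAG_KERNEL
+ * so that a userspace put cannot release them.
+ */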
+static void a3xx_perfcounter_init(struct adreno_device *adreno_dev)
+{
+ /*
+ * Set an SP counter to count SP_ALU_ACTIVE_CYCLES; it includes
+ * all ALU instruction execution regardless of precision or shader ID.
+ * Set an SP counter to count SP0_ICL1_MISSES; it counts
+ * USP L1 instruction miss requests.
+ * Set an SP counter to count SP_FS_CFLOW_INSTRUCTIONS; it
+ * counts USP flow control instruction executions.
+ * We will use these countables to augment our hang detection.
+ */
+ if (adreno_dev->fast_hang_detect) {
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+ SP_ALU_ACTIVE_CYCLES, &ft_detect_regs[6],
+ PERFCOUNTER_FLAG_KERNEL);
+ ft_detect_regs[7] = ft_detect_regs[6] + 1;
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+ SP0_ICL1_MISSES, &ft_detect_regs[8],
+ PERFCOUNTER_FLAG_KERNEL);
+ ft_detect_regs[9] = ft_detect_regs[8] + 1;
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+ SP_FS_CFLOW_INSTRUCTIONS, &ft_detect_regs[10],
+ PERFCOUNTER_FLAG_KERNEL);
+ ft_detect_regs[11] = ft_detect_regs[10] + 1;
+ }
+
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+ SP_FS_FULL_ALU_INSTRUCTIONS, NULL, PERFCOUNTER_FLAG_KERNEL);
+
+ /* Reserve and start countable 1 in the PWR perfcounter group */
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
+ NULL, PERFCOUNTER_FLAG_KERNEL);
+}
+
static void a3xx_start(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = &adreno_dev->dev;
struct a3xx_vbif_data *vbif = NULL;
int i;
- unsigned int reg;
for (i = 0; i < ARRAY_SIZE(a3xx_vbif_platforms); i++) {
if (a3xx_vbif_platforms[i].devfunc(adreno_dev)) {
@@ -2907,39 +3147,121 @@
/* Turn on performance counters */
adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01);
- /*
- * Set SP perfcounter 5 to count SP_ALU_ACTIVE_CYCLES, it includes
- * all ALU instruction execution regardless precision or shader ID.
- * Set SP perfcounter 6 to count SP0_ICL1_MISSES, It counts
- * USP L1 instruction miss request.
- * Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS, it
- * counts USP flow control instruction execution.
- * we will use this to augment our hang detection
- */
- if (adreno_dev->fast_hang_detect) {
- adreno_regwrite(device, A3XX_SP_PERFCOUNTER5_SELECT,
- SP_ALU_ACTIVE_CYCLES);
- adreno_regwrite(device, A3XX_SP_PERFCOUNTER6_SELECT,
- SP0_ICL1_MISSES);
- adreno_regwrite(device, A3XX_SP_PERFCOUNTER7_SELECT,
- SP_FS_CFLOW_INSTRUCTIONS);
- }
-
- adreno_regwrite(device, A3XX_SP_PERFCOUNTER7_SELECT,
- SP_FS_FULL_ALU_INSTRUCTIONS);
-
/* Turn on the GPU busy counter and let it run free */
adreno_dev->gpu_cycles = 0;
-
- adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
- reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
- adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
- reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
- reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
- adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
}
+/*
+ * Define the available perfcounter groups - these get used by
+ * adreno_perfcounter_get and adreno_perfcounter_put
+ */
+
+static struct adreno_perfcount_register a3xx_perfcounters_cp[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_CP_0_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_pc[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_3_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VFD_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VFD_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VPC_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VPC_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_tse[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TSE_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TSE_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_ras[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RAS_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RAS_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_uche[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_tp[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_3_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_4_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_5_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_sp[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_3_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_4_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_5_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_6_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_7_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_rb[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RB_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RB_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PWR_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PWR_1_LO, 0 },
+};
+
+static struct adreno_perfcount_group a3xx_perfcounter_groups[] = {
+ { a3xx_perfcounters_cp, ARRAY_SIZE(a3xx_perfcounters_cp) },
+ { a3xx_perfcounters_rbbm, ARRAY_SIZE(a3xx_perfcounters_rbbm) },
+ { a3xx_perfcounters_pc, ARRAY_SIZE(a3xx_perfcounters_pc) },
+ { a3xx_perfcounters_vfd, ARRAY_SIZE(a3xx_perfcounters_vfd) },
+ { a3xx_perfcounters_hlsq, ARRAY_SIZE(a3xx_perfcounters_hlsq) },
+ { a3xx_perfcounters_vpc, ARRAY_SIZE(a3xx_perfcounters_vpc) },
+ { a3xx_perfcounters_tse, ARRAY_SIZE(a3xx_perfcounters_tse) },
+ { a3xx_perfcounters_ras, ARRAY_SIZE(a3xx_perfcounters_ras) },
+ { a3xx_perfcounters_uche, ARRAY_SIZE(a3xx_perfcounters_uche) },
+ { a3xx_perfcounters_tp, ARRAY_SIZE(a3xx_perfcounters_tp) },
+ { a3xx_perfcounters_sp, ARRAY_SIZE(a3xx_perfcounters_sp) },
+ { a3xx_perfcounters_rb, ARRAY_SIZE(a3xx_perfcounters_rb) },
+ { a3xx_perfcounters_pwr, ARRAY_SIZE(a3xx_perfcounters_pwr) },
+};
+
+static struct adreno_perfcounters a3xx_perfcounters = {
+ a3xx_perfcounter_groups,
+ ARRAY_SIZE(a3xx_perfcounter_groups),
+};
+
/* Defined in adreno_a3xx_snapshot.c */
void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
int *remain, int hang);
@@ -2948,16 +3270,20 @@
.reg_rbbm_status = A3XX_RBBM_STATUS,
.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
+ .perfcounters = &a3xx_perfcounters,
.ctxt_create = a3xx_drawctxt_create,
.ctxt_save = a3xx_drawctxt_save,
.ctxt_restore = a3xx_drawctxt_restore,
.ctxt_draw_workaround = NULL,
.rb_init = a3xx_rb_init,
+ .perfcounter_init = a3xx_perfcounter_init,
.irq_control = a3xx_irq_control,
.irq_handler = a3xx_irq_handler,
.irq_pending = a3xx_irq_pending,
.busy_cycles = a3xx_busy_cycles,
.start = a3xx_start,
.snapshot = a3xx_snapshot,
+ .perfcounter_enable = a3xx_perfcounter_enable,
+ .perfcounter_read = a3xx_perfcounter_read,
};
diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h
index b2bae10..e19f338 100644
--- a/include/linux/msm_kgsl.h
+++ b/include/linux/msm_kgsl.h
@@ -209,6 +209,26 @@
unsigned int dev_minor;
};
+/* Performance counter groups */
+
+#define KGSL_PERFCOUNTER_GROUP_CP 0x0
+#define KGSL_PERFCOUNTER_GROUP_RBBM 0x1
+#define KGSL_PERFCOUNTER_GROUP_PC 0x2
+#define KGSL_PERFCOUNTER_GROUP_VFD 0x3
+#define KGSL_PERFCOUNTER_GROUP_HLSQ 0x4
+#define KGSL_PERFCOUNTER_GROUP_VPC 0x5
+#define KGSL_PERFCOUNTER_GROUP_TSE 0x6
+#define KGSL_PERFCOUNTER_GROUP_RAS 0x7
+#define KGSL_PERFCOUNTER_GROUP_UCHE 0x8
+#define KGSL_PERFCOUNTER_GROUP_TP 0x9
+#define KGSL_PERFCOUNTER_GROUP_SP 0xA
+#define KGSL_PERFCOUNTER_GROUP_RB 0xB
+#define KGSL_PERFCOUNTER_GROUP_PWR 0xC
+#define KGSL_PERFCOUNTER_GROUP_VBIF 0xD
+#define KGSL_PERFCOUNTER_GROUP_VBIF_PWR 0xE
+
+#define KGSL_PERFCOUNTER_NOT_USED 0xFFFFFFFF
+
/* structure holds list of ibs */
struct kgsl_ibdesc {
unsigned int gpuaddr;
@@ -659,6 +679,110 @@
#define IOCTL_KGSL_GPUMEM_SYNC_CACHE \
_IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache)
+/**
+ * struct kgsl_perfcounter_get - argument to IOCTL_KGSL_PERFCOUNTER_GET
+ * @groupid: Performance counter group ID
+ * @countable: Countable to select within the group
+ * @offset: Return offset of the reserved counter
+ *
+ * Get an available performance counter from a specified groupid. The offset
+ * of the performance counter will be returned after successfully assigning
+ * the countable to the counter for the specified group. An error will be
+ * returned and an offset of 0 if the groupid is invalid or there are no
+ * more counters left. After successfully getting a perfcounter, the user
+ * must call IOCTL_KGSL_PERFCOUNTER_PUT with the same groupid and countable
+ * when finished with the perfcounter to release its resources.
+ *
+ */
+struct kgsl_perfcounter_get {
+ unsigned int groupid;
+ unsigned int countable;
+ unsigned int offset;
+/* private: reserved for future use */
+ unsigned int __pad[2]; /* For future binary compatibility */
+};
+
+#define IOCTL_KGSL_PERFCOUNTER_GET \
+ _IOWR(KGSL_IOC_TYPE, 0x38, struct kgsl_perfcounter_get)
+
+/**
+ * struct kgsl_perfcounter_put - argument to IOCTL_KGSL_PERFCOUNTER_PUT
+ * @groupid: Performance counter group ID
+ * @countable: Countable to release within the group
+ *
+ * Release a previously taken performance counter so that other users can
+ * access the resource. This is only to be called after successfully
+ * getting a performance counter with IOCTL_KGSL_PERFCOUNTER_GET.
+ *
+ */
+struct kgsl_perfcounter_put {
+ unsigned int groupid;
+ unsigned int countable;
+/* private: reserved for future use */
+ unsigned int __pad[2]; /* For future binary compatibility */
+};
+
+#define IOCTL_KGSL_PERFCOUNTER_PUT \
+ _IOW(KGSL_IOC_TYPE, 0x39, struct kgsl_perfcounter_put)
+
+/**
+ * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY
+ * @groupid: Performance counter group ID
+ * @countables: Return array of the countables active in the counters
+ * @count: Size of the countables array
+ * @max_counters: Return total number of counters for the group ID
+ *
+ * Query the available performance counters given a groupid. The array
+ * *countables is used to return the current active countables in counters.
+ * The size of the array is passed in so the kernel will write at most
+ * count values, or reg_count for the group id, whichever is smaller. The
+ * total number of available counters for the group ID is returned in
+ * max_counters.
+ * If the array or size passed in are invalid, only the maximum number
+ * of counters will be returned; no data will be written to *countables.
+ * If the groupid is invalid an error code will be returned.
+ *
+ */
+struct kgsl_perfcounter_query {
+ unsigned int groupid;
+ /* Array to return the current countables for up to count counters */
+ unsigned int *countables;
+ unsigned int count;
+ unsigned int max_counters;
+/* private: reserved for future use */
+ unsigned int __pad[2]; /* For future binary compatibility */
+};
+
+#define IOCTL_KGSL_PERFCOUNTER_QUERY \
+ _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query)
+
+/**
+ * struct kgsl_perfcounter_read_group - argument element for
+ * IOCTL_KGSL_PERFCOUNTER_READ
+ * @groupid: Performance counter group ID
+ * @countable: Performance counter countable ID
+ * @value: Return value of the performance counter
+ *
+ * Read the current value of the performance counter assigned to the
+ * groupid/countable pair. IOCTL_KGSL_PERFCOUNTER_READ takes an array of
+ * these structs in kgsl_perfcounter_read.reads and fills in the value
+ * field for each entry.
+ */
+
+struct kgsl_perfcounter_read_group {
+ unsigned int groupid;
+ unsigned int countable;
+ uint64_t value;
+};
+
+struct kgsl_perfcounter_read {
+ struct kgsl_perfcounter_read_group *reads;
+ unsigned int count;
+/* private: reserved for future use */
+ unsigned int __pad[2]; /* For future binary compatibility */
+};
+
+#define IOCTL_KGSL_PERFCOUNTER_READ \
+ _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read)
+
#ifdef __KERNEL__
#ifdef CONFIG_MSM_KGSL_DRM
int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start,