Execute shader on wakeup from power collapse
After wakeup from power collapse, certain targets need a shader
execution to reset the GPU to a known and stable state.
This change builds the required shader command buffer once and runs
it ahead of the first commands submitted after initialization and
after each recovery from power collapse. It is enabled only on the
affected A3xx GPU targets (A305 and A320).
CRs-fixed: 526544
Change-Id: Ibd33578ecbb94df5ff276d1e460d94235a82dc43
Signed-off-by: Richard Ruigrok <rruigrok@codeaurora.org>
Signed-off-by: Ananta Kishore K <akollipa@codeaurora.org>
diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h
index 21d4759..f81fc67 100644
--- a/drivers/gpu/msm/a3xx_reg.h
+++ b/drivers/gpu/msm/a3xx_reg.h
@@ -231,6 +231,7 @@
#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49
#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A
#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B
+#define A3XX_GRAS_TSE_DEBUG_ECO 0xC81
#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88
#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89
#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A
@@ -268,6 +269,10 @@
#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03
#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04
#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05
+#define A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0xCC1
+#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6
+#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7
+#define A3XX_RB_FRAME_BUFFER_DIMENSION 0xCE0
#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44
#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45
#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61
@@ -299,6 +304,9 @@
#define A3XX_GRAS_CL_CLIP_CNTL 0x2040
#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044
#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048
+#define A3XX_GRAS_CL_VPORT_XSCALE 0x2049
+#define A3XX_GRAS_CL_VPORT_YOFFSET 0x204A
+#define A3XX_GRAS_CL_VPORT_YSCALE 0x204B
#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C
#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D
#define A3XX_GRAS_SU_POINT_MINMAX 0x2068
@@ -314,30 +322,75 @@
#define A3XX_RB_MODE_CONTROL 0x20C0
#define A3XX_RB_RENDER_CONTROL 0x20C1
#define A3XX_RB_MSAA_CONTROL 0x20C2
+#define A3XX_RB_ALPHA_REFERENCE 0x20C3
#define A3XX_RB_MRT_CONTROL0 0x20C4
#define A3XX_RB_MRT_BUF_INFO0 0x20C5
+#define A3XX_RB_MRT_BUF_BASE0 0x20C6
#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7
+#define A3XX_RB_MRT_CONTROL1 0x20C8
+#define A3XX_RB_MRT_BUF_INFO1 0x20C9
+#define A3XX_RB_MRT_BUF_BASE1 0x20CA
#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB
+#define A3XX_RB_MRT_CONTROL2 0x20CC
+#define A3XX_RB_MRT_BUF_INFO2 0x20CD
+#define A3XX_RB_MRT_BUF_BASE2 0x20CE
#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF
+#define A3XX_RB_MRT_CONTROL3 0x20D0
+#define A3XX_RB_MRT_BUF_INFO3 0x20D1
+#define A3XX_RB_MRT_BUF_BASE3 0x20D2
#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3
#define A3XX_RB_BLEND_RED 0x20E4
+#define A3XX_RB_BLEND_GREEN 0x20E5
+#define A3XX_RB_BLEND_BLUE 0x20E6
+#define A3XX_RB_BLEND_ALPHA 0x20E7
+#define A3XX_RB_CLEAR_COLOR_DW0 0x20E8
+#define A3XX_RB_CLEAR_COLOR_DW1 0x20E9
+#define A3XX_RB_CLEAR_COLOR_DW2 0x20EA
+#define A3XX_RB_CLEAR_COLOR_DW3 0x20EB
#define A3XX_RB_COPY_CONTROL 0x20EC
+#define A3XX_RB_COPY_DEST_BASE 0x20ED
+#define A3XX_RB_COPY_DEST_PITCH 0x20EE
#define A3XX_RB_COPY_DEST_INFO 0x20EF
#define A3XX_RB_DEPTH_CONTROL 0x2100
+#define A3XX_RB_DEPTH_CLEAR 0x2101
+#define A3XX_RB_DEPTH_BUF_INFO 0x2102
+#define A3XX_RB_DEPTH_BUF_PITCH 0x2103
#define A3XX_RB_STENCIL_CONTROL 0x2104
+#define A3XX_RB_STENCIL_CLEAR 0x2105
+#define A3XX_RB_STENCIL_BUF_INFO 0x2106
+#define A3XX_RB_STENCIL_BUF_PITCH 0x2107
+#define A3XX_RB_STENCIL_REF_MASK 0x2108
+#define A3XX_RB_STENCIL_REF_MASK_BF 0x2109
+#define A3XX_RB_LRZ_VSC_CONTROL 0x210C
+#define A3XX_RB_WINDOW_OFFSET 0x210E
+#define A3XX_RB_SAMPLE_COUNT_CONTROL 0x2110
+#define A3XX_RB_SAMPLE_COUNT_ADDR 0x2111
+#define A3XX_RB_Z_CLAMP_MIN 0x2114
+#define A3XX_RB_Z_CLAMP_MAX 0x2115
#define A3XX_PC_VSTREAM_CONTROL 0x21E4
#define A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x21EA
#define A3XX_PC_PRIM_VTX_CNTL 0x21EC
#define A3XX_PC_RESTART_INDEX 0x21ED
#define A3XX_HLSQ_CONTROL_0_REG 0x2200
+#define A3XX_HLSQ_CONTROL_1_REG 0x2201
+#define A3XX_HLSQ_CONTROL_2_REG 0x2202
+#define A3XX_HLSQ_CONTROL_3_REG 0x2203
#define A3XX_HLSQ_VS_CONTROL_REG 0x2204
+#define A3XX_HLSQ_FS_CONTROL_REG 0x2205
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x2206
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207
#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A
+#define A3XX_HLSQ_CL_NDRANGE_1_REG 0x220B
#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C
+#define A3XX_HLSQ_CL_NDRANGE_3_REG 0x220D
+#define A3XX_HLSQ_CL_NDRANGE_4_REG 0x220E
+#define A3XX_HLSQ_CL_NDRANGE_5_REG 0x220F
+#define A3XX_HLSQ_CL_NDRANGE_6_REG 0x2210
#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211
#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212
#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214
#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215
+#define A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x2216
#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217
#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A
#define A3XX_VFD_CONTROL_0 0x2240
@@ -354,10 +407,21 @@
#define A3XX_SP_VS_CTRL_REG0 0x22C4
#define A3XX_SP_VS_CTRL_REG1 0x22C5
#define A3XX_SP_VS_PARAM_REG 0x22C6
+#define A3XX_SP_VS_OUT_REG_0 0x22C7
+#define A3XX_SP_VS_OUT_REG_1 0x22C8
+#define A3XX_SP_VS_OUT_REG_2 0x22C9
+#define A3XX_SP_VS_OUT_REG_3 0x22CA
+#define A3XX_SP_VS_OUT_REG_4 0x22CB
+#define A3XX_SP_VS_OUT_REG_5 0x22CC
+#define A3XX_SP_VS_OUT_REG_6 0x22CD
#define A3XX_SP_VS_OUT_REG_7 0x22CE
#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0
+#define A3XX_SP_VS_VPC_DST_REG_1 0x22D1
+#define A3XX_SP_VS_VPC_DST_REG_2 0x22D2
+#define A3XX_SP_VS_VPC_DST_REG_3 0x22D3
#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4
#define A3XX_SP_VS_OBJ_START_REG 0x22D5
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG 0x22D6
#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7
#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8
#define A3XX_SP_VS_LENGTH_REG 0x22DF
@@ -365,13 +429,19 @@
#define A3XX_SP_FS_CTRL_REG1 0x22E1
#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2
#define A3XX_SP_FS_OBJ_START_REG 0x22E3
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG 0x22E4
#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5
#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6
#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8
#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9
#define A3XX_SP_FS_OUTPUT_REG 0x22EC
#define A3XX_SP_FS_MRT_REG_0 0x22F0
+#define A3XX_SP_FS_MRT_REG_1 0x22F1
+#define A3XX_SP_FS_MRT_REG_2 0x22F2
+#define A3XX_SP_FS_MRT_REG_3 0x22F3
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_1 0x22F5
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_2 0x22F6
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7
#define A3XX_SP_FS_LENGTH_REG 0x22FF
#define A3XX_TPL1_TP_VS_TEX_OFFSET 0x2340
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index cf8970f..bb7de19 100755
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -572,13 +572,15 @@
kgsl_mmu_unmap(pagetable, &device->memstore);
+ kgsl_mmu_unmap(pagetable, &adreno_dev->pwron_fixup);
+
kgsl_mmu_unmap(pagetable, &device->mmu.setstate_memory);
}
static int adreno_setup_pt(struct kgsl_device *device,
struct kgsl_pagetable *pagetable)
{
- int result = 0;
+ int result;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
@@ -594,6 +596,10 @@
if (result)
goto unmap_memptrs_desc;
+ result = kgsl_mmu_map_global(pagetable, &adreno_dev->pwron_fixup);
+ if (result)
+ goto unmap_pwron_fixup_desc;
+
result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory);
if (result)
goto unmap_memstore_desc;
@@ -622,6 +628,9 @@
unmap_memstore_desc:
kgsl_mmu_unmap(pagetable, &device->memstore);
+unmap_pwron_fixup_desc:
+ kgsl_mmu_unmap(pagetable, &adreno_dev->pwron_fixup);
+
unmap_memptrs_desc:
kgsl_mmu_unmap(pagetable, &rb->memptrs_desc);
@@ -1654,6 +1663,15 @@
kgsl_pwrctrl_enable(device);
/* Set up a2xx special case */
+
+ /* A305 and A320 need the shader fixup after power collapse */
+ if (adreno_is_a305(adreno_dev) || adreno_is_a320(adreno_dev))
+ adreno_a3xx_pwron_fixup_init(adreno_dev);
+
+ /* Set the bit to indicate that we've just powered on */
+ set_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv);
+
+ /* Set up the MMU */
if (adreno_is_a2xx(adreno_dev)) {
/*
* the MH_CLNT_INTF_CTRL_CONFIG registers aren't present
@@ -3241,6 +3259,9 @@
if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr, size))
return &device->memstore;
+ if (kgsl_gpuaddr_in_memdesc(&adreno_dev->pwron_fixup, gpuaddr, size))
+ return &adreno_dev->pwron_fixup;
+
if (kgsl_gpuaddr_in_memdesc(&device->mmu.setstate_memory, gpuaddr,
size))
return &device->mmu.setstate_memory;
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 79ea40c..1444851 100755
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -35,6 +35,7 @@
#define KGSL_CMD_FLAGS_PMODE 0x00000001
#define KGSL_CMD_FLAGS_INTERNAL_ISSUE 0x00000002
#define KGSL_CMD_FLAGS_GET_INT 0x00000004
+#define KGSL_CMD_FLAGS_PWRON_FIXUP 0x00000008
#define KGSL_CMD_FLAGS_EOF 0x00000100
/* Command identifiers */
@@ -45,6 +46,7 @@
#define KGSL_END_OF_IB_IDENTIFIER 0x2ABEDEAD
#define KGSL_END_OF_FRAME_IDENTIFIER 0x2E0F2E0F
#define KGSL_NOP_IB_IDENTIFIER 0x20F20F20
+#define KGSL_PWRON_FIXUP_IDENTIFIER 0x2AFAFAFA
#ifdef CONFIG_MSM_SCM
#define ADRENO_DEFAULT_PWRSCALE_POLICY (&kgsl_pwrscale_policy_tz)
@@ -80,6 +82,7 @@
struct adreno_device {
struct kgsl_device dev; /* Must be first field in this struct */
+ unsigned long priv;
unsigned int chip_id;
enum adreno_gpurev gpurev;
unsigned long gmem_base;
@@ -115,6 +118,8 @@
struct kgsl_memdesc on_resume_cmd;
unsigned int on_resume_ib[3];
bool on_resume_issueib;
+ struct kgsl_memdesc pwron_fixup;
+ unsigned int pwron_fixup_dwords;
};
#define PERFCOUNTER_FLAG_NONE 0x0
@@ -155,6 +160,17 @@
unsigned int group_count;
};
+/**
+ * enum adreno_device_flags - Private flags for the adreno_device
+ * @ADRENO_DEVICE_PWRON - Set during init after a power collapse; tested and
+ * cleared with test_and_clear_bit() when commands are next issued
+ * @ADRENO_DEVICE_PWRON_FIXUP - Set if the target requires the shader fixup
+ * after power collapse; set once adreno_a3xx_pwron_fixup_init() has built
+ * the fixup command buffer
+ *
+ * The values are bit numbers within adreno_device->priv and are used with
+ * set_bit(), test_bit() and test_and_clear_bit(); they are not bit masks.
+ */
+enum adreno_device_flags {
+ ADRENO_DEVICE_PWRON = 0,
+ ADRENO_DEVICE_PWRON_FIXUP = 1,
+};
+
struct adreno_gpudev {
/*
* These registers are in a different location on A3XX, so define
@@ -313,6 +329,8 @@
int adreno_ft_init_sysfs(struct kgsl_device *device);
void adreno_ft_uninit_sysfs(struct kgsl_device *device);
+int adreno_a3xx_pwron_fixup_init(struct adreno_device *adreno_dev);
+
static inline int adreno_is_a200(struct adreno_device *adreno_dev)
{
return (adreno_dev->gpurev == ADRENO_REV_A200);
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index aa0f614..8db9524 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -2507,6 +2507,270 @@
}
}
+static const unsigned int _a3xx_pwron_fixup_fs_instructions[] = {
+ 0x00000000, 0x10000400, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x03000000,
+}; /* copied as the CP_LOAD_STATE payload in adreno_a3xx_pwron_fixup_init() */
+
+/**
+ * adreno_a3xx_pwron_fixup_init() - Initialize a special command buffer to run a
+ * post-power collapse shader workaround
+ * @adreno_dev: Pointer to an adreno_device struct
+ *
+ * A3xx targets require a CL Exec after recovery from power-collapse.
+ * Construct the IB once at init time and keep it handy.
+ *
+ * The buffer is PAGE_SIZE bytes obtained from kgsl_allocate_contiguous() and
+ * is flagged KGSL_MEMFLAGS_GPUREADONLY. The number of dwords written is saved
+ * in adreno_dev->pwron_fixup_dwords, and ADRENO_DEVICE_PWRON_FIXUP is set in
+ * adreno_dev->priv so that later calls return immediately.
+ *
+ * Returns: 0 on success, including when the fixup was already built;
+ * otherwise the error code returned by kgsl_allocate_contiguous()
+ */
+int adreno_a3xx_pwron_fixup_init(struct adreno_device *adreno_dev)
+{
+ unsigned int *cmds;
+ int count = sizeof(_a3xx_pwron_fixup_fs_instructions) >> 2; /* table size in bytes / 4 */
+ int ret;
+ /* Return if the fixup is already in place */
+ if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
+ return 0;
+
+ ret = kgsl_allocate_contiguous(&adreno_dev->pwron_fixup, PAGE_SIZE);
+
+ if (ret)
+ return ret;
+ adreno_dev->pwron_fixup.flags |= KGSL_MEMFLAGS_GPUREADONLY;
+ cmds = adreno_dev->pwron_fixup.hostptr;
+
+ *cmds++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+ *cmds++ = 0x00000000;
+ *cmds++ = 0x90000000;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type3_packet(CP_REG_RMW, 3);
+ *cmds++ = A3XX_RBBM_CLOCK_CTL;
+ *cmds++ = 0xFFFCFFFF;
+ *cmds++ = 0x00010000;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
+ *cmds++ = 0x1E000150;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
+ *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG) | (0x1 << 30);
+ *cmds++ = 0x1E000150;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
+ *cmds++ = 0x1E000150;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_1_REG, 1);
+ *cmds++ = 0x00000040;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_2_REG, 1);
+ *cmds++ = 0x80000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_3_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_VS_CONTROL_REG, 1);
+ *cmds++ = 0x00000001;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_FS_CONTROL_REG, 1);
+ *cmds++ = 0x00001002 | (count >> 3) << 24; /* << binds tighter than |: (count >> 3) goes to bit 24 up */
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_0_REG, 1);
+ *cmds++ = 0x00401101;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_1_REG, 1);
+ *cmds++ = 0x00000400;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_2_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_3_REG, 1);
+ *cmds++ = 0x00000001;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_4_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_5_REG, 1);
+ *cmds++ = 0x00000001;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_6_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_1_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_CONST_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG, 1);
+ *cmds++ = 0x00000010;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG, 1);
+ *cmds++ = 0x00000001;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, 1);
+ *cmds++ = 0x00000001;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_WG_OFFSET_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_SP_CTRL_REG, 1);
+ *cmds++ = 0x00040000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
+ *cmds++ = 0x0000000A;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG1, 1);
+ *cmds++ = 0x00000001;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_PARAM_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_0, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_1, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_2, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_3, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_4, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_5, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_6, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_7, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_0, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_1, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_2, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_3, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_OFFSET_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_START_REG, 1);
+ *cmds++ = 0x00000004;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_PARAM_REG, 1);
+ *cmds++ = 0x04008001;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_ADDR_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_VS_LENGTH_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
+ *cmds++ = 0x00B0400A | (count >> 3) << 24; /* << binds tighter than |: (count >> 3) goes to bit 24 up */
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG1, 1);
+ *cmds++ = 0x00300402;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_OFFSET_REG, 1);
+ *cmds++ = 0x00010000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_START_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_PARAM_REG, 1);
+ *cmds++ = 0x04008001;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_ADDR_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_OUTPUT_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_0, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_1, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_2, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_3, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_0, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_1, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_2, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_3, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_SP_FS_LENGTH_REG, 1);
+ *cmds++ = count >> 3;
+ *cmds++ = cp_type0_packet(A3XX_RB_MODE_CONTROL, 1);
+ *cmds++ = 0x00008000;
+ *cmds++ = cp_type0_packet(A3XX_RB_RENDER_CONTROL, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MSAA_CONTROL, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_ALPHA_REFERENCE, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL0, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL1, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL2, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL3, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO0, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO1, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO2, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO3, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE0, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE1, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE2, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE3, 1);
+ *cmds++ = 0x00000000;
+
+ *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER0_SELECT, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER1_SELECT, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+
+ *cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count); /* 2 dwords below + count instruction dwords */
+ *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) |
+ ((count >> 3) << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+ *cmds++ = 0x00000000;
+ memcpy(cmds, _a3xx_pwron_fixup_fs_instructions, count << 2); /* count dwords, expressed in bytes */
+ cmds += count;
+
+ *cmds++ = cp_type3_packet(CP_EXEC_CL, 1); /* the CL Exec named in the header comment */
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_nop_packet(1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
+ *cmds++ = 0x1E000150;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
+ *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
+ *cmds++ = 0x1E000050;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+ *cmds++ = cp_type3_packet(CP_REG_RMW, 3);
+ *cmds++ = A3XX_RBBM_CLOCK_CTL;
+ *cmds++ = 0xFFFCFFFF;
+ *cmds++ = 0x00000000; /* the CP_REG_RMW near the start of the buffer wrote 0x00010000 here */
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+
+ /*
+ * Remember the number of dwords in the command buffer for when we
+ * program the indirect buffer call in the ringbuffer
+ */
+ adreno_dev->pwron_fixup_dwords =
+ (cmds - (unsigned int *)adreno_dev->pwron_fixup.hostptr);
+
+ /* Mark the flag in ->priv to show that we have the fix */
+ set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv);
+ return 0;
+}
+
#define QUAD_RESTORE_LEN 14
static unsigned int gmem_restore_quad[QUAD_RESTORE_LEN] = {
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index a3fa312..e6ec91d 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -177,6 +177,8 @@
/* Load a buffer with pre-fetch enabled */
#define CP_INDIRECT_BUFFER_PFE 0x3F
+#define CP_EXEC_CL 0x31
+
#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000
#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010
#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index bc7a5c2..c4d12ec 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -584,6 +584,10 @@
if (flags & KGSL_CMD_FLAGS_EOF)
total_sizedwords += 2;
+ /* Add space for the power on shader fixup if we need it */
+ if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP)
+ total_sizedwords += 5;
+
ringcmds = adreno_ringbuffer_allocspace(rb, context, total_sizedwords);
if (!ringcmds)
return -ENOSPC;
@@ -591,6 +595,18 @@
rcmd_gpu = rb->buffer_desc.gpuaddr
+ sizeof(uint)*(rb->wptr-total_sizedwords);
+ if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP) {
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ KGSL_PWRON_FIXUP_IDENTIFIER);
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ CP_HDR_INDIRECT_BUFFER_PFD);
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ adreno_dev->pwron_fixup.gpuaddr);
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ adreno_dev->pwron_fixup_dwords);
+ }
+
GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);
@@ -1069,9 +1085,20 @@
} else
drawctxt->timestamp++;
+ /*
+ * For some targets, we need to execute a dummy shader operation after a
+ * power collapse
+ */
+
+ if (test_and_clear_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv) &&
+ test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
+ {
+ flags |= KGSL_CMD_FLAGS_PWRON_FIXUP;
+ }
+
ret = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
drawctxt,
- (flags & KGSL_CMD_FLAGS_EOF),
+ flags,
&link[0], (cmds - link));
if (ret)
goto done;