blob: 1cef0762083efecbabe7b5a2d5e4c308a6bec89a [file] [log] [blame]
/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
13
14#include <linux/delay.h>
Steve Mucklef132c6c2012-06-06 18:30:57 -070015#include <linux/sched.h>
Sudhakara Rao Tentu5746bde2012-03-15 12:16:32 +053016#include <mach/socinfo.h>
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070017
18#include "kgsl.h"
19#include "adreno.h"
20#include "kgsl_sharedmem.h"
21#include "kgsl_cffdump.h"
22#include "a3xx_reg.h"
Carter Cooperb769c912012-04-13 08:16:35 -060023#include "adreno_a3xx_trace.h"
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070024
Jordan Crouse0c2761a2012-02-01 22:11:12 -070025/*
26 * Set of registers to dump for A3XX on postmortem and snapshot.
27 * Registers in pairs - first value is the start offset, second
28 * is the stop offset (inclusive)
29 */
30
31const unsigned int a3xx_registers[] = {
32 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
33 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
34 0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
35 0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
36 0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
37 0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
38 0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
39 0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
Jordan Crouse55d98fd2012-02-04 10:23:51 -070040 0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
Jordan Crouse0c2761a2012-02-01 22:11:12 -070041 0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
42 0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
43 0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
44 0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
45 0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
46 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
47 0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
48 0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
49 0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
50 0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
51 0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
52 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
53 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
54 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
55 0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
56 0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
57 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
58 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
59 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
60 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
61 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
62 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
63 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
64 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
Jordan Crouse1268f9c2012-02-21 08:54:53 -070065 0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
66 0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
67 0x303C, 0x303C, 0x305E, 0x305F,
Jordan Crouse0c2761a2012-02-01 22:11:12 -070068};
69
70const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
71
/*
 * Helper for building register values one bit-field at a time in the
 * gmem2sys and sys2gmem functions: place _val at bit position _shift.
 * Both operands are fully parenthesized to stay expansion-safe.
 */

#define _SET(_shift, _val) ((_val) << (_shift))
77
/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 *  8K - ALU Constant Shadow (8K aligned)
 *  4K - H/W Register Shadow (8K aligned)
 *  5K - Command and Vertex Buffers
 *  8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE      (8*1024) /* 8KB */
#define REG_SHADOW_SIZE      (4*1024) /* 4KB */
#define CMD_BUFFER_SIZE      (5*1024) /* 5KB */
#define TEX_SIZE_MEM_OBJECTS 896      /* bytes */
#define TEX_SIZE_MIPMAP      1936     /* bytes */
#define TEX_SIZE_SAMPLER_OBJ 256      /* bytes */
/* VS and FS each get one set of texture state, hence the *2 */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */
#define SHADER_SHADOW_SIZE   (8*1024) /* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET    ALU_SHADOW_SIZE
#define CMD_OFFSET    (REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET    (SHADER_OFFSET+SHADER_SHADOW_SIZE)
#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP      (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP      (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE (16*1024)

#define HLSQ_SAMPLER_OFFSET 0x000
#define HLSQ_MEMOBJ_OFFSET  0x400
#define HLSQ_MIPMAP_OFFSET  0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)

/* Bit position of the loop count field in a CP_REG_TO_MEM src dword */
#define REG_TO_MEM_LOOP_COUNT_SHIFT 18

/*
 * Build a PC draw initiator dword.  index_size is split across two
 * fields: bit 0 selects the index size and bit 1 the small-index flag.
 * All macro arguments are parenthesized to avoid precedence surprises
 * when callers pass compound expressions.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
148
149/*
150 * List of context registers (starting from dword offset 0x2000).
151 * Each line contains start and end of a range of registers.
152 */
153static const unsigned int context_register_ranges[] = {
154 A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
155 A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
156 A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
157 A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
158 A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
159 A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
160 A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
161 A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
162 A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
163 A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
164 A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
165 A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
166 A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
167 A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
168 A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
169 A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
170 A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
171 A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
172 A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
173 A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
174 A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
175 A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
176 A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
177 A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
178 A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
179 A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
180 A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
181 A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
182 A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
183 A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
184 A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
185};
186
187/* Global registers that need to be saved separately */
188static const unsigned int global_registers[] = {
189 A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
190 A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
191 A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
192 A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
193 A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
194 A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
195 A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
196 A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
197 A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
198 A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
199 A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
200 A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
201 A3XX_VSC_BIN_SIZE,
202 A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
203 A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
204 A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
205 A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
206 A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
207 A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
208 A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
209 A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
210 A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
211 A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
212 A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
213 A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
214 A3XX_VSC_SIZE_ADDRESS
215};
216
217#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
218
219/* A scratchpad used to build commands during context create */
220static struct tmp_ctx {
221 unsigned int *cmd; /* Next available dword in C&V buffer */
222
223 /* Addresses in comamnd buffer where registers are saved */
224 uint32_t reg_values[GLOBAL_REGISTER_COUNT];
225 uint32_t gmem_base; /* Base GPU address of GMEM */
226} tmp_ctx;
227
228#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
229/*
230 * Function for executing dest = ( (reg & and) ROL rol ) | or
231 */
232static unsigned int *rmw_regtomem(unsigned int *cmd,
233 unsigned int reg, unsigned int and,
234 unsigned int rol, unsigned int or,
235 unsigned int dest)
236{
237 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
238 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
239 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
240 *cmd++ = 0x00000000; /* AND value */
241 *cmd++ = reg; /* OR address */
242
243 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
244 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
245 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
246 *cmd++ = and; /* AND value */
247 *cmd++ = or; /* OR value */
248
249 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
250 *cmd++ = A3XX_CP_SCRATCH_REG2;
251 *cmd++ = dest;
252
253 return cmd;
254}
255#endif
256
257static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
258 struct adreno_context *drawctxt)
259{
260 unsigned int *cmd = tmp_ctx.cmd;
Jordan Crousea7ec4212012-02-04 10:23:52 -0700261 unsigned int *start;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700262 unsigned int i;
263
264 drawctxt->constant_save_commands[0].hostptr = cmd;
265 drawctxt->constant_save_commands[0].gpuaddr =
266 virt2gpu(cmd, &drawctxt->gpustate);
267 cmd++;
268
Jordan Crousea7ec4212012-02-04 10:23:52 -0700269 start = cmd;
270
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700271 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
272 *cmd++ = 0;
273
274#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
275 /*
276 * Context registers are already shadowed; just need to
277 * disable shadowing to prevent corruption.
278 */
279
280 *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
281 *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
282 *cmd++ = 4 << 16; /* regs, start=0 */
283 *cmd++ = 0x0; /* count = 0 */
284
285#else
286 /*
287 * Make sure the HW context has the correct register values before
288 * reading them.
289 */
290
291 /* Write context registers into shadow */
292 for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
293 unsigned int start = context_register_ranges[i * 2];
294 unsigned int end = context_register_ranges[i * 2 + 1];
295 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
296 *cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
297 start;
298 *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
299 & 0xFFFFE000) + (start - 0x2000) * 4;
300 }
301#endif
302
303 /* Need to handle some of the global registers separately */
304 for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
305 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
306 *cmd++ = global_registers[i];
307 *cmd++ = tmp_ctx.reg_values[i];
308 }
309
310 /* Save vertex shader constants */
311 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
312 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
313 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
314 *cmd++ = 0x0000FFFF;
315 *cmd++ = 3; /* EXEC_COUNT */
316 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
317 drawctxt->constant_save_commands[1].hostptr = cmd;
318 drawctxt->constant_save_commands[1].gpuaddr =
319 virt2gpu(cmd, &drawctxt->gpustate);
320 /*
321 From fixup:
322
323 dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
324 src = (HLSQ_SHADOW_BASE + 0x2000) / 4
325
326 From register spec:
327 SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
328 */
329 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
330 /* ALU constant shadow base */
331 *cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;
332
333 /* Save fragment shader constants */
334 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
335 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
336 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
337 *cmd++ = 0x0000FFFF;
338 *cmd++ = 3; /* EXEC_COUNT */
339 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
340 drawctxt->constant_save_commands[2].hostptr = cmd;
341 drawctxt->constant_save_commands[2].gpuaddr =
342 virt2gpu(cmd, &drawctxt->gpustate);
343 /*
344 From fixup:
345
346 dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
347 src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4
348
349 From register spec:
350 SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
351 */
352 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
353
354 /*
355 From fixup:
356
357 base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
358 offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET
359
360 From register spec:
361 SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
362 start offset in on chip RAM,
363 128bit aligned
364
365 dst = base + offset
366 Because of the base alignment we can use
367 dst = base | offset
368 */
369 *cmd++ = 0; /* dst */
370
371 /* Save VS texture memory objects */
372 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
373 *cmd++ =
374 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
375 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
376 *cmd++ =
377 (drawctxt->gpustate.gpuaddr +
378 VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
379
380 /* Save VS texture mipmap pointers */
381 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
382 *cmd++ =
383 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
384 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
385 *cmd++ =
386 (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
387
388 /* Save VS texture sampler objects */
389 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
390 *cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
391 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
392 *cmd++ =
393 (drawctxt->gpustate.gpuaddr +
394 VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
395
396 /* Save FS texture memory objects */
397 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
398 *cmd++ =
399 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
400 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
401 *cmd++ =
402 (drawctxt->gpustate.gpuaddr +
403 FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
404
405 /* Save FS texture mipmap pointers */
406 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
407 *cmd++ =
408 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
409 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
410 *cmd++ =
411 (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
412
413 /* Save FS texture sampler objects */
414 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
415 *cmd++ =
416 ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
417 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
418 *cmd++ =
419 (drawctxt->gpustate.gpuaddr +
420 FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
421
422 /* Create indirect buffer command for above command sequence */
423 create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);
424
425 tmp_ctx.cmd = cmd;
426}
427
428/* Copy GMEM contents to system memory shadow. */
429static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
430 struct adreno_context *drawctxt,
431 struct gmem_shadow_t *shadow)
432{
433 unsigned int *cmds = tmp_ctx.cmd;
434 unsigned int *start = cmds;
435
Jordan Crousefb3012f2012-06-22 13:11:05 -0600436 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
437 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
438
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700439 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
440 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
441
442 /* RB_MODE_CONTROL */
443 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
444 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
445 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
446 /* RB_RENDER_CONTROL */
447 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
448 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
449
450 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
451 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
452 /* RB_COPY_CONTROL */
453 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
454 RB_CLEAR_MODE_RESOLVE) |
455 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
456 tmp_ctx.gmem_base >> 14);
457 /* RB_COPY_DEST_BASE */
458 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
459 shadow->gmemshadow.gpuaddr >> 5);
460 /* RB_COPY_DEST_PITCH */
461 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
462 (shadow->pitch * 4) / 32);
463 /* RB_COPY_DEST_INFO */
464 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
465 RB_TILINGMODE_LINEAR) |
466 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
467 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
468 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
469
470 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
471 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
472 /* GRAS_SC_CONTROL */
473 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
474
475 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
476 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
477 /* VFD_CONTROL_0 */
478 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
479 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
480 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
481 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
482 /* VFD_CONTROL_1 */
483 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
484 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
485 _SET(VFD_CTRLREG1_REGID4INST, 252);
486
487 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
488 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
489 /* VFD_FETCH_INSTR_0_0 */
490 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
491 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
492 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
493 /* VFD_FETCH_INSTR_1_0 */
494 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
495 shadow->quad_vertices.gpuaddr);
496
497 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
498 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
499 /* VFD_DECODE_INSTR_0 */
500 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
501 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
502 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700503 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
504 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
505
506 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
507 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
508 /* HLSQ_CONTROL_0_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700509 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700510 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700511 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700512 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700513 /* HLSQ_CONTROL_1_REG */
514 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700515 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700516 /* HLSQ_CONTROL_2_REG */
517 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
518 /* HLSQ_CONTROL_3_REG */
519 *cmds++ = 0x00000000;
520
521 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
522 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
523 /* HLSQ_VS_CONTROL_REG */
524 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
525 /* HLSQ_FS_CONTROL_REG */
526 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700527 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700528 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
529 /* HLSQ_CONST_VSPRESV_RANGE_REG */
530 *cmds++ = 0x00000000;
531 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
532 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
533 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
534
535 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
536 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
537 /* SP_FS_LENGTH_REG */
538 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
539
540 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
541 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
542 /* SP_SP_CTRL_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700543 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1) |
544 _SET(SP_SPCTRLREG_LOMODE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700545
546 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
547 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
548 /* SP_VS_CTRL_REG0 */
549 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
550 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
551 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700552 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700553 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
554 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
555 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
556 /* SP_VS_CTRL_REG1 */
557 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
558 /* SP_VS_PARAM_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700559 *cmds++ = _SET(SP_VSPARAMREG_PSIZEREGID, 252);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700560 /* SP_VS_OUT_REG_0 */
561 *cmds++ = 0x00000000;
562 /* SP_VS_OUT_REG_1 */
563 *cmds++ = 0x00000000;
564 /* SP_VS_OUT_REG_2 */
565 *cmds++ = 0x00000000;
566 /* SP_VS_OUT_REG_3 */
567 *cmds++ = 0x00000000;
568 /* SP_VS_OUT_REG_4 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_5 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_6 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_7 */
575 *cmds++ = 0x00000000;
576
577 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
578 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
579 /* SP_VS_VPC_DST_REG_0 */
580 *cmds++ = 0x00000000;
581 /* SP_VS_VPC_DST_REG_1 */
582 *cmds++ = 0x00000000;
583 /* SP_VS_VPC_DST_REG_2 */
584 *cmds++ = 0x00000000;
585 /* SP_VS_VPC_DST_REG_3 */
586 *cmds++ = 0x00000000;
587 /* SP_VS_OBJ_OFFSET_REG */
588 *cmds++ = 0x00000000;
589 /* SP_VS_OBJ_START_REG */
590 *cmds++ = 0x00000000;
591
592 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
593 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
594 /* SP_VS_LENGTH_REG */
595 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
596 /* SP_FS_CTRL_REG0 */
597 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
598 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
599 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700600 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700601 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700602 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700603 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
604 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
605 /* SP_FS_CTRL_REG1 */
606 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700607 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
608 /* SP_FS_OBJ_OFFSET_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700609 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
610 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 127);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700611 /* SP_FS_OBJ_START_REG */
612 *cmds++ = 0x00000000;
613
614 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
615 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
616 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
617 *cmds++ = 0x00000000;
618 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
619 *cmds++ = 0x00000000;
620
621 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
622 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
623 /* SP_FS_OUTPUT_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700624 *cmds++ = _SET(SP_IMAGEOUTPUTREG_DEPTHOUTMODE, SP_PIXEL_BASED);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700625
626 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
627 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
628 /* SP_FS_MRT_REG_0 */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700629 *cmds++ = _SET(SP_FSMRTREG_PRECISION, 1);
630
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700631 /* SP_FS_MRT_REG_1 */
632 *cmds++ = 0x00000000;
633 /* SP_FS_MRT_REG_2 */
634 *cmds++ = 0x00000000;
635 /* SP_FS_MRT_REG_3 */
636 *cmds++ = 0x00000000;
637
638 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
639 *cmds++ = CP_REG(A3XX_VPC_ATTR);
640 /* VPC_ATTR */
641 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
642 _SET(VPC_VPCATTR_LMSIZE, 1);
643 /* VPC_PACK */
644 *cmds++ = 0x00000000;
645 /* VPC_VARRYING_INTERUPT_MODE_0 */
646 *cmds++ = 0x00000000;
647 /* VPC_VARRYING_INTERUPT_MODE_1 */
648 *cmds++ = 0x00000000;
649 /* VPC_VARRYING_INTERUPT_MODE_2 */
650 *cmds++ = 0x00000000;
651 /* VPC_VARRYING_INTERUPT_MODE_3 */
652 *cmds++ = 0x00000000;
653 /* VPC_VARYING_PS_REPL_MODE_0 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARYING_PS_REPL_MODE_1 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARYING_PS_REPL_MODE_2 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARYING_PS_REPL_MODE_3 */
660 *cmds++ = 0x00000000;
661
662 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
663 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
664 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
665 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
666 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
667 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
668 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
669
670 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700671 *cmds++ = 0x00000000; *cmds++ = 0x13001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700672 /* end; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700673 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700674 /* nop; */
675 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
676 /* nop; */
677 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
678
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700679
680 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
681 *cmds++ = 0x00000000;
682
683 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
684 *cmds++ = 0x00000000;
685
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700686 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
687 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
688 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
689 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
690 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
691 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
692 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
693
694 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700695 *cmds++ = 0x00000000; *cmds++ = 0x30201b00;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700696 /* end; */
697 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
698 /* nop; */
699 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
700 /* nop; */
701 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
702
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700703
704
705 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
706 *cmds++ = 0x00000000;
707
708 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
709 *cmds++ = 0x00000000;
710
711
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700712 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
713 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
714 /* RB_MSAA_CONTROL */
715 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
716 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
717
718 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
719 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
720 /* RB_DEPTH_CONTROL */
721 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
722
723 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700724 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
725 /* RB_STENCIL_CONTROL */
726 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
727 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
728 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
729 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
730 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
731 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
732 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
733 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
734
735 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
736 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
737 /* GRAS_SU_MODE_CONTROL */
738 *cmds++ = 0x00000000;
739
740 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700741 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
742 /* RB_MRT_CONTROL0 */
743 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
744 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
745 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
746 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
747
748 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
749 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
750 /* RB_MRT_BLEND_CONTROL0 */
751 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
752 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
753 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
754 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
755 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
756 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
757 /* RB_MRT_CONTROL1 */
758 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
759 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
760 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
761
762 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
763 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
764 /* RB_MRT_BLEND_CONTROL1 */
765 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
766 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
767 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
768 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
769 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
770 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
771 /* RB_MRT_CONTROL2 */
772 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
773 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
774 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
775
776 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
777 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
778 /* RB_MRT_BLEND_CONTROL2 */
779 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
780 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
781 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
782 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
783 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
784 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
785 /* RB_MRT_CONTROL3 */
786 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
787 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
788 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
789
790 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
791 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
792 /* RB_MRT_BLEND_CONTROL3 */
793 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
794 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
795 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
796 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
797 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
798 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
799
800 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
801 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
802 /* VFD_INDEX_MIN */
803 *cmds++ = 0x00000000;
804 /* VFD_INDEX_MAX */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700805 *cmds++ = 0x155;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700806 /* VFD_INSTANCEID_OFFSET */
807 *cmds++ = 0x00000000;
808 /* VFD_INDEX_OFFSET */
809 *cmds++ = 0x00000000;
810
811 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
812 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
813 /* VFD_VS_THREADING_THRESHOLD */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700814 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_REGID_THRESHOLD, 15) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700815 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
816
817 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
818 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
819 /* TPL1_TP_VS_TEX_OFFSET */
820 *cmds++ = 0;
821
822 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
823 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
824 /* TPL1_TP_FS_TEX_OFFSET */
825 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
826 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
827 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
828
829 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
830 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
831 /* PC_PRIM_VTX_CNTL */
832 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
833 PC_DRAW_TRIANGLES) |
834 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
835 PC_DRAW_TRIANGLES) |
836 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
837
838 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
839 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
840 /* GRAS_SC_WINDOW_SCISSOR_TL */
841 *cmds++ = 0x00000000;
842 /* GRAS_SC_WINDOW_SCISSOR_BR */
843 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
844 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
845
846 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
847 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
848 /* GRAS_SC_SCREEN_SCISSOR_TL */
849 *cmds++ = 0x00000000;
850 /* GRAS_SC_SCREEN_SCISSOR_BR */
851 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
852 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
853
854 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
855 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
856 /* GRAS_CL_VPORT_XOFFSET */
857 *cmds++ = 0x00000000;
858 /* GRAS_CL_VPORT_XSCALE */
859 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
860 /* GRAS_CL_VPORT_YOFFSET */
861 *cmds++ = 0x00000000;
862 /* GRAS_CL_VPORT_YSCALE */
863 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
864
865 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
866 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
867 /* GRAS_CL_VPORT_ZOFFSET */
868 *cmds++ = 0x00000000;
869 /* GRAS_CL_VPORT_ZSCALE */
870 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
871
872 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
873 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
874 /* GRAS_CL_CLIP_CNTL */
875 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
876 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
877 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
878 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
879 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
880
881 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
882 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
883 /* GRAS_CL_GB_CLIP_ADJ */
884 *cmds++ = 0x00000000;
885
886 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
887 *cmds++ = 0x00000000;
888
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700889
890 /* oxili_generate_context_roll_packets */
891 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
892 *cmds++ = 0x00000400;
893
894 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
895 *cmds++ = 0x00000400;
896
897 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
898 *cmds++ = 0x00008000; /* SP_VS_MEM_SIZE_REG */
899
900 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
901 *cmds++ = 0x00008000; /* SP_FS_MEM_SIZE_REG */
902
903 /* Clear cache invalidate bit when re-loading the shader control regs */
904 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
905 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
906 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
907 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 1) |
908 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
909 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
910 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
911
912 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
913 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
914 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
915 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
916 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
917 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
918 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
919 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
920
921 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
922 *cmds++ = 0x00000000; /* SP_VS_MEM_SIZE_REG */
923
924 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
925 *cmds++ = 0x00000000; /* SP_FS_MEM_SIZE_REG */
926
927 /* end oxili_generate_context_roll_packets */
928
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700929 /*
930 * Resolve using two draw calls with a dummy register
931 * write in between. This is a HLM workaround
932 * that should be removed later.
933 */
934 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
935 *cmds++ = 0x00000000; /* Viz query info */
936 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
937 PC_DI_SRC_SEL_IMMEDIATE,
938 PC_DI_INDEX_SIZE_32_BIT,
939 PC_DI_IGNORE_VISIBILITY);
940 *cmds++ = 0x00000003; /* Num indices */
941 *cmds++ = 0x00000000; /* Index 0 */
942 *cmds++ = 0x00000001; /* Index 1 */
943 *cmds++ = 0x00000002; /* Index 2 */
944
945 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
946 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
947 *cmds++ = 0x00000000;
948
949 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
950 *cmds++ = 0x00000000; /* Viz query info */
951 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
952 PC_DI_SRC_SEL_IMMEDIATE,
953 PC_DI_INDEX_SIZE_32_BIT,
954 PC_DI_IGNORE_VISIBILITY);
955 *cmds++ = 0x00000003; /* Num indices */
956 *cmds++ = 0x00000002; /* Index 0 */
957 *cmds++ = 0x00000001; /* Index 1 */
958 *cmds++ = 0x00000003; /* Index 2 */
959
960 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
961 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
962 *cmds++ = 0x00000000;
963
964 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
965 *cmds++ = 0x00000000;
966
967 /* Create indirect buffer command for above command sequence */
968 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
969
970 return cmds;
971}
/*
 * build_shader_save_cmds() - Assemble the IB that saves the vertex and
 * fragment shader instruction memory into the context shadow.
 *
 * The commands are written into the scratch buffer (tmp_ctx.cmd) and
 * anchored in drawctxt->shader_save by create_ib1().  The copy sizes and
 * source addresses are not known until save time, so placeholder dwords
 * are reserved here (cond_execs[0..1], shader_save_commands[0..3]) and
 * patched by the save-fixup IB (see build_save_fixup_cmds()) before this
 * IB executes.
 */
static void build_shader_save_cmds(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;

	/* Reserve space for boolean values used for COND_EXEC packet */
	drawctxt->cond_execs[0].hostptr = cmd;
	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[1].hostptr = cmd;
	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/*
	 * Reserve two dwords that the fixup IB overwrites with the
	 * CP_REG_TO_MEM size/source words for the VS and FS copies.
	 */
	drawctxt->shader_save_commands[0].hostptr = cmd;
	drawctxt->shader_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->shader_save_commands[1].hostptr = cmd;
	drawctxt->shader_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	start = cmd;

	/* Save vertex shader */

	/*
	 * Skip the VS copy (next 3 dwords) unless cond_execs[0] is
	 * non-zero; the fixup IB sets it from SP_VS_CTRL_REG0.
	 */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->shader_save_commands[2].hostptr = cmd;
	drawctxt->shader_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	From fixup:

	dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8

	From regspec:
	SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	If bit31 is 1, it means overflow
	or any long shader.

	src = (HLSQ_SHADOW_BASE + 0x1000)/4
	*/
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Save fragment shader */
	/* Conditional on cond_execs[1], set from SP_FS_CTRL_REG0 by fixup */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->shader_save_commands[3].hostptr = cmd;
	drawctxt->shader_save_commands[3].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	From fixup:

	dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8

	From regspec:
	SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	If bit31 is 1, it means overflow
	or any long shader.

	fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
	From regspec:

	SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
	First instruction of the whole shader will be stored from
	the offset in instruction cache, unit = 256bits, a cache line.
	It can start from 0 if no VS available.

	src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
	*/
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* FS instructions shadow lives in the second half of the region */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
1064
1065/*
1066 * Make an IB to modify context save IBs with the correct shader instruction
1067 * and constant sizes and offsets.
1068 */
1069
/*
 * Assemble the "save fixup" IB.  It runs before the context-save IBs and
 * patches them in place with the shader/constant lengths and offsets that
 * are only known at save time, by reading the live SP/HLSQ registers on
 * the GPU itself (or, in the CPU-sync build, by dumping them to memory
 * for the CPU to process).
 */
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Fully invalidate the UCHE before reading back state */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC

	/*
	 * CPU-sync variant: dump the raw registers to memory and let the
	 * CPU compute the patched command words.
	 */

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else

	/*
	 * GPU-side variant: use CP_REG_RMW sequences (via rmw_regtomem()
	 * and the CP scratch registers) to assemble the REG_TO_MEM header
	 * words (loop count | source address) directly into the save IBs.
	 */

	/* Shader save */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
		A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000; /* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000; /* AND value */
	*cmd++ = 0x00000000; /* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff; /* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */

	/* Store the assembled FS header word into the shader-save IB */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	/* Patch the FS constant destination address (second dword) */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1220
1221/****************************************************************************/
1222/* Functions to build context restore IBs */
1223/****************************************************************************/
1224
1225static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1226 struct adreno_context *drawctxt,
1227 struct gmem_shadow_t *shadow)
1228{
1229 unsigned int *cmds = tmp_ctx.cmd;
1230 unsigned int *start = cmds;
1231
Jordan Crousefb3012f2012-06-22 13:11:05 -06001232 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
1233 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
1234
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001235 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1236 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1237 /* HLSQ_CONTROL_0_REG */
1238 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001239 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001240 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1241 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001242 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001243 /* HLSQ_CONTROL_1_REG */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001244 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
1245 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001246 /* HLSQ_CONTROL_2_REG */
1247 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1248 /* HLSQ_CONTROL3_REG */
1249 *cmds++ = 0x00000000;
1250
1251 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1252 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1253 /* RB_MRT_BUF_INFO0 */
1254 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1255 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1256 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1257 (shadow->gmem_pitch * 4 * 8) / 256);
1258 /* RB_MRT_BUF_BASE0 */
1259 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1260
1261 /* Texture samplers */
1262 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1263 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1264 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1265 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1266 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1267 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1268 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1269 *cmds++ = 0x00000240;
1270 *cmds++ = 0x00000000;
1271
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001272 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1273 *cmds++ = 0x00000000;
1274
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001275 /* Texture memobjs */
1276 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1277 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1278 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1279 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1280 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1281 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1282 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1283 *cmds++ = 0x4cc06880;
1284 *cmds++ = shadow->height | (shadow->width << 14);
1285 *cmds++ = (shadow->pitch*4*8) << 9;
1286 *cmds++ = 0x00000000;
1287
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001288 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1289 *cmds++ = 0x00000000;
1290
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001291 /* Mipmap bases */
1292 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1293 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1294 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1295 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1296 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1297 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1298 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1299 *cmds++ = shadow->gmemshadow.gpuaddr;
1300 *cmds++ = 0x00000000;
1301 *cmds++ = 0x00000000;
1302 *cmds++ = 0x00000000;
1303 *cmds++ = 0x00000000;
1304 *cmds++ = 0x00000000;
1305 *cmds++ = 0x00000000;
1306 *cmds++ = 0x00000000;
1307 *cmds++ = 0x00000000;
1308 *cmds++ = 0x00000000;
1309 *cmds++ = 0x00000000;
1310 *cmds++ = 0x00000000;
1311 *cmds++ = 0x00000000;
1312 *cmds++ = 0x00000000;
1313
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001314 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1315 *cmds++ = 0x00000000;
1316
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001317 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1318 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1319 /* HLSQ_VS_CONTROL_REG */
1320 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1321 /* HLSQ_FS_CONTROL_REG */
1322 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1323 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1324 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1325 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1326 *cmds++ = 0x00000000;
1327 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1328 *cmds++ = 0x00000000;
1329
1330 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1331 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1332 /* SP_FS_LENGTH_REG */
1333 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1334
1335 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1336 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1337 /* SP_VS_CTRL_REG0 */
1338 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1339 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1340 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1341 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1342 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1343 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1344 /* SP_VS_CTRL_REG1 */
1345 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1346 /* SP_VS_PARAM_REG */
1347 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1348 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1349 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1350 /* SP_VS_OUT_REG0 */
1351 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1352 /* SP_VS_OUT_REG1 */
1353 *cmds++ = 0x00000000;
1354 /* SP_VS_OUT_REG2 */
1355 *cmds++ = 0x00000000;
1356 /* SP_VS_OUT_REG3 */
1357 *cmds++ = 0x00000000;
1358 /* SP_VS_OUT_REG4 */
1359 *cmds++ = 0x00000000;
1360 /* SP_VS_OUT_REG5 */
1361 *cmds++ = 0x00000000;
1362 /* SP_VS_OUT_REG6 */
1363 *cmds++ = 0x00000000;
1364 /* SP_VS_OUT_REG7 */
1365 *cmds++ = 0x00000000;
1366
1367 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1368 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1369 /* SP_VS_VPC_DST_REG0 */
1370 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1371 /* SP_VS_VPC_DST_REG1 */
1372 *cmds++ = 0x00000000;
1373 /* SP_VS_VPC_DST_REG2 */
1374 *cmds++ = 0x00000000;
1375 /* SP_VS_VPC_DST_REG3 */
1376 *cmds++ = 0x00000000;
1377 /* SP_VS_OBJ_OFFSET_REG */
1378 *cmds++ = 0x00000000;
1379 /* SP_VS_OBJ_START_REG */
1380 *cmds++ = 0x00000000;
1381
1382 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1383 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1384 /* SP_VS_LENGTH_REG */
1385 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1386 /* SP_FS_CTRL_REG0 */
1387 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1388 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1389 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001390 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
1391 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001392 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1393 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001394 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001395 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1396 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1397 /* SP_FS_CTRL_REG1 */
1398 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1399 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1400 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1401 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001402 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001403 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 126);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001404 /* SP_FS_OBJ_START_REG */
1405 *cmds++ = 0x00000000;
1406
1407 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1408 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1409 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1410 *cmds++ = 0x00000000;
1411 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1412 *cmds++ = 0x00000000;
1413
1414 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1415 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1416 /* SP_FS_OUT_REG */
1417 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1418
Jordan Crousea7ec4212012-02-04 10:23:52 -07001419 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001420 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1421 /* SP_FS_MRT_REG0 */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001422 *cmds++ = _SET(SP_FSMRTREG_PRECISION, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001423 /* SP_FS_MRT_REG1 */
1424 *cmds++ = 0;
1425 /* SP_FS_MRT_REG2 */
1426 *cmds++ = 0;
1427 /* SP_FS_MRT_REG3 */
1428 *cmds++ = 0;
1429
1430 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1431 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1432 /* VPC_ATTR */
1433 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1434 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1435 _SET(VPC_VPCATTR_LMSIZE, 1);
1436 /* VPC_PACK */
1437 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1438 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1439 /* VPC_VARYING_INTERP_MODE_0 */
1440 *cmds++ = 0x00000000;
1441 /* VPC_VARYING_INTERP_MODE1 */
1442 *cmds++ = 0x00000000;
1443 /* VPC_VARYING_INTERP_MODE2 */
1444 *cmds++ = 0x00000000;
1445 /* VPC_VARYING_IINTERP_MODE3 */
1446 *cmds++ = 0x00000000;
1447 /* VPC_VARRYING_PS_REPL_MODE_0 */
1448 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1449 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1450 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1451 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1452 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1453 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1454 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1455 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1456 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1457 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1458 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1459 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1460 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1461 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1462 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1463 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1464 /* VPC_VARRYING_PS_REPL_MODE_1 */
1465 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1466 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1467 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1468 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1469 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1470 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1471 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1472 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1473 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1474 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1475 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1476 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1477 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1478 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1479 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1480 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1481 /* VPC_VARRYING_PS_REPL_MODE_2 */
1482 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1483 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1484 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1485 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1486 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1487 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1488 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1489 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1490 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1491 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1492 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1493 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1494 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1495 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1496 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1497 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1498 /* VPC_VARRYING_PS_REPL_MODE_3 */
1499 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1500 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1501 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1502 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1503 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1504 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1505 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1506 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1507 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1508 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1509 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1510 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1511 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1512 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1513 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1514 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1515
Jordan Crousea7ec4212012-02-04 10:23:52 -07001516 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001517 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1518 /* SP_SP_CTRL_REG */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001519 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1) |
1520 _SET(SP_SPCTRLREG_LOMODE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001521
1522 /* Load vertex shader */
1523 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1524 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1525 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1526 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1527 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1528 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1529 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1530 /* (sy)end; */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001531 *cmds++ = 0x00000000; *cmds++ = 0x13001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001532 /* nop; */
1533 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1534 /* nop; */
1535 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1536 /* nop; */
1537 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1538
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001539 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1540 *cmds++ = 0x00000000;
1541
1542 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1543 *cmds++ = 0x00000000;
1544
1545
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001546 /* Load fragment shader */
1547 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1548 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1549 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1550 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1551 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1552 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1553 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1554 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001555 *cmds++ = 0x00002000; *cmds++ = 0x57309902;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001556 /* (rpt5)nop; */
1557 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1558 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1559 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1560 /* (sy)mov.f32f32 r1.x, r0.x; */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001561 *cmds++ = 0x00000000; *cmds++ = 0x30040b00;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001562 /* mov.f32f32 r1.y, r0.y; */
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001563 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001564 /* mov.f32f32 r1.z, r0.z; */
1565 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1566 /* mov.f32f32 r1.w, r0.w; */
1567 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1568 /* end; */
1569 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1570
1571 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1572 *cmds++ = 0x00000000;
1573
1574 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1575 *cmds++ = 0x00000000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001576
1577 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1578 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1579 /* VFD_CONTROL_0 */
1580 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1581 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1582 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1583 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1584 /* VFD_CONTROL_1 */
1585 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1586 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1587 _SET(VFD_CTRLREG1_REGID4INST, 252);
1588
1589 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1590 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1591 /* VFD_FETCH_INSTR_0_0 */
1592 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1593 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1594 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1595 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1596 /* VFD_FETCH_INSTR_1_0 */
1597 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1598 shadow->quad_vertices_restore.gpuaddr);
1599 /* VFD_FETCH_INSTR_0_1 */
1600 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1601 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1602 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1603 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1604 /* VFD_FETCH_INSTR_1_1 */
1605 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1606 shadow->quad_vertices_restore.gpuaddr + 16);
1607
1608 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1609 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1610 /* VFD_DECODE_INSTR_0 */
1611 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1612 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1613 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1614 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1615 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1616 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1617 /* VFD_DECODE_INSTR_1 */
1618 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1619 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1620 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1621 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1622 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1623 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1624
1625 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1626 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1627 /* RB_DEPTH_CONTROL */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001628 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_LESS);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001629
1630 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1631 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1632 /* RB_STENCIL_CONTROL */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001633 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001634 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1635 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1636 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001637 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001638 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1639 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1640 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1641
1642 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1643 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1644 /* RB_MODE_CONTROL */
1645 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1646 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1647
1648 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1649 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1650 /* RB_RENDER_CONTROL */
1651 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1652 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1653
1654 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1655 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1656 /* RB_MSAA_CONTROL */
1657 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1658 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1659
1660 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1661 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1662 /* RB_MRT_CONTROL0 */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001663 *cmds++ = _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1664 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001665 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1666
1667 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1668 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1669 /* RB_MRT_BLENDCONTROL0 */
1670 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1671 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1672 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1673 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1674 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1675 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1676 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1677 /* RB_MRT_CONTROL1 */
1678 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001679 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1680 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001681 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1682
1683 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1684 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1685 /* RB_MRT_BLENDCONTROL1 */
1686 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1687 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1688 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1689 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1690 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1691 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1692 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1693 /* RB_MRT_CONTROL2 */
1694 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001695 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1696 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001697 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1698
1699 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1700 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1701 /* RB_MRT_BLENDCONTROL2 */
1702 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1703 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1704 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1705 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1706 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1707 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1708 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1709 /* RB_MRT_CONTROL3 */
1710 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001711 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1712 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001713 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1714
1715 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1716 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1717 /* RB_MRT_BLENDCONTROL3 */
1718 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1719 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1720 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1721 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1722 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1723 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1724 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1725
1726 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1727 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1728 /* VFD_INDEX_MIN */
1729 *cmds++ = 0x00000000;
1730 /* VFD_INDEX_MAX */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001731 *cmds++ = 340;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001732 /* VFD_INDEX_OFFSET */
1733 *cmds++ = 0x00000000;
1734 /* TPL1_TP_VS_TEX_OFFSET */
1735 *cmds++ = 0x00000000;
1736
1737 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1738 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1739 /* VFD_VS_THREADING_THRESHOLD */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001740 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_REGID_THRESHOLD, 15) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001741 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1742
1743 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1744 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1745 /* TPL1_TP_VS_TEX_OFFSET */
1746 *cmds++ = 0x00000000;
1747
1748 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1749 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1750 /* TPL1_TP_FS_TEX_OFFSET */
1751 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1752 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1753 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1754
1755 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1756 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1757 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001758 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1759 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1760 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001761
1762 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1763 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1764 /* GRAS_SU_MODE_CONTROL */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001765 *cmds++ = _SET(GRAS_SU_CTRLMODE_LINEHALFWIDTH, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001766
1767 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1768 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1769 /* GRAS_SC_WINDOW_SCISSOR_TL */
1770 *cmds++ = 0x00000000;
1771 /* GRAS_SC_WINDOW_SCISSOR_BR */
1772 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1773 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1774
1775 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1776 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1777 /* GRAS_SC_SCREEN_SCISSOR_TL */
1778 *cmds++ = 0x00000000;
1779 /* GRAS_SC_SCREEN_SCISSOR_BR */
1780 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1781 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1782
1783 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1784 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1785 /* GRAS_CL_VPORT_XOFFSET */
1786 *cmds++ = 0x00000000;
1787 /* GRAS_CL_VPORT_XSCALE */
1788 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1789 /* GRAS_CL_VPORT_YOFFSET */
1790 *cmds++ = 0x00000000;
1791 /* GRAS_CL_VPORT_YSCALE */
1792 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1793
1794 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1795 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1796 /* GRAS_CL_VPORT_ZOFFSET */
1797 *cmds++ = 0x00000000;
1798 /* GRAS_CL_VPORT_ZSCALE */
1799 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1800
1801 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1802 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1803 /* GRAS_CL_CLIP_CNTL */
1804 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1805
1806 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1807 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1808 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1809 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1810
1811 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1812 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1813 /* PC_PRIM_VTX_CONTROL */
1814 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1815 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1816 PC_DRAW_TRIANGLES) |
1817 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1818 PC_DRAW_TRIANGLES) |
1819 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1820
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001821
1822 /* oxili_generate_context_roll_packets */
1823 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
1824 *cmds++ = 0x00000400;
1825
1826 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
1827 *cmds++ = 0x00000400;
1828
1829 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
1830 *cmds++ = 0x00008000; /* SP_VS_MEM_SIZE_REG */
1831
1832 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
1833 *cmds++ = 0x00008000; /* SP_FS_MEM_SIZE_REG */
1834
1835 /* Clear cache invalidate bit when re-loading the shader control regs */
1836 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
1837 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1838 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1839 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1840 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1841 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1842
1843 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
1844 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1845 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1846 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
1847 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 1) |
1848 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1849 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1850 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
1851 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1852
1853 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
1854 *cmds++ = 0x00000000; /* SP_VS_MEM_SIZE_REG */
1855
1856 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
1857 *cmds++ = 0x00000000; /* SP_FS_MEM_SIZE_REG */
1858
1859 /* end oxili_generate_context_roll_packets */
1860
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001861 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1862 *cmds++ = 0x00000000; /* Viz query info */
1863 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1864 PC_DI_SRC_SEL_AUTO_INDEX,
1865 PC_DI_INDEX_SIZE_16_BIT,
1866 PC_DI_IGNORE_VISIBILITY);
1867 *cmds++ = 0x00000002; /* Num indices */
1868
1869 /* Create indirect buffer command for above command sequence */
1870 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1871
1872 return cmds;
1873}
1874
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001875
/*
 * build_regrestore_cmds() - build the IB that reloads context registers
 * on restore.
 *
 * Register values are pulled from the per-context register shadow with a
 * single CP_LOAD_CONSTANT_CONTEXT packet.  The packet header is written
 * *after* the register range list is emitted (via lcc_start[0]) because
 * the final packet length is not known up front.  Registers listed in
 * global_registers[] are restored with individual type0 packets whose
 * payload dwords are zero here; their GPU addresses are recorded in
 * tmp_ctx.reg_values[] so the save-time code can patch in live values.
 *
 * On return, drawctxt->reg_restore describes the finished IB and
 * tmp_ctx.cmd points past the command space consumed.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7; /* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Invalidate the entire UCHE before reloading state */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000; /* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0; /* No end addr for full invalidate */

	/* Remember where the LOAD_CONSTANT_CONTEXT packet begins */
	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Emit one offset/count pair per shadowed register range */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Now that the length is known, write the deferred packet header */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
					(cmd - lcc_start) - 1);

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * Restore non-context registers with type0 writes; the data slot
	 * of each packet is recorded so it can be patched with the value
	 * captured at save time.
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1933
/*
 * build_constantrestore_cmds() - build the IB that restores ALU
 * constants and texture state from the per-context shadow.
 *
 * The vertex/fragment constant loads are wrapped in CP_COND_EXEC
 * packets keyed off cond_execs[2]/[3] (whose values are written at
 * save time from SP_VS/FS_CTRL_REG1 - see build_restore_fixup_cmds),
 * so the loads are skipped when a shader stage has no constants.  The
 * CP_LOAD_STATE ord1/ord2 dwords are emitted as 0 here; their GPU
 * addresses are recorded in constant_load_commands[] so the fixup IB
 * can patch in the real sizes and shadow offsets before execution.
 *
 * The texture state (memobjects, mipmap tables, samplers for VS and
 * FS) is restored unconditionally from fixed offsets in the shadow.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4; /* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/*
	 * Reserve two dwords used as the CP_COND_EXEC compare values for
	 * the VS/FS constant loads below; patched at save time.
	 */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable register shadowing for the HLSQ write */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240; /* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3; /* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0; /* ord1 - patched in by the restore fixup IB */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3; /* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0; /* ord1 - patched in by the restore fixup IB */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0; /* ord2 - patched in by the restore fixup IB */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2098
/*
 * build_shader_restore_cmds() - build the IB that reloads the vertex
 * and fragment shader instructions from the per-context shader shadow.
 *
 * Each CP_LOAD_STATE is wrapped in a CP_COND_EXEC on cond_execs[0]/[1]
 * so the load is skipped when that stage's shader was not saved.  The
 * ord1 dword of each load is emitted as 0; its GPU address is recorded
 * in shader_load_commands[] so the restore fixup IB can patch in the
 * real shader length (taken from SP_VS/FS_CTRL_REG0 - see
 * build_restore_fixup_cmds).
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0; /*ord1 - patched in by the restore fixup IB */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader - shadowed in the second half of the region */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0; /*ord1 - patched in by the restore fixup IB */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2165
2166static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
2167 struct adreno_context *drawctxt)
2168{
2169 unsigned int *cmd = tmp_ctx.cmd;
2170 unsigned int *start = cmd;
2171
2172 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
2173 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
2174 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
2175 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
2176 = virt2gpu(cmd, &drawctxt->gpustate);
2177 *cmd++ = 0;
2178
2179 /* Create indirect buffer command for above command sequence */
2180 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
2181
2182 tmp_ctx.cmd = cmd;
2183}
2184
/*
 * build_restore_fixup_cmds() - build the IB that patches the restore
 * IBs with the sizes and offsets of the state to be reloaded.
 *
 * The shader/constant restore packets are emitted with zeroed ord
 * dwords; this IB reads the live SP_*_CTRL_REG* values and rewrites
 * those dwords (and the constant-restore CP_COND_EXEC compare values)
 * so the next restore loads exactly the amount of state that was
 * saved.  With GSL_CONTEXT_SWITCH_CPU_SYNC the raw register values are
 * copied out with CP_REG_TO_MEM; otherwise rmw_regtomem() is used to
 * mask/shift each register into a ready-to-use packet header in place.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes: VS/FS_LENGTH -> CP_LOAD_STATE ord1 words */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
		30, (4 << 19) | (4 << 16),
		drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
		30, (6 << 19) | (4 << 16),
		drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes: VS/FSCONSTLENGTH -> ord1 words */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
		23, (4 << 19) | (4 << 16),
		drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
		23, (6 << 19) | (4 << 16),
		drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals (skip load when length 0) */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
		0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
		0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset (ord2 = base | offset | 1) */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
		18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
		drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
		0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2257
/*
 * a3xx_create_gpustate_shadow() - build all save/restore IBs for a
 * context that uses full state shadowing (i.e. no preamble).
 *
 * NOTE: each build_* call consumes command space sequentially from
 * tmp_ctx.cmd, so the call order fixes the layout of the gpustate
 * buffer and must not be changed casually.  Always returns 0; the
 * builders themselves cannot fail (the backing allocation was made by
 * the caller).
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	/* Mark the context as having a register/constant shadow */
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2274
2275/* create buffers for saving/restoring registers, constants, & GMEM */
2276static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
2277 struct adreno_context *drawctxt)
2278{
Jordan Crousea7ec4212012-02-04 10:23:52 -07002279 int result;
2280
Jordan Crouse7501d452012-04-19 08:58:44 -06002281 calc_gmemsize(&drawctxt->context_gmem_shadow, adreno_dev->gmem_size);
2282 tmp_ctx.gmem_base = adreno_dev->gmem_base;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002283
Jordan Crousea7ec4212012-02-04 10:23:52 -07002284 result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
2285 drawctxt->pagetable, drawctxt->context_gmem_shadow.size);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002286
Jordan Crousea7ec4212012-02-04 10:23:52 -07002287 if (result)
2288 return result;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002289
2290 build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
2291 &tmp_ctx.cmd);
2292
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002293 tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
2294 &drawctxt->context_gmem_shadow);
2295 tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
2296 &drawctxt->context_gmem_shadow);
2297
2298 kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
2299 KGSL_CACHE_OP_FLUSH);
2300
Jordan Crousea7ec4212012-02-04 10:23:52 -07002301 drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
2302
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002303 return 0;
2304}
2305
/*
 * a3xx_drawctxt_create() - allocate and initialize per-context GPU state
 * @adreno_dev: the adreno device this context belongs to
 * @drawctxt: the context being created
 *
 * Allocates the gpustate buffer, and (depending on context flags) builds
 * the register/shader shadow and the GMEM shadow for context switching.
 * Returns 0 on success or a negative error code; on failure the gpustate
 * allocation is released before returning.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
	struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more than just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
		drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	/* Zero the buffer so stale data is never replayed as commands */
	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	/* tmp_ctx.cmd tracks the next free command slot in the buffer */
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	/* Contexts without a preamble need full register/shader shadowing */
	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	/* Build the GMEM shadow unless the caller explicitly opted out */
	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	/* On any failure, release the gpustate allocation made above */
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2344
/*
 * a3xx_drawctxt_save() - submit commands that save a context's GPU state
 * @adreno_dev: the adreno device
 * @context: the context being switched away from (may be NULL)
 *
 * Issues the pre-built save command streams to the ringbuffer. The
 * submission order matters: registers/constants first, then shaders,
 * then GMEM (the GMEM save clobbers shader state, so shaders must
 * already have been saved).
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
	struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Nothing to save for a NULL or dying context */
	if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTOYED))
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			"Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE, context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device, context,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			/* Mark that a restore is now required */
			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
		(context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2390
/*
 * a3xx_drawctxt_restore() - submit commands that restore a context's state
 * @adreno_dev: the adreno device
 * @context: the context being switched to (NULL selects default pagetable)
 *
 * Writes the incoming context id to memstore, switches the MMU pagetable,
 * then replays the pre-built restore streams. GMEM must be restored before
 * shaders because the GMEM restore clobbers shader state.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
	struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/*
		 * No context - set the default pagetable and thats it.
		 * NOTE(review): this dereferences adreno_dev->drawctxt_active
		 * without a NULL check - presumably callers guarantee an
		 * active context exists on this path; confirm at call sites.
		 */
		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
				adreno_dev->drawctxt_active->id);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Record the new current context id in memstore via CP_MEM_WRITE */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[4] = context->id;
	adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
					cmds, 5);
	kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id);

	/*
	 * Restore GMEM.  (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_restore, 3);
		/* One-shot: clear the flag once the restore is queued */
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE, context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, context,
				KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2453
2454static void a3xx_rb_init(struct adreno_device *adreno_dev,
2455 struct adreno_ringbuffer *rb)
2456{
2457 unsigned int *cmds, cmds_gpu;
2458 cmds = adreno_ringbuffer_allocspace(rb, 18);
2459 cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);
2460
2461 GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
2462 GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
2463 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2464 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2465 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2466 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
2467 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
2468 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
2469 GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
2470 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
2471 GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
2472 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
2473 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
2474 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2475 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2476 /* Protected mode control - turned off for A3XX */
2477 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2478 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2479 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2480
2481 adreno_ringbuffer_submit(rb);
2482}
2483
2484static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2485{
2486 struct kgsl_device *device = &adreno_dev->dev;
2487 const char *err = "";
2488
2489 switch (bit) {
2490 case A3XX_INT_RBBM_AHB_ERROR: {
2491 unsigned int reg;
2492
2493 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2494
2495 /*
2496 * Return the word address of the erroring register so that it
2497 * matches the register specification
2498 */
2499
2500 KGSL_DRV_CRIT(device,
2501 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2502 reg & (1 << 28) ? "WRITE" : "READ",
2503 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2504 (reg >> 24) & 0x3);
2505
2506 /* Clear the error */
2507 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2508 return;
2509 }
2510 case A3XX_INT_RBBM_REG_TIMEOUT:
2511 err = "RBBM: AHB register timeout";
2512 break;
2513 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2514 err = "RBBM: ME master split timeout";
2515 break;
2516 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2517 err = "RBBM: PFP master split timeout";
2518 break;
2519 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2520 err = "RBBM: ATB bus oveflow";
2521 break;
2522 case A3XX_INT_VFD_ERROR:
2523 err = "VFD: Out of bounds access";
2524 break;
2525 case A3XX_INT_CP_T0_PACKET_IN_IB:
2526 err = "ringbuffer TO packet in IB interrupt";
2527 break;
2528 case A3XX_INT_CP_OPCODE_ERROR:
2529 err = "ringbuffer opcode error interrupt";
2530 break;
2531 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2532 err = "ringbuffer reserved bit error interrupt";
2533 break;
2534 case A3XX_INT_CP_HW_FAULT:
2535 err = "ringbuffer hardware fault";
2536 break;
2537 case A3XX_INT_CP_REG_PROTECT_FAULT:
2538 err = "ringbuffer protected mode error interrupt";
2539 break;
2540 case A3XX_INT_CP_AHB_ERROR_HALT:
2541 err = "ringbuffer AHB error interrupt";
2542 break;
2543 case A3XX_INT_MISC_HANG_DETECT:
2544 err = "MISC: GPU hang detected";
2545 break;
2546 case A3XX_INT_UCHE_OOB_ACCESS:
2547 err = "UCHE: Out of bounds access";
2548 break;
2549 }
2550
2551 KGSL_DRV_CRIT(device, "%s\n", err);
2552 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2553}
2554
2555static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
2556{
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002557 struct kgsl_device *device = &adreno_dev->dev;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002558
2559 if (irq == A3XX_INT_CP_RB_INT) {
Carter Cooper7e7f02e2012-02-15 09:36:31 -07002560 unsigned int context_id;
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002561 kgsl_sharedmem_readl(&device->memstore, &context_id,
Carter Cooper7e7f02e2012-02-15 09:36:31 -07002562 KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
2563 current_context));
2564 if (context_id < KGSL_MEMSTORE_MAX) {
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002565 kgsl_sharedmem_writel(&device->memstore,
Carter Cooper7e7f02e2012-02-15 09:36:31 -07002566 KGSL_MEMSTORE_OFFSET(context_id,
2567 ts_cmp_enable), 0);
2568 wmb();
2569 }
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002570 KGSL_CMD_WARN(device, "ringbuffer rb interrupt\n");
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002571 }
2572
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002573 wake_up_interruptible_all(&device->wait_queue);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002574
2575 /* Schedule work to free mem and issue ibs */
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002576 queue_work(device->work_queue, &device->ts_expired_ws);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002577
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002578 atomic_notifier_call_chain(&device->ts_notifier_list,
2579 device->id, NULL);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002580}
2581
/* Initializer for one entry of the a3xx_irq_funcs dispatch table */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupt bits enabled in A3XX_RBBM_INT_0_MASK. Note that not every
 * bit with a registered callback is enabled here (e.g. the RBBM
 * timeouts and MISC_HANG_DETECT are left out of the mask).
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2597
/*
 * Per-bit interrupt dispatch table, indexed by RBBM_INT_0_STATUS bit
 * number. NULL entries are reported as unhandled by a3xx_irq_handler.
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),               /* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),               /* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),               /* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),               /* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),               /* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),               /* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),               /* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),               /* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),               /* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),               /* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2629
2630static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2631{
2632 struct kgsl_device *device = &adreno_dev->dev;
2633 irqreturn_t ret = IRQ_NONE;
2634 unsigned int status, tmp;
2635 int i;
2636
2637 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2638
2639 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2640 if (tmp & 1) {
2641 if (a3xx_irq_funcs[i].func != NULL) {
2642 a3xx_irq_funcs[i].func(adreno_dev, i);
2643 ret = IRQ_HANDLED;
2644 } else {
2645 KGSL_DRV_CRIT(device,
2646 "Unhandled interrupt bit %x\n", i);
2647 }
2648 }
2649
2650 tmp >>= 1;
2651 }
2652
Carter Cooperb769c912012-04-13 08:16:35 -06002653 trace_kgsl_a3xx_irq_status(device, status);
2654
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002655 if (status)
2656 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2657 status);
2658 return ret;
2659}
2660
2661static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2662{
2663 struct kgsl_device *device = &adreno_dev->dev;
2664
Wei Zou08a7e572012-06-03 22:05:46 -07002665 if (state)
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002666 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
Wei Zou08a7e572012-06-03 22:05:46 -07002667 else
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002668 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2669}
2670
/*
 * a3xx_busy_cycles() - sample the GPU busy-cycle power counter
 * @adreno_dev: the adreno device
 *
 * Freezes power counter 1, reads its low word, then resets and
 * re-enables it so the next call measures a fresh interval. The
 * freeze/read/reset/enable ordering is required for a stable sample.
 * Returns the counter value read while frozen.
 */
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int reg, val;

	/* Freeze the counter */
	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
	reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Read the value */
	adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);

	/* Reset the counter */
	reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Re-enable the counter */
	reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
	reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	return val;
}
2695
/*
 * a3xx_start() - one-time hardware setup for an A3XX GPU
 * @adreno_dev: the adreno device being started
 *
 * Programs the VBIF (bus interface) request queues and arbitration -
 * with deeper queues and extra settings for A330 - then configures
 * performance counters, error reporting, hang detection, clock gating,
 * and (on A330) the OCMEM-backed GMEM base address.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Set up 16 deep read/write request queues */
	if (adreno_dev->gpurev == ADRENO_REV_A330) {
		adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		adreno_regwrite(device, A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		adreno_regwrite(device, A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		adreno_regwrite(device, A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ */
		adreno_regwrite(device, A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F);

		/* Set up round robin arbitration between both AXI ports */
		adreno_regwrite(device, A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB */
		adreno_regwrite(device, A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);

		/* Set up AOOO */
		adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000FFFF);
		adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO, 0xFFFFFFFF);

		/* Enable 1K sort */
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT, 0x1FFFF);
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4);

		/* Disable VBIF clock gating. This is to enable AXI running
		 * higher frequency than GPU.
		 */
		adreno_regwrite(device, A3XX_VBIF_CLKON, 1);
	} else {
		adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		adreno_regwrite(device, A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		adreno_regwrite(device, A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		adreno_regwrite(device, A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ */
		adreno_regwrite(device, A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF);

		/* Set up round robin arbitration between both AXI ports */
		adreno_regwrite(device, A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO */
		adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C);
		adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C);
	}

	if (cpu_is_apq8064()) {
		/* Enable 1K sort */
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT, 0x000000FF);
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
	}
	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Tune the hysteresis counters for SP and CP idle detection */
	adreno_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
	adreno_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 16) | 0xFFF);

	/* Enable Clock gating */
	adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
		A3XX_RBBM_CLOCK_CTL_DEFAULT);

	/* Set the OCMEM base address for A330 */
	if (adreno_dev->gpurev == ADRENO_REV_A330) {
		adreno_regwrite(device, A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(adreno_dev->ocmem_base >> 14));
	}
}
2786
/* Defined in adreno_a3xx_snapshot.c */
void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
	int *remain, int hang);

/*
 * A3XX implementation of the adreno gpudev interface: register
 * addresses the core code needs, plus the context-switch, ringbuffer,
 * interrupt, busy-counter, start, and snapshot callbacks.
 */
struct adreno_gpudev adreno_a3xx_gpudev = {
	.reg_rbbm_status = A3XX_RBBM_STATUS,
	.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
	.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,

	.ctxt_create = a3xx_drawctxt_create,
	.ctxt_save = a3xx_drawctxt_save,
	.ctxt_restore = a3xx_drawctxt_restore,
	/* A3XX does not need the A2XX draw workaround */
	.ctxt_draw_workaround = NULL,
	.rb_init = a3xx_rb_init,
	.irq_control = a3xx_irq_control,
	.irq_handler = a3xx_irq_handler,
	.busy_cycles = a3xx_busy_cycles,
	.start = a3xx_start,
	.snapshot = a3xx_snapshot,
};
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002806};