blob: c454418a11faecfb0a46b5d749e21c3ccb81ec71 [file] [log] [blame]
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/delay.h>
Steve Mucklef132c6c2012-06-06 18:30:57 -070015#include <linux/sched.h>
Sudhakara Rao Tentu5746bde2012-03-15 12:16:32 +053016#include <mach/socinfo.h>
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070017
18#include "kgsl.h"
19#include "adreno.h"
20#include "kgsl_sharedmem.h"
21#include "kgsl_cffdump.h"
22#include "a3xx_reg.h"
Carter Cooperb769c912012-04-13 08:16:35 -060023#include "adreno_a3xx_trace.h"
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070024
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive).  Offsets are dword register
 * offsets as used by the A3XX register map in a3xx_reg.h.
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303C, 0x303C, 0x305E, 0x305F,
};

/* Number of (start, end) range pairs in a3xx_registers above */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
71
/*
 * The set of additional registers to be dumped for A330, in the same
 * (start, end) pair layout as a3xx_registers above.
 */

const unsigned int a330_registers[] = {
	0x1d0, 0x1d0, 0x1d4, 0x1d4, 0x453, 0x453,
};

/* Number of (start, end) range pairs in a330_registers above */
const unsigned int a330_registers_count = ARRAY_SIZE(a330_registers) / 2;
79
/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */

#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 * 8K - ALU Constant Shadow (8K aligned)
 * 4K - H/W Register Shadow (8K aligned)
 * 5K - Command and Vertex Buffers
 * 8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE (8*1024)	/* 8KB */
#define REG_SHADOW_SIZE (4*1024)	/* 4KB */
#define CMD_BUFFER_SIZE (5*1024)	/* 5KB */
#define TEX_SIZE_MEM_OBJECTS 896	/* bytes */
#define TEX_SIZE_MIPMAP 1936	/* bytes */
#define TEX_SIZE_SAMPLER_OBJ 256	/* bytes */
/* VS + FS copies of the three texture state sections (~6KB total) */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2)	/* ~6KB */
#define SHADER_SHADOW_SIZE (8*1024)	/* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET ALU_SHADOW_SIZE
#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE)
#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE (16*1024)

/* Offsets of the texture state sections within the HLSQ shadow RAM */
#define HLSQ_SAMPLER_OFFSET 0x000
#define HLSQ_MEMOBJ_OFFSET 0x400
#define HLSQ_MIPMAP_OFFSET 0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)

/* Shift for the loop-count field of a CP_REG_TO_MEM packet */
#define REG_TO_MEM_LOOP_COUNT_SHIFT 18

/*
 * Assemble a PC draw-initiator dword from its fields.
 * NOTE(review): index_size is used unparenthesized in the & and >>
 * expressions below, so callers must pass a simple expression — confirm
 * before passing anything containing lower-precedence operators.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	((index_size & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	((index_size >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
156
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains start and end of a range of registers, both
 * inclusive.  These ranges are copied into the register shadow by
 * build_regconstantsave_cmds() when shadow writes are disabled.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
194
/*
 * Global registers that need to be saved separately (outside the
 * context register shadow).  Saved one at a time into
 * tmp_ctx.reg_values[] by build_regconstantsave_cmds(); the order here
 * must stay in sync with that array's indexing.
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};

#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
226
/*
 * A scratchpad used to build commands during context create.  Single
 * instance; only used at context-creation time.
 */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where global register values are
	 * saved, one slot per entry of global_registers[] */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
235
236#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
237/*
238 * Function for executing dest = ( (reg & and) ROL rol ) | or
239 */
240static unsigned int *rmw_regtomem(unsigned int *cmd,
241 unsigned int reg, unsigned int and,
242 unsigned int rol, unsigned int or,
243 unsigned int dest)
244{
245 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
246 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
247 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
248 *cmd++ = 0x00000000; /* AND value */
249 *cmd++ = reg; /* OR address */
250
251 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
252 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
253 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
254 *cmd++ = and; /* AND value */
255 *cmd++ = or; /* OR value */
256
257 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
258 *cmd++ = A3XX_CP_SCRATCH_REG2;
259 *cmd++ = dest;
260
261 return cmd;
262}
263#endif
264
/*
 * build_regconstantsave_cmds() - assemble the IB that saves hardware
 * context registers, shader (ALU) constants and texture state into the
 * per-context shadow memory.
 *
 * @adreno_dev: adreno device (unused in this function)
 * @drawctxt:   context whose shadow buffers are being populated
 *
 * Commands are emitted at tmp_ctx.cmd and wrapped into an IB1 via
 * create_ib1() at the end; tmp_ctx.cmd is advanced past the emitted
 * stream.  Several dwords are written as 0 placeholders and their
 * host/GPU addresses recorded in drawctxt->constant_save_commands[]
 * so a later fixup pass can patch in values that depend on registers
 * only known at save time (see the "From fixup" comments below).
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;
	unsigned int i;

	/* Reserve one dword before the IB start as fixup slot 0 */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	start = cmd;

	/* Drain the pipeline before reading any state */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;	/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		/* NOTE: these locals shadow the outer 'start' pointer;
		 * harmless (loop scope) but trips -Wshadow */
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		/* Destination: shadow base + dword offset of the range,
		 * relative to context register space at 0x2000 */
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants (only if cond_execs[2] flags them
	 * as valid) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Fixup slot 1: loop-count/src dword patched at save time */
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants (only if cond_execs[3] flags
	 * them as valid) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Fixup slot 2: both source and destination dwords patched later */
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	   From fixup:

	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   dst = base + offset
	   Because of the base alignment we can use
	   dst = base | offset
	 */
	*cmd++ = 0;	/* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS copies live SSIZE above the
	 * VS copies in HLSQ shadow RAM) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
435
436/* Copy GMEM contents to system memory shadow. */
437static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
438 struct adreno_context *drawctxt,
439 struct gmem_shadow_t *shadow)
440{
441 unsigned int *cmds = tmp_ctx.cmd;
442 unsigned int *start = cmds;
443
Jordan Crousefb3012f2012-06-22 13:11:05 -0600444 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
445 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
446
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700447 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
448 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
449
450 /* RB_MODE_CONTROL */
451 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
452 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
453 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
454 /* RB_RENDER_CONTROL */
455 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
456 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
457
458 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
459 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
460 /* RB_COPY_CONTROL */
461 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
462 RB_CLEAR_MODE_RESOLVE) |
463 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
464 tmp_ctx.gmem_base >> 14);
465 /* RB_COPY_DEST_BASE */
466 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
467 shadow->gmemshadow.gpuaddr >> 5);
468 /* RB_COPY_DEST_PITCH */
469 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
470 (shadow->pitch * 4) / 32);
471 /* RB_COPY_DEST_INFO */
472 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
473 RB_TILINGMODE_LINEAR) |
474 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
475 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
476 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
477
478 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
479 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
480 /* GRAS_SC_CONTROL */
481 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
482
483 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
484 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
485 /* VFD_CONTROL_0 */
486 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
487 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
488 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
489 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
490 /* VFD_CONTROL_1 */
491 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
492 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
493 _SET(VFD_CTRLREG1_REGID4INST, 252);
494
495 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
496 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
497 /* VFD_FETCH_INSTR_0_0 */
498 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
499 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
500 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
501 /* VFD_FETCH_INSTR_1_0 */
502 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
503 shadow->quad_vertices.gpuaddr);
504
505 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
506 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
507 /* VFD_DECODE_INSTR_0 */
508 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
509 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
510 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700511 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
512 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
513
514 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
515 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
516 /* HLSQ_CONTROL_0_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700517 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700518 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700519 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700520 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700521 /* HLSQ_CONTROL_1_REG */
522 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700523 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700524 /* HLSQ_CONTROL_2_REG */
525 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
526 /* HLSQ_CONTROL_3_REG */
527 *cmds++ = 0x00000000;
528
529 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
530 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
531 /* HLSQ_VS_CONTROL_REG */
532 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
533 /* HLSQ_FS_CONTROL_REG */
534 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700535 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700536 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
537 /* HLSQ_CONST_VSPRESV_RANGE_REG */
538 *cmds++ = 0x00000000;
539 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
540 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
541 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
542
543 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
544 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
545 /* SP_FS_LENGTH_REG */
546 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
547
548 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
549 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
550 /* SP_SP_CTRL_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700551 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1) |
552 _SET(SP_SPCTRLREG_LOMODE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700553
554 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
555 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
556 /* SP_VS_CTRL_REG0 */
557 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
558 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
559 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700560 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700561 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
562 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
563 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
564 /* SP_VS_CTRL_REG1 */
565 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
566 /* SP_VS_PARAM_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700567 *cmds++ = _SET(SP_VSPARAMREG_PSIZEREGID, 252);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700568 /* SP_VS_OUT_REG_0 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_1 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_2 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_3 */
575 *cmds++ = 0x00000000;
576 /* SP_VS_OUT_REG_4 */
577 *cmds++ = 0x00000000;
578 /* SP_VS_OUT_REG_5 */
579 *cmds++ = 0x00000000;
580 /* SP_VS_OUT_REG_6 */
581 *cmds++ = 0x00000000;
582 /* SP_VS_OUT_REG_7 */
583 *cmds++ = 0x00000000;
584
585 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
586 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
587 /* SP_VS_VPC_DST_REG_0 */
588 *cmds++ = 0x00000000;
589 /* SP_VS_VPC_DST_REG_1 */
590 *cmds++ = 0x00000000;
591 /* SP_VS_VPC_DST_REG_2 */
592 *cmds++ = 0x00000000;
593 /* SP_VS_VPC_DST_REG_3 */
594 *cmds++ = 0x00000000;
595 /* SP_VS_OBJ_OFFSET_REG */
596 *cmds++ = 0x00000000;
597 /* SP_VS_OBJ_START_REG */
598 *cmds++ = 0x00000000;
599
600 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
601 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
602 /* SP_VS_LENGTH_REG */
603 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
604 /* SP_FS_CTRL_REG0 */
605 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
606 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
607 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700608 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700609 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700610 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700611 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
612 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
613 /* SP_FS_CTRL_REG1 */
614 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700615 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
616 /* SP_FS_OBJ_OFFSET_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700617 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
618 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 127);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700619 /* SP_FS_OBJ_START_REG */
620 *cmds++ = 0x00000000;
621
622 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
623 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
624 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
625 *cmds++ = 0x00000000;
626 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
627 *cmds++ = 0x00000000;
628
629 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
630 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
631 /* SP_FS_OUTPUT_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700632 *cmds++ = _SET(SP_IMAGEOUTPUTREG_DEPTHOUTMODE, SP_PIXEL_BASED);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700633
634 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
635 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
636 /* SP_FS_MRT_REG_0 */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700637 *cmds++ = _SET(SP_FSMRTREG_PRECISION, 1);
638
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700639 /* SP_FS_MRT_REG_1 */
640 *cmds++ = 0x00000000;
641 /* SP_FS_MRT_REG_2 */
642 *cmds++ = 0x00000000;
643 /* SP_FS_MRT_REG_3 */
644 *cmds++ = 0x00000000;
645
646 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
647 *cmds++ = CP_REG(A3XX_VPC_ATTR);
648 /* VPC_ATTR */
649 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
650 _SET(VPC_VPCATTR_LMSIZE, 1);
651 /* VPC_PACK */
652 *cmds++ = 0x00000000;
653 /* VPC_VARRYING_INTERUPT_MODE_0 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARRYING_INTERUPT_MODE_1 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARRYING_INTERUPT_MODE_2 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARRYING_INTERUPT_MODE_3 */
660 *cmds++ = 0x00000000;
661 /* VPC_VARYING_PS_REPL_MODE_0 */
662 *cmds++ = 0x00000000;
663 /* VPC_VARYING_PS_REPL_MODE_1 */
664 *cmds++ = 0x00000000;
665 /* VPC_VARYING_PS_REPL_MODE_2 */
666 *cmds++ = 0x00000000;
667 /* VPC_VARYING_PS_REPL_MODE_3 */
668 *cmds++ = 0x00000000;
669
670 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
671 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
672 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
673 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
674 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
675 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
676 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
677
678 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700679 *cmds++ = 0x00000000; *cmds++ = 0x13001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700680 /* end; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700681 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700682 /* nop; */
683 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
684 /* nop; */
685 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
686
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700687
688 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
689 *cmds++ = 0x00000000;
690
691 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
692 *cmds++ = 0x00000000;
693
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700694 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
695 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
696 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
697 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
698 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
699 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
700 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
701
702 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700703 *cmds++ = 0x00000000; *cmds++ = 0x30201b00;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700704 /* end; */
705 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
706 /* nop; */
707 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
708 /* nop; */
709 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
710
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700711
712
713 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
714 *cmds++ = 0x00000000;
715
716 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
717 *cmds++ = 0x00000000;
718
719
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700720 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
721 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
722 /* RB_MSAA_CONTROL */
723 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
724 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
725
726 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
727 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
728 /* RB_DEPTH_CONTROL */
729 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
730
731 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700732 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
733 /* RB_STENCIL_CONTROL */
734 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
735 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
736 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
737 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
738 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
739 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
740 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
741 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
742
743 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
744 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
745 /* GRAS_SU_MODE_CONTROL */
746 *cmds++ = 0x00000000;
747
748 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700749 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
750 /* RB_MRT_CONTROL0 */
751 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
752 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
753 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
754 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
755
756 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
757 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
758 /* RB_MRT_BLEND_CONTROL0 */
759 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
760 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
761 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
762 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
763 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
764 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
765 /* RB_MRT_CONTROL1 */
766 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
767 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
768 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
769
770 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
771 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
772 /* RB_MRT_BLEND_CONTROL1 */
773 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
774 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
775 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
776 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
777 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
778 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
779 /* RB_MRT_CONTROL2 */
780 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
781 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
782 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
783
784 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
785 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
786 /* RB_MRT_BLEND_CONTROL2 */
787 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
788 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
789 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
790 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
791 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
792 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
793 /* RB_MRT_CONTROL3 */
794 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
795 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
796 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
797
798 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
799 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
800 /* RB_MRT_BLEND_CONTROL3 */
801 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
802 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
803 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
804 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
805 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
806 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
807
808 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
809 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
810 /* VFD_INDEX_MIN */
811 *cmds++ = 0x00000000;
812 /* VFD_INDEX_MAX */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700813 *cmds++ = 0x155;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700814 /* VFD_INSTANCEID_OFFSET */
815 *cmds++ = 0x00000000;
816 /* VFD_INDEX_OFFSET */
817 *cmds++ = 0x00000000;
818
819 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
820 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
821 /* VFD_VS_THREADING_THRESHOLD */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700822 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_REGID_THRESHOLD, 15) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700823 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
824
825 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
826 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
827 /* TPL1_TP_VS_TEX_OFFSET */
828 *cmds++ = 0;
829
830 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
831 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
832 /* TPL1_TP_FS_TEX_OFFSET */
833 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
834 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
835 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
836
837 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
838 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
839 /* PC_PRIM_VTX_CNTL */
840 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
841 PC_DRAW_TRIANGLES) |
842 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
843 PC_DRAW_TRIANGLES) |
844 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
845
846 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
847 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
848 /* GRAS_SC_WINDOW_SCISSOR_TL */
849 *cmds++ = 0x00000000;
850 /* GRAS_SC_WINDOW_SCISSOR_BR */
851 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
852 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
853
854 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
855 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
856 /* GRAS_SC_SCREEN_SCISSOR_TL */
857 *cmds++ = 0x00000000;
858 /* GRAS_SC_SCREEN_SCISSOR_BR */
859 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
860 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
861
862 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
863 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
864 /* GRAS_CL_VPORT_XOFFSET */
865 *cmds++ = 0x00000000;
866 /* GRAS_CL_VPORT_XSCALE */
867 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
868 /* GRAS_CL_VPORT_YOFFSET */
869 *cmds++ = 0x00000000;
870 /* GRAS_CL_VPORT_YSCALE */
871 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
872
873 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
874 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
875 /* GRAS_CL_VPORT_ZOFFSET */
876 *cmds++ = 0x00000000;
877 /* GRAS_CL_VPORT_ZSCALE */
878 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
879
880 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
881 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
882 /* GRAS_CL_CLIP_CNTL */
883 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
884 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
885 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
886 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
887 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
888
889 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
890 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
891 /* GRAS_CL_GB_CLIP_ADJ */
892 *cmds++ = 0x00000000;
893
894 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
895 *cmds++ = 0x00000000;
896
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700897
898 /* oxili_generate_context_roll_packets */
899 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
900 *cmds++ = 0x00000400;
901
902 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
903 *cmds++ = 0x00000400;
904
905 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
906 *cmds++ = 0x00008000; /* SP_VS_MEM_SIZE_REG */
907
908 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
909 *cmds++ = 0x00008000; /* SP_FS_MEM_SIZE_REG */
910
911 /* Clear cache invalidate bit when re-loading the shader control regs */
912 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
913 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
914 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
915 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 1) |
916 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
917 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
918 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
919
920 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
921 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
922 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
923 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
924 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
925 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
926 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
927 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
928
929 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
930 *cmds++ = 0x00000000; /* SP_VS_MEM_SIZE_REG */
931
932 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
933 *cmds++ = 0x00000000; /* SP_FS_MEM_SIZE_REG */
934
935 /* end oxili_generate_context_roll_packets */
936
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700937 /*
938 * Resolve using two draw calls with a dummy register
939 * write in between. This is a HLM workaround
940 * that should be removed later.
941 */
942 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
943 *cmds++ = 0x00000000; /* Viz query info */
944 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
945 PC_DI_SRC_SEL_IMMEDIATE,
946 PC_DI_INDEX_SIZE_32_BIT,
947 PC_DI_IGNORE_VISIBILITY);
948 *cmds++ = 0x00000003; /* Num indices */
949 *cmds++ = 0x00000000; /* Index 0 */
950 *cmds++ = 0x00000001; /* Index 1 */
951 *cmds++ = 0x00000002; /* Index 2 */
952
953 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
954 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
955 *cmds++ = 0x00000000;
956
957 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
958 *cmds++ = 0x00000000; /* Viz query info */
959 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
960 PC_DI_SRC_SEL_IMMEDIATE,
961 PC_DI_INDEX_SIZE_32_BIT,
962 PC_DI_IGNORE_VISIBILITY);
963 *cmds++ = 0x00000003; /* Num indices */
964 *cmds++ = 0x00000002; /* Index 0 */
965 *cmds++ = 0x00000001; /* Index 1 */
966 *cmds++ = 0x00000003; /* Index 2 */
967
968 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
969 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
970 *cmds++ = 0x00000000;
971
972 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
973 *cmds++ = 0x00000000;
974
975 /* Create indirect buffer command for above command sequence */
976 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
977
978 return cmds;
979}
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700980static void build_shader_save_cmds(struct adreno_device *adreno_dev,
981 struct adreno_context *drawctxt)
982{
983 unsigned int *cmd = tmp_ctx.cmd;
984 unsigned int *start;
985
986 /* Reserve space for boolean values used for COND_EXEC packet */
987 drawctxt->cond_execs[0].hostptr = cmd;
988 drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
989 *cmd++ = 0;
990 drawctxt->cond_execs[1].hostptr = cmd;
991 drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
992 *cmd++ = 0;
993
994 drawctxt->shader_save_commands[0].hostptr = cmd;
995 drawctxt->shader_save_commands[0].gpuaddr =
996 virt2gpu(cmd, &drawctxt->gpustate);
997 *cmd++ = 0;
998 drawctxt->shader_save_commands[1].hostptr = cmd;
999 drawctxt->shader_save_commands[1].gpuaddr =
1000 virt2gpu(cmd, &drawctxt->gpustate);
1001 *cmd++ = 0;
1002
1003 start = cmd;
1004
1005 /* Save vertex shader */
1006
1007 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
1008 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
1009 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
1010 *cmd++ = 0x0000FFFF;
1011 *cmd++ = 3; /* EXEC_COUNT */
1012
1013 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1014 drawctxt->shader_save_commands[2].hostptr = cmd;
1015 drawctxt->shader_save_commands[2].gpuaddr =
1016 virt2gpu(cmd, &drawctxt->gpustate);
1017 /*
1018 From fixup:
1019
1020 dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
1021
1022 From regspec:
1023 SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
1024 If bit31 is 1, it means overflow
1025 or any long shader.
1026
1027 src = (HLSQ_SHADOW_BASE + 0x1000)/4
1028 */
1029 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
1030 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;
1031
1032 /* Save fragment shader */
1033 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
1034 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
1035 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
1036 *cmd++ = 0x0000FFFF;
1037 *cmd++ = 3; /* EXEC_COUNT */
1038
1039 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1040 drawctxt->shader_save_commands[3].hostptr = cmd;
1041 drawctxt->shader_save_commands[3].gpuaddr =
1042 virt2gpu(cmd, &drawctxt->gpustate);
1043 /*
1044 From fixup:
1045
1046 dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
1047
1048 From regspec:
1049 SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
1050 If bit31 is 1, it means overflow
1051 or any long shader.
1052
1053 fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
1054 From regspec:
1055
1056 SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
1057 First instruction of the whole shader will be stored from
1058 the offset in instruction cache, unit = 256bits, a cache line.
1059 It can start from 0 if no VS available.
1060
1061 src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
1062 */
1063 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
1064 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
1065 + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;
1066
1067 /* Create indirect buffer command for above command sequence */
1068 create_ib1(drawctxt, drawctxt->shader_save, start, cmd);
1069
1070 tmp_ctx.cmd = cmd;
1071}
1072
1073/*
1074 * Make an IB to modify context save IBs with the correct shader instruction
1075 * and constant sizes and offsets.
1076 */
1077
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	/*
	 * Build the "save fixup" IB: it runs before the shader/constant save
	 * IBs and patches their placeholder command words with the sizes and
	 * offsets read from the live SP/HLSQ registers, so the saves copy
	 * exactly the resident shader/constant data.
	 *
	 * Two strategies exist: a CPU-sync variant that dumps raw register
	 * values for the CPU to interpret (GSL_CONTEXT_SWITCH_CPU_SYNC), and
	 * the default GPU-only variant that massages the register values in
	 * place with CP_REG_RMW via scratch registers.
	 */
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7; /* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Fully invalidate the UCHE so subsequent reads see fresh data */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
	    UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
	    UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
	    0;	/* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC

	/*
	 * CPU-sync path: copy the raw control registers into the placeholder
	 * slots; the host computes the final loop-count/source words.
	 */

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else

	/*
	 * GPU-only path: use rmw_regtomem() / CP_REG_RMW on scratch
	 * registers to convert register fields into the
	 * (loop_count << REG_TO_MEM_LOOP_COUNT_SHIFT) | src words that the
	 * save IBs expect, then write them into the placeholder slots.
	 */

	/* Shader save */
	/* VS: loop count from VS_LENGTH [31:24], src = HLSQ VS shadow */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0;	/* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
	    A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000;	/* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4;	/* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;	/* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000;	/* AND value */
	*cmd++ = 0x00000000;	/* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff;	/* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3;	/* OR address */

	/* Write the assembled FS save word into its placeholder slot */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	/* FS constant destination offset, merged with the shadow base */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals */
	/* Non-zero const length makes the COND_EXEC flags truthy */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	/* Buffer-mode bit (bit 1) >> 31... shift 31 of masked value; sets the
	 * shader-save COND_EXEC flag from the instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1228
1229/****************************************************************************/
1230/* Functions to build context restore IBs */
1231/****************************************************************************/
1232
1233static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1234 struct adreno_context *drawctxt,
1235 struct gmem_shadow_t *shadow)
1236{
1237 unsigned int *cmds = tmp_ctx.cmd;
1238 unsigned int *start = cmds;
1239
Jordan Crousefb3012f2012-06-22 13:11:05 -06001240 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
1241 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
1242
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001243 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1244 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1245 /* HLSQ_CONTROL_0_REG */
1246 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001247 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001248 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1249 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001250 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001251 /* HLSQ_CONTROL_1_REG */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001252 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
1253 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001254 /* HLSQ_CONTROL_2_REG */
1255 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1256 /* HLSQ_CONTROL3_REG */
1257 *cmds++ = 0x00000000;
1258
1259 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1260 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1261 /* RB_MRT_BUF_INFO0 */
1262 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1263 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1264 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1265 (shadow->gmem_pitch * 4 * 8) / 256);
1266 /* RB_MRT_BUF_BASE0 */
1267 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1268
1269 /* Texture samplers */
1270 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1271 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1272 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1273 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1274 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1275 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1276 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1277 *cmds++ = 0x00000240;
1278 *cmds++ = 0x00000000;
1279
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001280 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1281 *cmds++ = 0x00000000;
1282
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001283 /* Texture memobjs */
1284 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1285 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1286 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1287 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1288 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1289 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1290 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1291 *cmds++ = 0x4cc06880;
1292 *cmds++ = shadow->height | (shadow->width << 14);
1293 *cmds++ = (shadow->pitch*4*8) << 9;
1294 *cmds++ = 0x00000000;
1295
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001296 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1297 *cmds++ = 0x00000000;
1298
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001299 /* Mipmap bases */
1300 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1301 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1302 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1303 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1304 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1305 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1306 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1307 *cmds++ = shadow->gmemshadow.gpuaddr;
1308 *cmds++ = 0x00000000;
1309 *cmds++ = 0x00000000;
1310 *cmds++ = 0x00000000;
1311 *cmds++ = 0x00000000;
1312 *cmds++ = 0x00000000;
1313 *cmds++ = 0x00000000;
1314 *cmds++ = 0x00000000;
1315 *cmds++ = 0x00000000;
1316 *cmds++ = 0x00000000;
1317 *cmds++ = 0x00000000;
1318 *cmds++ = 0x00000000;
1319 *cmds++ = 0x00000000;
1320 *cmds++ = 0x00000000;
1321
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001322 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1323 *cmds++ = 0x00000000;
1324
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001325 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1326 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1327 /* HLSQ_VS_CONTROL_REG */
1328 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1329 /* HLSQ_FS_CONTROL_REG */
1330 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1331 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1332 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1333 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1334 *cmds++ = 0x00000000;
1335 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1336 *cmds++ = 0x00000000;
1337
1338 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1339 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1340 /* SP_FS_LENGTH_REG */
1341 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1342
1343 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1344 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1345 /* SP_VS_CTRL_REG0 */
1346 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1347 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1348 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1349 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1350 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1351 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1352 /* SP_VS_CTRL_REG1 */
1353 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1354 /* SP_VS_PARAM_REG */
1355 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1356 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1357 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1358 /* SP_VS_OUT_REG0 */
1359 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1360 /* SP_VS_OUT_REG1 */
1361 *cmds++ = 0x00000000;
1362 /* SP_VS_OUT_REG2 */
1363 *cmds++ = 0x00000000;
1364 /* SP_VS_OUT_REG3 */
1365 *cmds++ = 0x00000000;
1366 /* SP_VS_OUT_REG4 */
1367 *cmds++ = 0x00000000;
1368 /* SP_VS_OUT_REG5 */
1369 *cmds++ = 0x00000000;
1370 /* SP_VS_OUT_REG6 */
1371 *cmds++ = 0x00000000;
1372 /* SP_VS_OUT_REG7 */
1373 *cmds++ = 0x00000000;
1374
1375 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1376 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1377 /* SP_VS_VPC_DST_REG0 */
1378 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1379 /* SP_VS_VPC_DST_REG1 */
1380 *cmds++ = 0x00000000;
1381 /* SP_VS_VPC_DST_REG2 */
1382 *cmds++ = 0x00000000;
1383 /* SP_VS_VPC_DST_REG3 */
1384 *cmds++ = 0x00000000;
1385 /* SP_VS_OBJ_OFFSET_REG */
1386 *cmds++ = 0x00000000;
1387 /* SP_VS_OBJ_START_REG */
1388 *cmds++ = 0x00000000;
1389
1390 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1391 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1392 /* SP_VS_LENGTH_REG */
1393 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1394 /* SP_FS_CTRL_REG0 */
1395 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1396 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1397 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001398 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
1399 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001400 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1401 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001402 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001403 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1404 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1405 /* SP_FS_CTRL_REG1 */
1406 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1407 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1408 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1409 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001410 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001411 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 126);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001412 /* SP_FS_OBJ_START_REG */
1413 *cmds++ = 0x00000000;
1414
1415 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1416 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1417 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1418 *cmds++ = 0x00000000;
1419 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1420 *cmds++ = 0x00000000;
1421
1422 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1423 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1424 /* SP_FS_OUT_REG */
1425 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1426
Jordan Crousea7ec4212012-02-04 10:23:52 -07001427 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001428 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1429 /* SP_FS_MRT_REG0 */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001430 *cmds++ = _SET(SP_FSMRTREG_PRECISION, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001431 /* SP_FS_MRT_REG1 */
1432 *cmds++ = 0;
1433 /* SP_FS_MRT_REG2 */
1434 *cmds++ = 0;
1435 /* SP_FS_MRT_REG3 */
1436 *cmds++ = 0;
1437
1438 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1439 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1440 /* VPC_ATTR */
1441 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1442 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1443 _SET(VPC_VPCATTR_LMSIZE, 1);
1444 /* VPC_PACK */
1445 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1446 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1447 /* VPC_VARYING_INTERP_MODE_0 */
1448 *cmds++ = 0x00000000;
1449 /* VPC_VARYING_INTERP_MODE1 */
1450 *cmds++ = 0x00000000;
1451 /* VPC_VARYING_INTERP_MODE2 */
1452 *cmds++ = 0x00000000;
1453 /* VPC_VARYING_IINTERP_MODE3 */
1454 *cmds++ = 0x00000000;
1455 /* VPC_VARRYING_PS_REPL_MODE_0 */
1456 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1457 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1458 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1459 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1460 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1461 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1462 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1463 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1464 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1465 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1466 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1467 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1468 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1469 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1470 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1471 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1472 /* VPC_VARRYING_PS_REPL_MODE_1 */
1473 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1474 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1475 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1476 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1477 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1478 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1479 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1480 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1481 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1482 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1483 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1484 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1485 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1486 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1487 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1488 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1489 /* VPC_VARRYING_PS_REPL_MODE_2 */
1490 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1491 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1492 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1493 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1494 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1495 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1496 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1497 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1498 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1499 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1500 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1501 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1502 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1503 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1504 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1505 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1506 /* VPC_VARRYING_PS_REPL_MODE_3 */
1507 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1508 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1509 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1510 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1511 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1512 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1513 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1514 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1515 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1516 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1517 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1518 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1519 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1520 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1521 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1522 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1523
Jordan Crousea7ec4212012-02-04 10:23:52 -07001524 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001525 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1526 /* SP_SP_CTRL_REG */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001527 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1) |
1528 _SET(SP_SPCTRLREG_LOMODE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001529
1530 /* Load vertex shader */
1531 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1532 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1533 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1534 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1535 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1536 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1537 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1538 /* (sy)end; */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001539 *cmds++ = 0x00000000; *cmds++ = 0x13001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001540 /* nop; */
1541 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1542 /* nop; */
1543 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1544 /* nop; */
1545 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1546
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001547 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1548 *cmds++ = 0x00000000;
1549
1550 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1551 *cmds++ = 0x00000000;
1552
1553
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001554 /* Load fragment shader */
1555 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1556 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1557 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1558 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1559 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1560 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1561 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1562 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001563 *cmds++ = 0x00002000; *cmds++ = 0x57309902;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001564 /* (rpt5)nop; */
1565 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1566 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1567 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1568 /* (sy)mov.f32f32 r1.x, r0.x; */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001569 *cmds++ = 0x00000000; *cmds++ = 0x30040b00;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001570 /* mov.f32f32 r1.y, r0.y; */
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001571 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001572 /* mov.f32f32 r1.z, r0.z; */
1573 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1574 /* mov.f32f32 r1.w, r0.w; */
1575 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1576 /* end; */
1577 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1578
1579 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1580 *cmds++ = 0x00000000;
1581
1582 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1583 *cmds++ = 0x00000000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001584
1585 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1586 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1587 /* VFD_CONTROL_0 */
1588 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1589 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1590 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1591 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1592 /* VFD_CONTROL_1 */
1593 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1594 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1595 _SET(VFD_CTRLREG1_REGID4INST, 252);
1596
1597 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1598 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1599 /* VFD_FETCH_INSTR_0_0 */
1600 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1601 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1602 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1603 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1604 /* VFD_FETCH_INSTR_1_0 */
1605 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1606 shadow->quad_vertices_restore.gpuaddr);
1607 /* VFD_FETCH_INSTR_0_1 */
1608 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1609 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1610 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1611 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1612 /* VFD_FETCH_INSTR_1_1 */
1613 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1614 shadow->quad_vertices_restore.gpuaddr + 16);
1615
1616 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1617 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1618 /* VFD_DECODE_INSTR_0 */
1619 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1620 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1621 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1622 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1623 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1624 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1625 /* VFD_DECODE_INSTR_1 */
1626 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1627 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1628 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1629 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1630 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1631 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1632
1633 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1634 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1635 /* RB_DEPTH_CONTROL */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001636 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_LESS);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001637
1638 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1639 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1640 /* RB_STENCIL_CONTROL */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001641 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001642 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1643 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1644 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001645 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001646 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1647 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1648 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1649
1650 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1651 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1652 /* RB_MODE_CONTROL */
1653 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1654 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1655
1656 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1657 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1658 /* RB_RENDER_CONTROL */
1659 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1660 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1661
1662 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1663 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1664 /* RB_MSAA_CONTROL */
1665 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1666 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1667
1668 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1669 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1670 /* RB_MRT_CONTROL0 */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001671 *cmds++ = _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1672 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001673 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1674
1675 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1676 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1677 /* RB_MRT_BLENDCONTROL0 */
1678 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1679 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1680 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1681 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1682 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1683 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1684 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1685 /* RB_MRT_CONTROL1 */
1686 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001687 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1688 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001689 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1690
1691 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1692 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1693 /* RB_MRT_BLENDCONTROL1 */
1694 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1695 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1696 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1697 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1698 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1699 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1700 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1701 /* RB_MRT_CONTROL2 */
1702 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001703 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1704 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001705 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1706
1707 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1708 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1709 /* RB_MRT_BLENDCONTROL2 */
1710 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1711 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1712 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1713 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1714 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1715 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1716 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1717 /* RB_MRT_CONTROL3 */
1718 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001719 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1720 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001721 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1722
1723 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1724 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1725 /* RB_MRT_BLENDCONTROL3 */
1726 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1727 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1728 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1729 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1730 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1731 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1732 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1733
1734 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1735 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1736 /* VFD_INDEX_MIN */
1737 *cmds++ = 0x00000000;
1738 /* VFD_INDEX_MAX */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001739 *cmds++ = 340;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001740 /* VFD_INDEX_OFFSET */
1741 *cmds++ = 0x00000000;
1742 /* TPL1_TP_VS_TEX_OFFSET */
1743 *cmds++ = 0x00000000;
1744
1745 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1746 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1747 /* VFD_VS_THREADING_THRESHOLD */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001748 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_REGID_THRESHOLD, 15) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001749 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1750
1751 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1752 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1753 /* TPL1_TP_VS_TEX_OFFSET */
1754 *cmds++ = 0x00000000;
1755
1756 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1757 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1758 /* TPL1_TP_FS_TEX_OFFSET */
1759 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1760 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1761 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1762
1763 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1764 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1765 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001766 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1767 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1768 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001769
1770 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1771 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1772 /* GRAS_SU_MODE_CONTROL */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001773 *cmds++ = _SET(GRAS_SU_CTRLMODE_LINEHALFWIDTH, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001774
1775 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1776 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1777 /* GRAS_SC_WINDOW_SCISSOR_TL */
1778 *cmds++ = 0x00000000;
1779 /* GRAS_SC_WINDOW_SCISSOR_BR */
1780 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1781 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1782
1783 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1784 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1785 /* GRAS_SC_SCREEN_SCISSOR_TL */
1786 *cmds++ = 0x00000000;
1787 /* GRAS_SC_SCREEN_SCISSOR_BR */
1788 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1789 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1790
1791 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1792 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1793 /* GRAS_CL_VPORT_XOFFSET */
1794 *cmds++ = 0x00000000;
1795 /* GRAS_CL_VPORT_XSCALE */
1796 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1797 /* GRAS_CL_VPORT_YOFFSET */
1798 *cmds++ = 0x00000000;
1799 /* GRAS_CL_VPORT_YSCALE */
1800 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1801
1802 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1803 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1804 /* GRAS_CL_VPORT_ZOFFSET */
1805 *cmds++ = 0x00000000;
1806 /* GRAS_CL_VPORT_ZSCALE */
1807 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1808
1809 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1810 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1811 /* GRAS_CL_CLIP_CNTL */
1812 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1813
1814 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1815 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1816 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1817 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1818
1819 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1820 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1821 /* PC_PRIM_VTX_CONTROL */
1822 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1823 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1824 PC_DRAW_TRIANGLES) |
1825 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1826 PC_DRAW_TRIANGLES) |
1827 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1828
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001829
1830 /* oxili_generate_context_roll_packets */
1831 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
1832 *cmds++ = 0x00000400;
1833
1834 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
1835 *cmds++ = 0x00000400;
1836
1837 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
1838 *cmds++ = 0x00008000; /* SP_VS_MEM_SIZE_REG */
1839
1840 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
1841 *cmds++ = 0x00008000; /* SP_FS_MEM_SIZE_REG */
1842
1843 /* Clear cache invalidate bit when re-loading the shader control regs */
1844 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
1845 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1846 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1847 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1848 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1849 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1850
1851 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
1852 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1853 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1854 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
1855 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 1) |
1856 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1857 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1858 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
1859 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1860
1861 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
1862 *cmds++ = 0x00000000; /* SP_VS_MEM_SIZE_REG */
1863
1864 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
1865 *cmds++ = 0x00000000; /* SP_FS_MEM_SIZE_REG */
1866
1867 /* end oxili_generate_context_roll_packets */
1868
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001869 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1870 *cmds++ = 0x00000000; /* Viz query info */
1871 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1872 PC_DI_SRC_SEL_AUTO_INDEX,
1873 PC_DI_INDEX_SIZE_16_BIT,
1874 PC_DI_IGNORE_VISIBILITY);
1875 *cmds++ = 0x00000002; /* Num indices */
1876
1877 /* Create indirect buffer command for above command sequence */
1878 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1879
1880 return cmds;
1881}
1882
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001883
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001884static void build_regrestore_cmds(struct adreno_device *adreno_dev,
1885 struct adreno_context *drawctxt)
1886{
1887 unsigned int *start = tmp_ctx.cmd;
1888 unsigned int *cmd = start;
1889 unsigned int *lcc_start;
1890
1891 int i;
1892
1893 /* Flush HLSQ lazy updates */
1894 *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
1895 *cmd++ = 0x7; /* HLSQ_FLUSH */
1896 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1897 *cmd++ = 0;
1898
1899 *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
1900 *cmd++ = 0x00000000; /* No start addr for full invalidate */
1901 *cmd++ = (unsigned int)
1902 UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
1903 UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
1904 0; /* No end addr for full invalidate */
1905
1906 lcc_start = cmd;
1907
1908 /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
1909 cmd++;
1910
1911#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
1912 /* Force mismatch */
1913 *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
1914#else
1915 *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
1916#endif
1917
1918 for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
1919 cmd = reg_range(cmd, context_register_ranges[i * 2],
1920 context_register_ranges[i * 2 + 1]);
1921 }
1922
1923 lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
1924 (cmd - lcc_start) - 1);
1925
1926#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
1927 lcc_start[2] |= (0 << 24) | (4 << 16); /* Disable shadowing. */
1928#else
1929 lcc_start[2] |= (1 << 24) | (4 << 16);
1930#endif
1931
1932 for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
1933 *cmd++ = cp_type0_packet(global_registers[i], 1);
1934 tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
1935 *cmd++ = 0x00000000;
1936 }
1937
1938 create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
1939 tmp_ctx.cmd = cmd;
1940}
1941
/*
 * build_constantrestore_cmds() - Build the IB that restores shader
 * constants and texture state for a drawctxt.
 *
 * Emits conditionally-executed CP_LOAD_STATE packets for vertex and
 * fragment shader constants (sizes/offsets are unknown at build time,
 * so those ord1/ord2 words are left as placeholders and back-patched at
 * save time by the fixup IB - see build_restore_fixup_cmds), followed
 * by unconditional CP_LOAD_STATE packets restoring VS/FS texture memory
 * objects, mipmap address tables and sampler objects from their shadow
 * areas.  On return, drawctxt->constant_restore holds the IB1
 * descriptor and tmp_ctx.cmd points past the generated commands.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/*
	 * Two condition words (filled in by the save-time fixup) gate the
	 * VS/FS constant loads below; a zero value skips the load.
	 */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* Temporarily disable register shadowing for the HLSQ write below */
#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	/* Record where the fixup IB must patch the ord1 word */
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	/* ord1 placeholder, patched with the FS constant size at save time */
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	/* ord2 placeholder, patched with shadow base + FS constant offset */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2106
/*
 * build_shader_restore_cmds() - Build the IB that reloads the vertex and
 * fragment shader instructions from their shadow memory.
 *
 * Each shader load is wrapped in a CP_COND_EXEC keyed on
 * drawctxt->cond_execs[0]/[1] so the load only runs when a shader was
 * actually saved.  The CP_LOAD_STATE ord1 words (which encode the shader
 * length read from SP_VS/FS_CTRL_REG0) are left as placeholders and
 * back-patched at save time via drawctxt->shader_load_commands[].
 * On return, drawctxt->shader_restore holds the IB1 descriptor and
 * tmp_ctx.cmd points past the generated commands.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	/* Record where the fixup IB must patch the VS ord1 word */
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	/* Record where the fixup IB must patch the FS ord1 word */
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	/* FS shadow lives in the second half of the shader shadow area */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2173
2174static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
2175 struct adreno_context *drawctxt)
2176{
2177 unsigned int *cmd = tmp_ctx.cmd;
2178 unsigned int *start = cmd;
2179
2180 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
2181 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
2182 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
2183 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
2184 = virt2gpu(cmd, &drawctxt->gpustate);
2185 *cmd++ = 0;
2186
2187 /* Create indirect buffer command for above command sequence */
2188 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
2189
2190 tmp_ctx.cmd = cmd;
2191}
2192
/*
 * build_restore_fixup_cmds() - Build the IB that, at context-save time,
 * patches the placeholder words inside the restore IBs (shader and
 * constant sizes, constant shadow offset, saved HLSQ control value).
 *
 * In the GSL_CONTEXT_SWITCH_CPU_SYNC variant the relevant SP registers
 * are copied out verbatim with CP_REG_TO_MEM; otherwise rmw_regtomem()
 * is used to mask/shift each register field and merge it with the
 * constant bits of the target CP_LOAD_STATE ord words in place.
 */
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/*
	 * Save shader sizes: extract the VS/FS_LENGTH field (mask
	 * 0x7f000000) from SP_*_CTRL_REG0, rotate it into the numunits
	 * position and OR in the fixed stateblock/mode bits, writing the
	 * result to the restore IB's ord1 placeholder.
	 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes (VS/FSCONSTLENGTH, mask 0x3ff) likewise */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/*
	 * Modify constant restore conditionals: a nonzero constant length
	 * arms the CP_COND_EXEC gates in the constant-restore IB.
	 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2265
/*
 * a3xx_create_gpustate_shadow() - Build every save/restore IB for a
 * drawctxt's GPU state shadow and flag the context as shadowed.
 *
 * The builders are called in sequence; each one carves its command
 * space out of the shared buffer via tmp_ctx.cmd, so the call order
 * determines the buffer layout and must not be changed casually.
 *
 * Always returns 0 (the builders have no failure path to report).
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2282
2283/* create buffers for saving/restoring registers, constants, & GMEM */
2284static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
2285 struct adreno_context *drawctxt)
2286{
Jordan Crousea7ec4212012-02-04 10:23:52 -07002287 int result;
2288
Jordan Crouse7501d452012-04-19 08:58:44 -06002289 calc_gmemsize(&drawctxt->context_gmem_shadow, adreno_dev->gmem_size);
2290 tmp_ctx.gmem_base = adreno_dev->gmem_base;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002291
Jordan Crousea7ec4212012-02-04 10:23:52 -07002292 result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
2293 drawctxt->pagetable, drawctxt->context_gmem_shadow.size);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002294
Jordan Crousea7ec4212012-02-04 10:23:52 -07002295 if (result)
2296 return result;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002297
2298 build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
2299 &tmp_ctx.cmd);
2300
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002301 tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
2302 &drawctxt->context_gmem_shadow);
2303 tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
2304 &drawctxt->context_gmem_shadow);
2305
2306 kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
2307 KGSL_CACHE_OP_FLUSH);
2308
Jordan Crousea7ec4212012-02-04 10:23:52 -07002309 drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
2310
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002311 return 0;
2312}
2313
2314static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
2315 struct adreno_context *drawctxt)
2316{
2317 int ret;
2318
2319 /*
2320 * Allocate memory for the GPU state and the context commands.
2321 * Despite the name, this is much more then just storage for
2322 * the gpustate. This contains command space for gmem save
2323 * and texture and vertex buffer storage too
2324 */
2325
2326 ret = kgsl_allocate(&drawctxt->gpustate,
2327 drawctxt->pagetable, CONTEXT_SIZE);
2328
2329 if (ret)
2330 return ret;
2331
2332 kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
2333 tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;
2334
2335 if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
2336 ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
2337 if (ret)
2338 goto done;
2339
2340 drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
2341 }
2342
2343 if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
2344 ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);
2345
2346done:
2347 if (ret)
2348 kgsl_sharedmem_free(&drawctxt->gpustate);
2349
2350 return ret;
2351}
2352
/*
 * Save the hardware state of the outgoing context by submitting the
 * pre-built save command streams to the ringbuffer. Submission order
 * matters: fixups first, then registers/constants, then shaders, and
 * GMEM last (the GMEM save clobbers shader state, so shaders must
 * already have been saved).
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
	struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Nothing to save for a dying or absent context */
	if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTOYED))
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			"Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE, context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device, context,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			/* Shaders were saved, so they must be restored */
			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_save, 3);
		/* Mark GMEM as needing a restore on the way back in */
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2398
/*
 * Restore the hardware state of the incoming context. Writes the new
 * context id into the memstore, switches the pagetable, then replays
 * the pre-built restore command streams. GMEM is restored first (it
 * clobbers shader state), followed by registers, fixups, constants,
 * shaders and HLSQ control.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
	struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		/*
		 * NOTE(review): this dereferences drawctxt_active without a
		 * NULL check — presumably a previous context is always
		 * active when this path is taken; confirm against callers.
		 */
		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
				adreno_dev->drawctxt_active->id);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Record the incoming context id in the global memstore slot */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[4] = context->id;
	adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
					cmds, 5);
	kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id);

	/*
	 * Restore GMEM. (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Restore shadowed registers */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE, context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		/* Restore shadowed constants */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, context,
				KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2461
/*
 * Write the CP_ME_INIT packet into the ringbuffer and submit it. This
 * is the first packet the microengine processes after ucode load; the
 * 17 payload dwords configure the ME (the 0x000003f7 mask selects
 * which of the following fields are valid).
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	/* Reserve 18 dwords: the type3 header plus 17 payload dwords */
	cmds = adreno_ringbuffer_allocspace(rb, NULL, 18);
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2491
2492static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2493{
2494 struct kgsl_device *device = &adreno_dev->dev;
2495 const char *err = "";
2496
2497 switch (bit) {
2498 case A3XX_INT_RBBM_AHB_ERROR: {
2499 unsigned int reg;
2500
2501 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2502
2503 /*
2504 * Return the word address of the erroring register so that it
2505 * matches the register specification
2506 */
2507
2508 KGSL_DRV_CRIT(device,
2509 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2510 reg & (1 << 28) ? "WRITE" : "READ",
2511 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2512 (reg >> 24) & 0x3);
2513
2514 /* Clear the error */
2515 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2516 return;
2517 }
2518 case A3XX_INT_RBBM_REG_TIMEOUT:
2519 err = "RBBM: AHB register timeout";
2520 break;
2521 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2522 err = "RBBM: ME master split timeout";
2523 break;
2524 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2525 err = "RBBM: PFP master split timeout";
2526 break;
2527 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2528 err = "RBBM: ATB bus oveflow";
2529 break;
2530 case A3XX_INT_VFD_ERROR:
2531 err = "VFD: Out of bounds access";
2532 break;
2533 case A3XX_INT_CP_T0_PACKET_IN_IB:
2534 err = "ringbuffer TO packet in IB interrupt";
2535 break;
2536 case A3XX_INT_CP_OPCODE_ERROR:
2537 err = "ringbuffer opcode error interrupt";
2538 break;
2539 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2540 err = "ringbuffer reserved bit error interrupt";
2541 break;
2542 case A3XX_INT_CP_HW_FAULT:
2543 err = "ringbuffer hardware fault";
2544 break;
2545 case A3XX_INT_CP_REG_PROTECT_FAULT:
2546 err = "ringbuffer protected mode error interrupt";
2547 break;
2548 case A3XX_INT_CP_AHB_ERROR_HALT:
2549 err = "ringbuffer AHB error interrupt";
2550 break;
2551 case A3XX_INT_MISC_HANG_DETECT:
2552 err = "MISC: GPU hang detected";
2553 break;
2554 case A3XX_INT_UCHE_OOB_ACCESS:
2555 err = "UCHE: Out of bounds access";
2556 break;
2557 }
2558
2559 KGSL_DRV_CRIT(device, "%s\n", err);
2560 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2561}
2562
/*
 * Handle CP (command processor) interrupts: RB, IB1 and IB2 done.
 * For a ringbuffer interrupt, clear the per-context and global
 * ts_cmp_enable flags in the memstore, then wake any waiters and
 * queue the timestamp-expiry work.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (irq == A3XX_INT_CP_RB_INT) {
		unsigned int context_id;
		/* Find out which context is currently active on the GPU */
		kgsl_sharedmem_readl(&device->memstore, &context_id,
				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
					current_context));
		if (context_id < KGSL_MEMSTORE_MAX) {
			/* reset per context ts_cmp_enable */
			kgsl_sharedmem_writel(&device->memstore,
					KGSL_MEMSTORE_OFFSET(context_id,
						ts_cmp_enable), 0);
			/* Always reset global timestamp ts_cmp_enable */
			kgsl_sharedmem_writel(&device->memstore,
					KGSL_MEMSTORE_OFFSET(
						KGSL_MEMSTORE_GLOBAL,
						ts_cmp_enable), 0);
			/* Make the writes visible before waking waiters */
			wmb();
		}
		KGSL_CMD_WARN(device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(device->work_queue, &device->ts_expired_ws);

	atomic_notifier_call_chain(&device->ts_notifier_list,
				   device->id, NULL);
}
2595
/* Designated initializer shorthand for entries in a3xx_irq_funcs */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupt bits enabled in RBBM_INT_0_MASK. NOTE(review): several
 * error sources that have handlers in a3xx_irq_funcs (REG_TIMEOUT,
 * the ME/PFP master-split timeouts, VFD_ERROR, MISC_HANG_DETECT) are
 * absent from this mask and so stay disabled — presumably intentional;
 * confirm before adding them.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) | \
	(1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	(1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \
	(1 << A3XX_INT_CP_OPCODE_ERROR) | \
	(1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	(1 << A3XX_INT_CP_HW_FAULT) | \
	(1 << A3XX_INT_CP_IB1_INT) | \
	(1 << A3XX_INT_CP_IB2_INT) | \
	(1 << A3XX_INT_CP_RB_INT) | \
	(1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \
	(1 << A3XX_INT_CP_AHB_ERROR_HALT) | \
	(1 << A3XX_INT_UCHE_OOB_ACCESS))
2611
/*
 * Interrupt dispatch table, indexed by RBBM_INT_0_STATUS bit number.
 * NULL entries are either unused bits or interrupts that are not
 * enabled in A3XX_INT_MASK; a set bit with a NULL entry is reported
 * as unhandled by a3xx_irq_handler.
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	       /* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	       /* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	       /* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2643
2644static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2645{
2646 struct kgsl_device *device = &adreno_dev->dev;
2647 irqreturn_t ret = IRQ_NONE;
2648 unsigned int status, tmp;
2649 int i;
2650
2651 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2652
2653 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2654 if (tmp & 1) {
2655 if (a3xx_irq_funcs[i].func != NULL) {
2656 a3xx_irq_funcs[i].func(adreno_dev, i);
2657 ret = IRQ_HANDLED;
2658 } else {
2659 KGSL_DRV_CRIT(device,
2660 "Unhandled interrupt bit %x\n", i);
2661 }
2662 }
2663
2664 tmp >>= 1;
2665 }
2666
Carter Cooperb769c912012-04-13 08:16:35 -06002667 trace_kgsl_a3xx_irq_status(device, status);
2668
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002669 if (status)
2670 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2671 status);
2672 return ret;
2673}
2674
2675static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2676{
2677 struct kgsl_device *device = &adreno_dev->dev;
2678
Wei Zou08a7e572012-06-03 22:05:46 -07002679 if (state)
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002680 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
Wei Zou08a7e572012-06-03 22:05:46 -07002681 else
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002682 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2683}
2684
/*
 * Sample the GPU busy-cycle power counter (PWR_CTR1). The counter is
 * frozen before the read so the value is stable, then reset and
 * re-enabled, so each call returns the cycles elapsed since the
 * previous call. The write ordering below is deliberate.
 */
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int reg, val;

	/* Freeze the counter */
	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
	reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Read the value */
	adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);

	/* Reset the counter */
	reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Re-enable the counter */
	reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
	reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	return val;
}
2709
Carter Cooper86a5af32012-08-15 16:14:38 -06002710struct a3xx_vbif_data {
2711 unsigned int reg;
2712 unsigned int val;
2713};
2714
/* VBIF registers start after 0x3000 so use 0x0 as end of list marker */

/* VBIF (bus interface) setup for A305 */
static struct a3xx_vbif_data a305_vbif[] = {
	/* Set up 16 deep read/write request queues */
	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 },
	{ A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 },
	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 },
	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010 },
	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 },
	{ A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 },
	/* Enable WR-REQ */
	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF },
	/* Set up round robin arbitration between both AXI ports */
	{ A3XX_VBIF_ARB_CTL, 0x00000030 },
	/* Set up AOOO */
	{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C },
	{ A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C },
	{0, 0},
};
2734
/* VBIF (bus interface) setup for A320 — A305 settings plus 1K sort */
static struct a3xx_vbif_data a320_vbif[] = {
	/* Set up 16 deep read/write request queues */
	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 },
	{ A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 },
	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 },
	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010 },
	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 },
	{ A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 },
	/* Enable WR-REQ */
	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF },
	/* Set up round robin arbitration between both AXI ports */
	{ A3XX_VBIF_ARB_CTL, 0x00000030 },
	/* Set up AOOO */
	{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C },
	{ A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C },
	/* Enable 1K sort */
	{ A3XX_VBIF_ABIT_SORT, 0x000000FF },
	{ A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
	{0, 0},
};
2756
/* VBIF (bus interface) setup for A330 — deeper queues, QOS arbitration */
static struct a3xx_vbif_data a330_vbif[] = {
	/* Set up 16 deep read/write request queues */
	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 },
	{ A3XX_VBIF_IN_RD_LIM_CONF1, 0x00001818 },
	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00001818 },
	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00001818 },
	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 },
	{ A3XX_VBIF_IN_WR_LIM_CONF1, 0x00001818 },
	/* Enable WR-REQ */
	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F },
	/* Set up round robin arbitration between both AXI ports */
	{ A3XX_VBIF_ARB_CTL, 0x00000030 },
	/* Set up VBIF_ROUND_ROBIN_QOS_ARB */
	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001 },
	/* Set up AOOO */
	{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003F },
	{ A3XX_VBIF_OUT_AXI_AOOO, 0x003F003F },
	/* Enable 1K sort */
	{ A3XX_VBIF_ABIT_SORT, 0x0001003F },
	{ A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
	/* Disable VBIF clock gating. This is to enable AXI running
	 * higher frequency than GPU.
	 */
	{ A3XX_VBIF_CLKON, 1 },
	{0, 0},
};
2784
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002785static void a3xx_start(struct adreno_device *adreno_dev)
2786{
2787 struct kgsl_device *device = &adreno_dev->dev;
Carter Cooper86a5af32012-08-15 16:14:38 -06002788 struct a3xx_vbif_data *vbif = NULL;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002789
Carter Cooper86a5af32012-08-15 16:14:38 -06002790 if (adreno_is_a305(adreno_dev))
2791 vbif = a305_vbif;
2792 else if (adreno_is_a320(adreno_dev))
2793 vbif = a320_vbif;
2794 else if (adreno_is_a330(adreno_dev))
2795 vbif = a330_vbif;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002796
Carter Cooper86a5af32012-08-15 16:14:38 -06002797 BUG_ON(vbif == NULL);
Jordan Crouse563cf0f2012-02-21 08:54:53 -07002798
Carter Cooper86a5af32012-08-15 16:14:38 -06002799 while (vbif->reg != 0) {
2800 adreno_regwrite(device, vbif->reg, vbif->val);
2801 vbif++;
liu zhongfd42e622012-05-01 19:18:30 -07002802 }
Jordan Crouse563cf0f2012-02-21 08:54:53 -07002803
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002804 /* Make all blocks contribute to the GPU BUSY perf counter */
2805 adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);
2806
Jordan Crousea1d43ff2012-04-09 09:37:50 -06002807 /* Tune the hystersis counters for SP and CP idle detection */
2808 adreno_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
2809 adreno_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
2810
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002811 /* Enable the RBBM error reporting bits. This lets us get
2812 useful information on failure */
2813
2814 adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);
2815
2816 /* Enable AHB error reporting */
Wei Zou8e6dfcc2012-03-16 14:53:39 -06002817 adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002818
2819 /* Turn on the power counters */
Tarun Karra4b6bd982012-04-23 17:55:36 -07002820 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);
Jordan Crouseb6ebffe2012-02-04 10:23:53 -07002821
2822 /* Turn on hang detection - this spews a lot of useful information
2823 * into the RBBM registers on a hang */
2824
2825 adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
2826 (1 << 16) | 0xFFF);
2827
Kevin Matlage17fbff72012-08-29 16:50:45 -06002828 /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0). */
2829 adreno_regwrite(device, A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
2830
Rajeev Kulkarni7f177962012-06-22 12:09:44 -07002831 /* Enable Clock gating */
2832 adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
2833 A3XX_RBBM_CLOCK_CTL_DEFAULT);
2834
liu zhong5af32d92012-08-29 14:36:36 -06002835 /* Set the OCMEM base address for A330 */
Jordan Crousec0978202012-08-29 14:35:51 -06002836 if (adreno_is_a330(adreno_dev)) {
liu zhong5af32d92012-08-29 14:36:36 -06002837 adreno_regwrite(device, A3XX_RB_GMEM_BASE_ADDR,
2838 (unsigned int)(adreno_dev->ocmem_base >> 14));
2839 }
Jordan Crouseb5c80482012-10-03 09:38:41 -06002840
2841 /* Turn on performance counters */
2842 adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01);
2843
2844 /*
2845 * Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS
2846 * we will use this to augment our hang detection
2847 */
2848
2849 adreno_regwrite(device, A3XX_SP_PERFCOUNTER7_SELECT,
2850 SP_FS_FULL_ALU_INSTRUCTIONS);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002851}
2852
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002853/* Defined in adreno_a3xx_snapshot.c */
2854void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2855 int *remain, int hang);
2856
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002857struct adreno_gpudev adreno_a3xx_gpudev = {
2858 .reg_rbbm_status = A3XX_RBBM_STATUS,
2859 .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
2860 .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
2861
2862 .ctxt_create = a3xx_drawctxt_create,
2863 .ctxt_save = a3xx_drawctxt_save,
2864 .ctxt_restore = a3xx_drawctxt_restore,
Shubhraprakash Das4624b552012-06-01 14:08:03 -06002865 .ctxt_draw_workaround = NULL,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002866 .rb_init = a3xx_rb_init,
2867 .irq_control = a3xx_irq_control,
2868 .irq_handler = a3xx_irq_handler,
2869 .busy_cycles = a3xx_busy_cycles,
2870 .start = a3xx_start,
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002871 .snapshot = a3xx_snapshot,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002872};