/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
13
14#include <linux/delay.h>
15
16#include "kgsl.h"
17#include "adreno.h"
18#include "kgsl_sharedmem.h"
19#include "kgsl_cffdump.h"
20#include "a3xx_reg.h"
21
22/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
23 * functions.
24 */
25
26#define _SET(_shift, _val) ((_val) << (_shift))
27
28/*
29 ****************************************************************************
30 *
31 * Context state shadow structure:
32 *
33 * +---------------------+------------+-------------+---------------------+---+
34 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
35 * +---------------------+------------+-------------+---------------------+---+
36 *
37 * 8K - ALU Constant Shadow (8K aligned)
38 * 4K - H/W Register Shadow (8K aligned)
39 * 5K - Command and Vertex Buffers
40 * 8K - Shader Instruction Shadow
41 * ~6K - Texture Constant Shadow
42 *
43 *
44 ***************************************************************************
45 */
46
47/* Sizes of all sections in state shadow memory */
48#define ALU_SHADOW_SIZE (8*1024) /* 8KB */
49#define REG_SHADOW_SIZE (4*1024) /* 4KB */
50#define CMD_BUFFER_SIZE (5*1024) /* 5KB */
51#define TEX_SIZE_MEM_OBJECTS 896 /* bytes */
52#define TEX_SIZE_MIPMAP 1936 /* bytes */
53#define TEX_SIZE_SAMPLER_OBJ 256 /* bytes */
54#define TEX_SHADOW_SIZE \
55 ((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
56 TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */
57#define SHADER_SHADOW_SIZE (8*1024) /* 8KB */
58
59/* Total context size, excluding GMEM shadow */
60#define CONTEXT_SIZE \
61 (ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
62 CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
63 TEX_SHADOW_SIZE)
64
65/* Offsets to different sections in context shadow memory */
66#define REG_OFFSET ALU_SHADOW_SIZE
67#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE)
68#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
69#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE)
70#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET
71#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
72#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
73#define FS_TEX_OFFSET_MEM_OBJECTS \
74 (VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
75#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
76#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
77
78/* The offset for fragment shader data in HLSQ context */
79#define SSIZE (16*1024)
80
81#define HLSQ_SAMPLER_OFFSET 0x000
82#define HLSQ_MEMOBJ_OFFSET 0x400
83#define HLSQ_MIPMAP_OFFSET 0x800
84
85#ifdef GSL_USE_A3XX_HLSQ_SHADOW_RAM
86/* Use shadow RAM */
87#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)
88#else
89/* Use working RAM */
90#define HLSQ_SHADOW_BASE 0x10000
91#endif
92
93#define REG_TO_MEM_LOOP_COUNT_SHIFT 15
94
95#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
96 vis_cull_mode) \
97 (((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
98 ((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
99 ((index_size & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
100 ((index_size >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
101 ((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
102 (1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
103
104/*
105 * List of context registers (starting from dword offset 0x2000).
106 * Each line contains start and end of a range of registers.
107 */
108static const unsigned int context_register_ranges[] = {
109 A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
110 A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
111 A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
112 A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
113 A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
114 A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
115 A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
116 A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
117 A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
118 A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
119 A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
120 A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
121 A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
122 A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
123 A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
124 A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
125 A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
126 A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
127 A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
128 A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
129 A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
130 A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
131 A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
132 A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
133 A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
134 A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
135 A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
136 A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
137 A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
138 A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
139 A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
140};
141
142/* Global registers that need to be saved separately */
143static const unsigned int global_registers[] = {
144 A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
145 A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
146 A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
147 A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
148 A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
149 A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
150 A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
151 A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
152 A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
153 A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
154 A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
155 A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
156 A3XX_VSC_BIN_SIZE,
157 A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
158 A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
159 A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
160 A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
161 A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
162 A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
163 A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
164 A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
165 A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
166 A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
167 A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
168 A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
169 A3XX_VSC_SIZE_ADDRESS
170};
171
172#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
173
174/* A scratchpad used to build commands during context create */
175static struct tmp_ctx {
176 unsigned int *cmd; /* Next available dword in C&V buffer */
177
178 /* Addresses in comamnd buffer where registers are saved */
179 uint32_t reg_values[GLOBAL_REGISTER_COUNT];
180 uint32_t gmem_base; /* Base GPU address of GMEM */
181} tmp_ctx;
182
183#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
184/*
185 * Function for executing dest = ( (reg & and) ROL rol ) | or
186 */
187static unsigned int *rmw_regtomem(unsigned int *cmd,
188 unsigned int reg, unsigned int and,
189 unsigned int rol, unsigned int or,
190 unsigned int dest)
191{
192 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
193 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
194 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
195 *cmd++ = 0x00000000; /* AND value */
196 *cmd++ = reg; /* OR address */
197
198 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
199 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
200 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
201 *cmd++ = and; /* AND value */
202 *cmd++ = or; /* OR value */
203
204 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
205 *cmd++ = A3XX_CP_SCRATCH_REG2;
206 *cmd++ = dest;
207
208 return cmd;
209}
210#endif
211
212static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
213 struct adreno_context *drawctxt)
214{
215 unsigned int *cmd = tmp_ctx.cmd;
216 unsigned int *start = cmd;
217 unsigned int i;
218
219 drawctxt->constant_save_commands[0].hostptr = cmd;
220 drawctxt->constant_save_commands[0].gpuaddr =
221 virt2gpu(cmd, &drawctxt->gpustate);
222 cmd++;
223
224 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
225 *cmd++ = 0;
226
227#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
228 /*
229 * Context registers are already shadowed; just need to
230 * disable shadowing to prevent corruption.
231 */
232
233 *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
234 *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
235 *cmd++ = 4 << 16; /* regs, start=0 */
236 *cmd++ = 0x0; /* count = 0 */
237
238#else
239 /*
240 * Make sure the HW context has the correct register values before
241 * reading them.
242 */
243
244 /* Write context registers into shadow */
245 for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
246 unsigned int start = context_register_ranges[i * 2];
247 unsigned int end = context_register_ranges[i * 2 + 1];
248 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
249 *cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
250 start;
251 *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
252 & 0xFFFFE000) + (start - 0x2000) * 4;
253 }
254#endif
255
256 /* Need to handle some of the global registers separately */
257 for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
258 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
259 *cmd++ = global_registers[i];
260 *cmd++ = tmp_ctx.reg_values[i];
261 }
262
263 /* Save vertex shader constants */
264 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
265 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
266 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
267 *cmd++ = 0x0000FFFF;
268 *cmd++ = 3; /* EXEC_COUNT */
269 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
270 drawctxt->constant_save_commands[1].hostptr = cmd;
271 drawctxt->constant_save_commands[1].gpuaddr =
272 virt2gpu(cmd, &drawctxt->gpustate);
273 /*
274 From fixup:
275
276 dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
277 src = (HLSQ_SHADOW_BASE + 0x2000) / 4
278
279 From register spec:
280 SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
281 */
282 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
283 /* ALU constant shadow base */
284 *cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;
285
286 /* Save fragment shader constants */
287 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
288 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
289 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
290 *cmd++ = 0x0000FFFF;
291 *cmd++ = 3; /* EXEC_COUNT */
292 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
293 drawctxt->constant_save_commands[2].hostptr = cmd;
294 drawctxt->constant_save_commands[2].gpuaddr =
295 virt2gpu(cmd, &drawctxt->gpustate);
296 /*
297 From fixup:
298
299 dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
300 src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4
301
302 From register spec:
303 SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
304 */
305 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
306
307 /*
308 From fixup:
309
310 base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
311 offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET
312
313 From register spec:
314 SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
315 start offset in on chip RAM,
316 128bit aligned
317
318 dst = base + offset
319 Because of the base alignment we can use
320 dst = base | offset
321 */
322 *cmd++ = 0; /* dst */
323
324 /* Save VS texture memory objects */
325 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
326 *cmd++ =
327 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
328 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
329 *cmd++ =
330 (drawctxt->gpustate.gpuaddr +
331 VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
332
333 /* Save VS texture mipmap pointers */
334 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
335 *cmd++ =
336 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
337 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
338 *cmd++ =
339 (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
340
341 /* Save VS texture sampler objects */
342 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
343 *cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
344 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
345 *cmd++ =
346 (drawctxt->gpustate.gpuaddr +
347 VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
348
349 /* Save FS texture memory objects */
350 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
351 *cmd++ =
352 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
353 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
354 *cmd++ =
355 (drawctxt->gpustate.gpuaddr +
356 FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
357
358 /* Save FS texture mipmap pointers */
359 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
360 *cmd++ =
361 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
362 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
363 *cmd++ =
364 (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
365
366 /* Save FS texture sampler objects */
367 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
368 *cmd++ =
369 ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
370 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
371 *cmd++ =
372 (drawctxt->gpustate.gpuaddr +
373 FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
374
375 /* Create indirect buffer command for above command sequence */
376 create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);
377
378 tmp_ctx.cmd = cmd;
379}
380
381/* Copy GMEM contents to system memory shadow. */
382static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
383 struct adreno_context *drawctxt,
384 struct gmem_shadow_t *shadow)
385{
386 unsigned int *cmds = tmp_ctx.cmd;
387 unsigned int *start = cmds;
388
389 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
390 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
391
392 /* RB_MODE_CONTROL */
393 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
394 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
395 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
396 /* RB_RENDER_CONTROL */
397 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
398 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
399
400 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
401 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
402 /* RB_COPY_CONTROL */
403 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
404 RB_CLEAR_MODE_RESOLVE) |
405 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
406 tmp_ctx.gmem_base >> 14);
407 /* RB_COPY_DEST_BASE */
408 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
409 shadow->gmemshadow.gpuaddr >> 5);
410 /* RB_COPY_DEST_PITCH */
411 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
412 (shadow->pitch * 4) / 32);
413 /* RB_COPY_DEST_INFO */
414 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
415 RB_TILINGMODE_LINEAR) |
416 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
417 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
418 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
419
420 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
421 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
422 /* GRAS_SC_CONTROL */
423 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
424
425 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
426 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
427 /* VFD_CONTROL_0 */
428 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
429 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
430 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
431 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
432 /* VFD_CONTROL_1 */
433 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
434 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
435 _SET(VFD_CTRLREG1_REGID4INST, 252);
436
437 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
438 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
439 /* VFD_FETCH_INSTR_0_0 */
440 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
441 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
442 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
443 /* VFD_FETCH_INSTR_1_0 */
444 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
445 shadow->quad_vertices.gpuaddr);
446
447 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
448 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
449 /* VFD_DECODE_INSTR_0 */
450 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
451 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
452 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
453 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
454 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
455 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
456
457 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
458 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
459 /* HLSQ_CONTROL_0_REG */
460 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
461 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
462 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
463 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
464 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
465 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
466 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
467 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
468 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
469 /* HLSQ_CONTROL_1_REG */
470 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
471 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
472 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
473 /* HLSQ_CONTROL_2_REG */
474 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
475 /* HLSQ_CONTROL_3_REG */
476 *cmds++ = 0x00000000;
477
478 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
479 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
480 /* HLSQ_VS_CONTROL_REG */
481 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
482 /* HLSQ_FS_CONTROL_REG */
483 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
484 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
485 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
486 /* HLSQ_CONST_VSPRESV_RANGE_REG */
487 *cmds++ = 0x00000000;
488 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
489 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
490 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
491
492 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
493 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
494 /* SP_FS_LENGTH_REG */
495 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
496
497 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
498 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
499 /* SP_SP_CTRL_REG */
500 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
501 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
502
503 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
504 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
505 /* SP_VS_CTRL_REG0 */
506 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
507 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
508 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
509 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
510 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
511 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
512 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
513 /* SP_VS_CTRL_REG1 */
514 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
515 /* SP_VS_PARAM_REG */
516 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
517 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
518 /* SP_VS_OUT_REG_0 */
519 *cmds++ = 0x00000000;
520 /* SP_VS_OUT_REG_1 */
521 *cmds++ = 0x00000000;
522 /* SP_VS_OUT_REG_2 */
523 *cmds++ = 0x00000000;
524 /* SP_VS_OUT_REG_3 */
525 *cmds++ = 0x00000000;
526 /* SP_VS_OUT_REG_4 */
527 *cmds++ = 0x00000000;
528 /* SP_VS_OUT_REG_5 */
529 *cmds++ = 0x00000000;
530 /* SP_VS_OUT_REG_6 */
531 *cmds++ = 0x00000000;
532 /* SP_VS_OUT_REG_7 */
533 *cmds++ = 0x00000000;
534
535 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
536 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
537 /* SP_VS_VPC_DST_REG_0 */
538 *cmds++ = 0x00000000;
539 /* SP_VS_VPC_DST_REG_1 */
540 *cmds++ = 0x00000000;
541 /* SP_VS_VPC_DST_REG_2 */
542 *cmds++ = 0x00000000;
543 /* SP_VS_VPC_DST_REG_3 */
544 *cmds++ = 0x00000000;
545 /* SP_VS_OBJ_OFFSET_REG */
546 *cmds++ = 0x00000000;
547 /* SP_VS_OBJ_START_REG */
548 *cmds++ = 0x00000000;
549
550 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
551 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
552 /* SP_VS_LENGTH_REG */
553 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
554 /* SP_FS_CTRL_REG0 */
555 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
556 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
557 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
558 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
559 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
560 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
561 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
562 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
563 /* SP_FS_CTRL_REG1 */
564 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
565 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
566 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
567 /* SP_FS_OBJ_OFFSET_REG */
568 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
569 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
570 /* SP_FS_OBJ_START_REG */
571 *cmds++ = 0x00000000;
572
573 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
574 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
575 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
576 *cmds++ = 0x00000000;
577 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
578 *cmds++ = 0x00000000;
579
580 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
581 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
582 /* SP_FS_OUTPUT_REG */
583 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
584
585 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
586 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
587 /* SP_FS_MRT_REG_0 */
588 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
589 /* SP_FS_MRT_REG_1 */
590 *cmds++ = 0x00000000;
591 /* SP_FS_MRT_REG_2 */
592 *cmds++ = 0x00000000;
593 /* SP_FS_MRT_REG_3 */
594 *cmds++ = 0x00000000;
595
596 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
597 *cmds++ = CP_REG(A3XX_VPC_ATTR);
598 /* VPC_ATTR */
599 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
600 _SET(VPC_VPCATTR_LMSIZE, 1);
601 /* VPC_PACK */
602 *cmds++ = 0x00000000;
603 /* VPC_VARRYING_INTERUPT_MODE_0 */
604 *cmds++ = 0x00000000;
605 /* VPC_VARRYING_INTERUPT_MODE_1 */
606 *cmds++ = 0x00000000;
607 /* VPC_VARRYING_INTERUPT_MODE_2 */
608 *cmds++ = 0x00000000;
609 /* VPC_VARRYING_INTERUPT_MODE_3 */
610 *cmds++ = 0x00000000;
611 /* VPC_VARYING_PS_REPL_MODE_0 */
612 *cmds++ = 0x00000000;
613 /* VPC_VARYING_PS_REPL_MODE_1 */
614 *cmds++ = 0x00000000;
615 /* VPC_VARYING_PS_REPL_MODE_2 */
616 *cmds++ = 0x00000000;
617 /* VPC_VARYING_PS_REPL_MODE_3 */
618 *cmds++ = 0x00000000;
619
620 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
621 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
622 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
623 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
624 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
625 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
626 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
627
628 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
629 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
630 /* end; */
631 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
632 /* nop; */
633 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
634 /* nop; */
635 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
636
637 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
638 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
639 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
640 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
641 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
642 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
643 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
644
645 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
646 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
647 /* end; */
648 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
649 /* nop; */
650 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
651 /* nop; */
652 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
653
654 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
655 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
656 /* RB_MSAA_CONTROL */
657 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
658 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
659
660 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
661 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
662 /* RB_DEPTH_CONTROL */
663 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
664
665 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
666 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
667 /* RB_MRT_CONTROL0 */
668 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
669 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
670 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
671 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
672
673 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
674 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
675 /* RB_MRT_BLEND_CONTROL0 */
676 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
677 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
678 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
679 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
680 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
681 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
682 /* RB_MRT_CONTROL1 */
683 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
684 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
685 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
686
687 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
688 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
689 /* RB_MRT_BLEND_CONTROL1 */
690 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
691 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
692 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
693 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
694 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
695 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
696 /* RB_MRT_CONTROL2 */
697 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
698 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
699 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
700
701 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
702 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
703 /* RB_MRT_BLEND_CONTROL2 */
704 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
705 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
706 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
707 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
708 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
709 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
710 /* RB_MRT_CONTROL3 */
711 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
712 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
713 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
714
715 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
716 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
717 /* RB_MRT_BLEND_CONTROL3 */
718 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
719 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
720 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
721 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
722 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
723 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
724
725 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
726 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
727 /* VFD_INDEX_MIN */
728 *cmds++ = 0x00000000;
729 /* VFD_INDEX_MAX */
730 *cmds++ = 0xFFFFFFFF;
731 /* VFD_INSTANCEID_OFFSET */
732 *cmds++ = 0x00000000;
733 /* VFD_INDEX_OFFSET */
734 *cmds++ = 0x00000000;
735
736 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
737 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
738 /* VFD_VS_THREADING_THRESHOLD */
739 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
740 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
741
742 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
743 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
744 /* TPL1_TP_VS_TEX_OFFSET */
745 *cmds++ = 0;
746
747 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
748 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
749 /* TPL1_TP_FS_TEX_OFFSET */
750 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
751 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
752 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
753
754 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
755 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
756 /* PC_PRIM_VTX_CNTL */
757 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
758 PC_DRAW_TRIANGLES) |
759 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
760 PC_DRAW_TRIANGLES) |
761 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
762
763 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
764 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
765 /* GRAS_SC_WINDOW_SCISSOR_TL */
766 *cmds++ = 0x00000000;
767 /* GRAS_SC_WINDOW_SCISSOR_BR */
768 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
769 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
770
771 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
772 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
773 /* GRAS_SC_SCREEN_SCISSOR_TL */
774 *cmds++ = 0x00000000;
775 /* GRAS_SC_SCREEN_SCISSOR_BR */
776 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
777 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
778
779 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
780 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
781 /* GRAS_CL_VPORT_XOFFSET */
782 *cmds++ = 0x00000000;
783 /* GRAS_CL_VPORT_XSCALE */
784 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
785 /* GRAS_CL_VPORT_YOFFSET */
786 *cmds++ = 0x00000000;
787 /* GRAS_CL_VPORT_YSCALE */
788 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
789
790 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
791 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
792 /* GRAS_CL_VPORT_ZOFFSET */
793 *cmds++ = 0x00000000;
794 /* GRAS_CL_VPORT_ZSCALE */
795 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
796
797 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
798 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
799 /* GRAS_CL_CLIP_CNTL */
800 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
801 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
802 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
803 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
804 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
805
806 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
807 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
808 /* GRAS_CL_GB_CLIP_ADJ */
809 *cmds++ = 0x00000000;
810
811 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
812 *cmds++ = 0x00000000;
813
814 /*
815 * Resolve using two draw calls with a dummy register
816 * write in between. This is a HLM workaround
817 * that should be removed later.
818 */
819 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
820 *cmds++ = 0x00000000; /* Viz query info */
821 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
822 PC_DI_SRC_SEL_IMMEDIATE,
823 PC_DI_INDEX_SIZE_32_BIT,
824 PC_DI_IGNORE_VISIBILITY);
825 *cmds++ = 0x00000003; /* Num indices */
826 *cmds++ = 0x00000000; /* Index 0 */
827 *cmds++ = 0x00000001; /* Index 1 */
828 *cmds++ = 0x00000002; /* Index 2 */
829
830 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
831 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
832 *cmds++ = 0x00000000;
833
834 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
835 *cmds++ = 0x00000000; /* Viz query info */
836 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
837 PC_DI_SRC_SEL_IMMEDIATE,
838 PC_DI_INDEX_SIZE_32_BIT,
839 PC_DI_IGNORE_VISIBILITY);
840 *cmds++ = 0x00000003; /* Num indices */
841 *cmds++ = 0x00000002; /* Index 0 */
842 *cmds++ = 0x00000001; /* Index 1 */
843 *cmds++ = 0x00000003; /* Index 2 */
844
845 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
846 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
847 *cmds++ = 0x00000000;
848
849 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
850 *cmds++ = 0x00000000;
851
852 /* Create indirect buffer command for above command sequence */
853 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
854
855 return cmds;
856}
857
/*
 * Build the IB that saves the SP vertex and fragment shader instruction
 * stores into the context's shadow memory.  The two CP_REG_TO_MEM source
 * words (shader_save_commands[2]/[3]) are left as 0 here; they are patched
 * at save time by the fixup IB (see build_save_fixup_cmds), which computes
 * the actual shader lengths and HLSQ shadow offsets from SP_VS/FS_CTRL_REG0.
 */
static void build_shader_save_cmds(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;

	/* Reserve space for boolean values used for COND_EXEC packet */
	drawctxt->cond_execs[0].hostptr = cmd;
	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[1].hostptr = cmd;
	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/*
	 * Scratch dwords that the fixup IB overwrites with the computed
	 * CP_REG_TO_MEM parameters before the save IB executes.
	 */
	drawctxt->shader_save_commands[0].hostptr = cmd;
	drawctxt->shader_save_commands[0].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->shader_save_commands[1].hostptr = cmd;
	drawctxt->shader_save_commands[1].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	start = cmd;

	/* Save vertex shader */

	/*
	 * Execute the following 3 dwords only if cond_execs[0] is non-zero
	 * (i.e. a vertex shader is resident in the instruction store).
	 */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* First CP_REG_TO_MEM argument: patched by the fixup IB (see below) */
	drawctxt->shader_save_commands[2].hostptr = cmd;
	drawctxt->shader_save_commands[2].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 From fixup:

	 dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8

	 From regspec:
	 SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	 If bit31 is 1, it means overflow
	 or any long shader.

	 src = (HLSQ_SHADOW_BASE + 0x1000)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: VS half of the shader shadow (dword aligned) */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Save fragment shader */
	/* Conditional on cond_execs[1] (fragment shader resident) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* First CP_REG_TO_MEM argument: patched by the fixup IB (see below) */
	drawctxt->shader_save_commands[3].hostptr = cmd;
	drawctxt->shader_save_commands[3].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 From fixup:

	 dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8

	 From regspec:
	 SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	 If bit31 is 1, it means overflow
	 or any long shader.

	 fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
	 From regspec:

	 SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
	 First instruction of the whole shader will be stored from
	 the offset in instruction cache, unit = 256bits, a cache line.
	 It can start from 0 if no VS available.

	 src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: FS half of the shader shadow (dword aligned) */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
950
951/*
952 * Make an IB to modify context save IBs with the correct shader instruction
953 * and constant sizes and offsets.
954 */
955
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7; /* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Full UCHE invalidate: zero start/end addresses, entire cache */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/*
	 * CPU-sync variant: dump the raw registers to memory; the CPU is
	 * expected to compute the save parameters from these snapshots.
	 */

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else
	/*
	 * GPU-only variant: use register read-modify-write sequences
	 * (rmw_regtomem and CP_REG_RMW on CP scratch registers) to compute
	 * the save parameters on the GPU and patch them directly into the
	 * save IBs built earlier.
	 */

	/* Shader save */
	/*
	 * shader_save_commands[2] <-
	 *	(VS_LENGTH * 8) << REG_TO_MEM_LOOP_COUNT_SHIFT |
	 *	(HLSQ_SHADOW_BASE + 0x1000) / 4
	 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0;	/* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
		A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000;	/* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4;	/* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;	/* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000;	/* AND value */
	*cmd++ = 0x00000000;	/* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff;	/* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3;	/* OR address */

	/* Write the assembled FS source word into the shader save IB */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   17, (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   17, (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1104
1105/****************************************************************************/
1106/* Functions to build context restore IBs */
1107/****************************************************************************/
1108
1109static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1110 struct adreno_context *drawctxt,
1111 struct gmem_shadow_t *shadow)
1112{
1113 unsigned int *cmds = tmp_ctx.cmd;
1114 unsigned int *start = cmds;
1115
1116 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1117 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1118 /* HLSQ_CONTROL_0_REG */
1119 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1120 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1121 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1122 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1123 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1124 /* HLSQ_CONTROL_1_REG */
1125 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1126 /* HLSQ_CONTROL_2_REG */
1127 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1128 /* HLSQ_CONTROL3_REG */
1129 *cmds++ = 0x00000000;
1130
1131 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1132 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1133 /* RB_MRT_BUF_INFO0 */
1134 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1135 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1136 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1137 (shadow->gmem_pitch * 4 * 8) / 256);
1138 /* RB_MRT_BUF_BASE0 */
1139 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1140
1141 /* Texture samplers */
1142 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1143 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1144 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1145 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1146 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1147 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1148 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1149 *cmds++ = 0x00000240;
1150 *cmds++ = 0x00000000;
1151
1152 /* Texture memobjs */
1153 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1154 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1155 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1156 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1157 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1158 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1159 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1160 *cmds++ = 0x4cc06880;
1161 *cmds++ = shadow->height | (shadow->width << 14);
1162 *cmds++ = (shadow->pitch*4*8) << 9;
1163 *cmds++ = 0x00000000;
1164
1165 /* Mipmap bases */
1166 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1167 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1168 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1169 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1170 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1171 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1172 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1173 *cmds++ = shadow->gmemshadow.gpuaddr;
1174 *cmds++ = 0x00000000;
1175 *cmds++ = 0x00000000;
1176 *cmds++ = 0x00000000;
1177 *cmds++ = 0x00000000;
1178 *cmds++ = 0x00000000;
1179 *cmds++ = 0x00000000;
1180 *cmds++ = 0x00000000;
1181 *cmds++ = 0x00000000;
1182 *cmds++ = 0x00000000;
1183 *cmds++ = 0x00000000;
1184 *cmds++ = 0x00000000;
1185 *cmds++ = 0x00000000;
1186 *cmds++ = 0x00000000;
1187
1188 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1189 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1190 /* HLSQ_VS_CONTROL_REG */
1191 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1192 /* HLSQ_FS_CONTROL_REG */
1193 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1194 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1195 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1196 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1197 *cmds++ = 0x00000000;
1198 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1199 *cmds++ = 0x00000000;
1200
1201 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1202 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1203 /* SP_FS_LENGTH_REG */
1204 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1205
1206 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1207 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1208 /* SP_VS_CTRL_REG0 */
1209 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1210 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1211 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1212 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1213 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1214 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1215 /* SP_VS_CTRL_REG1 */
1216 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1217 /* SP_VS_PARAM_REG */
1218 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1219 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1220 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1221 /* SP_VS_OUT_REG0 */
1222 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1223 /* SP_VS_OUT_REG1 */
1224 *cmds++ = 0x00000000;
1225 /* SP_VS_OUT_REG2 */
1226 *cmds++ = 0x00000000;
1227 /* SP_VS_OUT_REG3 */
1228 *cmds++ = 0x00000000;
1229 /* SP_VS_OUT_REG4 */
1230 *cmds++ = 0x00000000;
1231 /* SP_VS_OUT_REG5 */
1232 *cmds++ = 0x00000000;
1233 /* SP_VS_OUT_REG6 */
1234 *cmds++ = 0x00000000;
1235 /* SP_VS_OUT_REG7 */
1236 *cmds++ = 0x00000000;
1237
1238 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1239 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1240 /* SP_VS_VPC_DST_REG0 */
1241 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1242 /* SP_VS_VPC_DST_REG1 */
1243 *cmds++ = 0x00000000;
1244 /* SP_VS_VPC_DST_REG2 */
1245 *cmds++ = 0x00000000;
1246 /* SP_VS_VPC_DST_REG3 */
1247 *cmds++ = 0x00000000;
1248 /* SP_VS_OBJ_OFFSET_REG */
1249 *cmds++ = 0x00000000;
1250 /* SP_VS_OBJ_START_REG */
1251 *cmds++ = 0x00000000;
1252
1253 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1254 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1255 /* SP_VS_LENGTH_REG */
1256 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1257 /* SP_FS_CTRL_REG0 */
1258 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1259 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1260 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1261 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1262 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1263 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1264 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1265 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1266 /* SP_FS_CTRL_REG1 */
1267 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1268 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1269 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1270 /* SP_FS_OBJ_OFFSET_REG */
1271 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128);
1272 /* SP_FS_OBJ_START_REG */
1273 *cmds++ = 0x00000000;
1274
1275 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1276 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1277 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1278 *cmds++ = 0x00000000;
1279 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1280 *cmds++ = 0x00000000;
1281
1282 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1283 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1284 /* SP_FS_OUT_REG */
1285 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1286
1287 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1288 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1289 /* SP_FS_MRT_REG0 */
1290 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1291 /* SP_FS_MRT_REG1 */
1292 *cmds++ = 0;
1293 /* SP_FS_MRT_REG2 */
1294 *cmds++ = 0;
1295 /* SP_FS_MRT_REG3 */
1296 *cmds++ = 0;
1297
1298 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1299 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1300 /* VPC_ATTR */
1301 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1302 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1303 _SET(VPC_VPCATTR_LMSIZE, 1);
1304 /* VPC_PACK */
1305 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1306 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1307 /* VPC_VARYING_INTERP_MODE_0 */
1308 *cmds++ = 0x00000000;
1309 /* VPC_VARYING_INTERP_MODE1 */
1310 *cmds++ = 0x00000000;
1311 /* VPC_VARYING_INTERP_MODE2 */
1312 *cmds++ = 0x00000000;
1313 /* VPC_VARYING_IINTERP_MODE3 */
1314 *cmds++ = 0x00000000;
1315 /* VPC_VARRYING_PS_REPL_MODE_0 */
1316 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1317 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1318 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1319 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1320 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1321 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1322 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1323 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1324 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1325 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1326 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1327 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1328 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1329 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1330 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1331 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1332 /* VPC_VARRYING_PS_REPL_MODE_1 */
1333 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1334 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1335 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1336 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1337 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1338 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1339 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1340 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1341 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1342 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1343 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1344 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1345 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1346 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1347 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1348 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1349 /* VPC_VARRYING_PS_REPL_MODE_2 */
1350 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1351 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1352 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1353 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1354 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1355 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1356 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1357 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1358 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1359 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1360 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1361 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1362 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1363 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1364 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1365 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1366 /* VPC_VARRYING_PS_REPL_MODE_3 */
1367 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1377 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1378 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1379 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1380 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1381 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1382 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1383
1384 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1385 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1386 /* SP_SP_CTRL_REG */
1387 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1388
1389 /* Load vertex shader */
1390 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1391 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1392 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1393 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1394 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1395 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1396 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1397 /* (sy)end; */
1398 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1399 /* nop; */
1400 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1401 /* nop; */
1402 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1403 /* nop; */
1404 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1405
1406 /* Load fragment shader */
1407 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1408 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1409 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1410 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1411 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1412 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1413 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1414 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1415 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1416 /* (rpt5)nop; */
1417 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1418 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1419 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1420 /* (sy)mov.f32f32 r1.x, r0.x; */
1421 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1422 /* mov.f32f32 r1.y, r0.y; */
1423 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1424 /* mov.f32f32 r1.z, r0.z; */
1425 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1426 /* mov.f32f32 r1.w, r0.w; */
1427 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1428 /* end; */
1429 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1430
1431 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1432 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1433 /* VFD_CONTROL_0 */
1434 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1435 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1436 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1437 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1438 /* VFD_CONTROL_1 */
1439 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1440 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1441 _SET(VFD_CTRLREG1_REGID4INST, 252);
1442
1443 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1444 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1445 /* VFD_FETCH_INSTR_0_0 */
1446 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1447 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1448 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1449 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1450 /* VFD_FETCH_INSTR_1_0 */
1451 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1452 shadow->quad_vertices_restore.gpuaddr);
1453 /* VFD_FETCH_INSTR_0_1 */
1454 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1455 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1456 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1457 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1458 /* VFD_FETCH_INSTR_1_1 */
1459 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1460 shadow->quad_vertices_restore.gpuaddr + 16);
1461
1462 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1463 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1464 /* VFD_DECODE_INSTR_0 */
1465 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1466 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1467 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1468 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1469 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1470 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1471 /* VFD_DECODE_INSTR_1 */
1472 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1473 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1474 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1475 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1476 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1477 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1478
1479 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1480 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1481 /* RB_DEPTH_CONTROL */
1482 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1483
1484 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1485 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1486 /* RB_STENCIL_CONTROL */
1487 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1488 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1489 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1490 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1491 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1492 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1493 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1494 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1495
1496 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1497 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1498 /* RB_MODE_CONTROL */
1499 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1500 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1501
1502 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1503 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1504 /* RB_RENDER_CONTROL */
1505 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1506 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1507
1508 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1509 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1510 /* RB_MSAA_CONTROL */
1511 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1512 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1513
1514 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1515 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1516 /* RB_MRT_CONTROL0 */
1517 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1518 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1519 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1520 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1521
1522 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1523 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1524 /* RB_MRT_BLENDCONTROL0 */
1525 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1526 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1527 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1528 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1529 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1530 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1531 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1532 /* RB_MRT_CONTROL1 */
1533 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1534 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1535 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1536
1537 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1538 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1539 /* RB_MRT_BLENDCONTROL1 */
1540 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1541 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1542 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1543 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1544 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1545 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1546 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1547 /* RB_MRT_CONTROL2 */
1548 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1549 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1550 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1551
1552 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1553 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1554 /* RB_MRT_BLENDCONTROL2 */
1555 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1556 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1557 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1558 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1559 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1560 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1561 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1562 /* RB_MRT_CONTROL3 */
1563 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1564 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1565 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1566
1567 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1568 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1569 /* RB_MRT_BLENDCONTROL3 */
1570 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1571 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1572 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1573 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1574 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1575 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1576 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1577
1578 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1579 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1580 /* VFD_INDEX_MIN */
1581 *cmds++ = 0x00000000;
1582 /* VFD_INDEX_MAX */
1583 *cmds++ = 0xFFFFFFFF;
1584 /* VFD_INDEX_OFFSET */
1585 *cmds++ = 0x00000000;
1586 /* TPL1_TP_VS_TEX_OFFSET */
1587 *cmds++ = 0x00000000;
1588
1589 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1590 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1591 /* VFD_VS_THREADING_THRESHOLD */
1592 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1593 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1594
1595 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1596 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1597 /* TPL1_TP_VS_TEX_OFFSET */
1598 *cmds++ = 0x00000000;
1599
1600 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1601 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1602 /* TPL1_TP_FS_TEX_OFFSET */
1603 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1604 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1605 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1606
1607 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1608 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1609 /* GRAS_SC_CONTROL */
1610 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1611
1612 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1613 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1614 /* GRAS_SU_MODE_CONTROL */
1615 *cmds++ = 0x00000000;
1616
1617 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1618 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1619 /* GRAS_SC_WINDOW_SCISSOR_TL */
1620 *cmds++ = 0x00000000;
1621 /* GRAS_SC_WINDOW_SCISSOR_BR */
1622 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1623 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1624
1625 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1626 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1627 /* GRAS_SC_SCREEN_SCISSOR_TL */
1628 *cmds++ = 0x00000000;
1629 /* GRAS_SC_SCREEN_SCISSOR_BR */
1630 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1631 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1632
1633 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1634 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1635 /* GRAS_CL_VPORT_XOFFSET */
1636 *cmds++ = 0x00000000;
1637 /* GRAS_CL_VPORT_XSCALE */
1638 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1639 /* GRAS_CL_VPORT_YOFFSET */
1640 *cmds++ = 0x00000000;
1641 /* GRAS_CL_VPORT_YSCALE */
1642 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1643
1644 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1645 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1646 /* GRAS_CL_VPORT_ZOFFSET */
1647 *cmds++ = 0x00000000;
1648 /* GRAS_CL_VPORT_ZSCALE */
1649 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1650
1651 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1652 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1653 /* GRAS_CL_CLIP_CNTL */
1654 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1655
1656 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1657 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1658 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1659 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1660
1661 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1662 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1663 /* PC_PRIM_VTX_CONTROL */
1664 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1665 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1666 PC_DRAW_TRIANGLES) |
1667 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1668 PC_DRAW_TRIANGLES) |
1669 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1670
1671 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1672 *cmds++ = 0x00000000; /* Viz query info */
1673 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1674 PC_DI_SRC_SEL_AUTO_INDEX,
1675 PC_DI_INDEX_SIZE_16_BIT,
1676 PC_DI_IGNORE_VISIBILITY);
1677 *cmds++ = 0x00000002; /* Num indices */
1678
1679 /* Create indirect buffer command for above command sequence */
1680 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1681
1682 return cmds;
1683}
1684
/*
 * build_regrestore_cmds() - build the IB that restores the shadowed
 * context register ranges on context switch-in.
 *
 * The restore is done with a single CP_LOAD_CONSTANT_CONTEXT packet whose
 * header is emitted *after* the register ranges are generated (the packet
 * length is not known up front), so a slot is reserved at @lcc_start and
 * patched once the payload size is known.  Command space is carved out of
 * the shared shadow buffer at tmp_ctx.cmd, which is advanced on return.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Invalidate the entire UCHE before reloading state */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	/* 8K-aligned base of the register shadow within the gpustate */
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Emit one range descriptor per (start, end) pair in the table */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Now that the payload size is known, patch the deferred header */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

	/* OR the shadowing control bits into the first range dword */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * Restore the non-context (global) registers with type-0 writes.
	 * Record the GPU address of each value slot so the save path can
	 * write the live register value into it later.
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1742
/*
 * build_constantrestore_cmds() - build the IB that reloads shader (ALU)
 * constants and texture state from the context shadow on restore.
 *
 * Several CP_LOAD_STATE ord1/ord2 dwords depend on register values that
 * are only known at context-save time (constant lengths and offsets), so
 * placeholder slots are emitted here and their GPU addresses recorded in
 * drawctxt->constant_load_commands[] / cond_execs[] for the fixup IBs to
 * patch (see build_restore_fixup_cmds).
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/*
	 * Conditional-execution flags for the VS/FS constant loads below;
	 * zero here, patched with the live constant lengths by the fixup.
	 */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily turn shadowing off (bit 24 clear) around the HLSQ write */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants (skipped when cond_execs[2] is 0) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants (skipped when cond_execs[3] is 0) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1907
/*
 * build_shader_restore_cmds() - build the IB that reloads the saved
 * vertex and fragment shader instructions from the shader shadow.
 *
 * The CP_LOAD_STATE ord1 dwords (which encode the shader lengths) are
 * left as zero placeholders; their GPU addresses are recorded in
 * drawctxt->shader_load_commands[] so the save-time fixup IB can patch
 * in the live SP_*_CTRL_REG0 lengths.  Each load is guarded by a
 * CP_COND_EXEC on cond_execs[0]/[1] so it is skipped when no shader of
 * that type was saved.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16)
	 */
	*cmd++ = 0;		/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16)
	 */
	*cmd++ = 0;		/*ord1 */
	/* FS instructions live in the second half of the shader shadow */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1974
1975static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
1976 struct adreno_context *drawctxt)
1977{
1978 unsigned int *cmd = tmp_ctx.cmd;
1979 unsigned int *start = cmd;
1980
1981 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1982 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1983 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
1984 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
1985 = virt2gpu(cmd, &drawctxt->gpustate);
1986 *cmd++ = 0;
1987
1988 /* Create indirect buffer command for above command sequence */
1989 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
1990
1991 tmp_ctx.cmd = cmd;
1992}
1993
/*
 * build_restore_fixup_cmds() - build the IB that patches the restore IBs.
 *
 * The restore IBs (shader, constant and HLSQ restore) contain placeholder
 * dwords whose correct values depend on the live register state at save
 * time.  This IB reads those registers (SP_*_CTRL_REG0/1, FS object
 * offset, HLSQ control) and writes the derived CP_LOAD_STATE ord1/ord2
 * words and conditional-exec flags into the placeholder slots recorded
 * during restore-IB construction.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/*
	 * GPU-side read-modify-write: extract the length field from each
	 * control register, shift it into the numunits position of ord1
	 * and OR in the fixed (stateblock << 19) | (mode << 16) bits.
	 */
	/* Save shader sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals (skip load when length is 0) */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2066
/*
 * a3xx_create_gpustate_shadow() - build every fixed save/restore IB for a
 * context that does not use preambles.
 *
 * Each build_* routine consumes command space from the shared shadow
 * buffer via tmp_ctx.cmd and advances it, so the call order determines
 * the buffer layout; do not reorder casually.  Always returns 0.
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2083
2084/* create buffers for saving/restoring registers, constants, & GMEM */
2085static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
2086 struct adreno_context *drawctxt)
2087{
2088 calc_gmemsize(&drawctxt->context_gmem_shadow,
2089 adreno_dev->gmemspace.sizebytes);
2090 tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;
2091
2092 if (drawctxt->flags & CTXT_FLAGS_GMEM_SHADOW) {
2093 int result =
2094 kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
2095 drawctxt->pagetable,
2096 drawctxt->context_gmem_shadow.size);
2097
2098 if (result)
2099 return result;
2100 } else {
2101 memset(&drawctxt->context_gmem_shadow.gmemshadow, 0,
2102 sizeof(drawctxt->context_gmem_shadow.gmemshadow));
2103
2104 return 0;
2105 }
2106
2107 build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
2108 &tmp_ctx.cmd);
2109
2110 /* Dow we need to idle? */
2111 /* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */
2112
2113 tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
2114 &drawctxt->context_gmem_shadow);
2115 tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
2116 &drawctxt->context_gmem_shadow);
2117
2118 kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
2119 KGSL_CACHE_OP_FLUSH);
2120
2121 return 0;
2122}
2123
/*
 * a3xx_drawctxt_create() - allocate and populate the per-context GPU
 * state buffer (state shadows, save/restore IBs and GMEM shadow).
 *
 * Returns 0 on success or a negative error code; on failure the
 * gpustate allocation is released before returning.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more than just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
			    drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	/* Zero the buffer and point the command carve-out cursor at it */
	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	/* Full shadow save/restore is only needed without preambles */
	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	/* On any failure release the gpustate buffer allocated above */
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2162
/*
 * a3xx_drawctxt_save() - submit the save IBs for the outgoing context.
 *
 * Order matters: the fixup IB must run first so the save IBs contain
 * correct sizes/offsets, registers/constants are saved before shaders,
 * and GMEM is saved last because the GMEM save itself changes shader
 * state (so shaders must already be saved).  A NULL context is a no-op.
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			       struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			       "Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			/* Flag that a restore of the shaders is now needed */
			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2206
/*
 * a3xx_drawctxt_restore() - submit the restore IBs for the incoming
 * context and switch the MMU to its pagetable.
 *
 * With a NULL context only the default pagetable is installed.  GMEM is
 * restored before registers/constants/shaders because the GMEM restore
 * itself modifies shader state.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
				  struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/*
	 * Record the incoming context pointer in memstore so other code
	 * can identify the current context.
	 * NOTE(review): the cast assumes 32-bit pointers — fine for this
	 * platform, not portable.
	 */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
	cmds[4] = (unsigned int)context;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(device, context->pagetable);

	/*
	 * Restore GMEM. (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
						    context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->hlsqcontrol_restore, 3);
	}
}
2262
/*
 * a3xx_rb_init() - submit the CP_ME_INIT sequence that initializes the
 * micro engine after ringbuffer start.
 *
 * 18 dwords total: 1 packet header + 17 payload words.  The payload
 * values are fixed A3XX ME_INIT parameters; their individual field
 * meanings are defined by the CP microcode, not visible here.
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	/* GPU address of the space just allocated (18 words back from wptr) */
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2292
2293static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2294{
2295 struct kgsl_device *device = &adreno_dev->dev;
2296 const char *err = "";
2297
2298 switch (bit) {
2299 case A3XX_INT_RBBM_AHB_ERROR: {
2300 unsigned int reg;
2301
2302 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2303
2304 /*
2305 * Return the word address of the erroring register so that it
2306 * matches the register specification
2307 */
2308
2309 KGSL_DRV_CRIT(device,
2310 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2311 reg & (1 << 28) ? "WRITE" : "READ",
2312 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2313 (reg >> 24) & 0x3);
2314
2315 /* Clear the error */
2316 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2317 return;
2318 }
2319 case A3XX_INT_RBBM_REG_TIMEOUT:
2320 err = "RBBM: AHB register timeout";
2321 break;
2322 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2323 err = "RBBM: ME master split timeout";
2324 break;
2325 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2326 err = "RBBM: PFP master split timeout";
2327 break;
2328 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2329 err = "RBBM: ATB bus oveflow";
2330 break;
2331 case A3XX_INT_VFD_ERROR:
2332 err = "VFD: Out of bounds access";
2333 break;
2334 case A3XX_INT_CP_T0_PACKET_IN_IB:
2335 err = "ringbuffer TO packet in IB interrupt";
2336 break;
2337 case A3XX_INT_CP_OPCODE_ERROR:
2338 err = "ringbuffer opcode error interrupt";
2339 break;
2340 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2341 err = "ringbuffer reserved bit error interrupt";
2342 break;
2343 case A3XX_INT_CP_HW_FAULT:
2344 err = "ringbuffer hardware fault";
2345 break;
2346 case A3XX_INT_CP_REG_PROTECT_FAULT:
2347 err = "ringbuffer protected mode error interrupt";
2348 break;
2349 case A3XX_INT_CP_AHB_ERROR_HALT:
2350 err = "ringbuffer AHB error interrupt";
2351 break;
2352 case A3XX_INT_MISC_HANG_DETECT:
2353 err = "MISC: GPU hang detected";
2354 break;
2355 case A3XX_INT_UCHE_OOB_ACCESS:
2356 err = "UCHE: Out of bounds access";
2357 break;
2358 }
2359
2360 KGSL_DRV_CRIT(device, "%s\n", err);
2361 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2362}
2363
2364static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
2365{
2366 struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
2367
2368 if (irq == A3XX_INT_CP_RB_INT) {
2369 kgsl_sharedmem_writel(&rb->device->memstore,
2370 KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0);
2371 wmb();
2372 KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
2373 }
2374
2375 wake_up_interruptible_all(&rb->device->wait_queue);
2376
2377 /* Schedule work to free mem and issue ibs */
2378 queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);
2379
2380 atomic_notifier_call_chain(&rb->device->ts_notifier_list,
2381 rb->device->id, NULL);
2382}
2383
/* Initializer for one slot of the per-bit IRQ dispatch table below */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/* Interrupt bits enabled in A3XX_RBBM_INT_0_MASK (see a3xx_irq_control) */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) |      \
	 (1 << A3XX_INT_RBBM_ME_MS_TIMEOUT) |    \
	 (1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT) |   \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_VFD_ERROR) |             \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_MISC_HANG_DETECT) |      \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))

/*
 * Per-bit IRQ dispatch table, indexed by interrupt bit number.  A NULL
 * entry means the bit is unhandled (a3xx_irq_handler logs it).
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 23 - Unused */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2436
2437static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2438{
2439 struct kgsl_device *device = &adreno_dev->dev;
2440 irqreturn_t ret = IRQ_NONE;
2441 unsigned int status, tmp;
2442 int i;
2443
2444 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2445
2446 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2447 if (tmp & 1) {
2448 if (a3xx_irq_funcs[i].func != NULL) {
2449 a3xx_irq_funcs[i].func(adreno_dev, i);
2450 ret = IRQ_HANDLED;
2451 } else {
2452 KGSL_DRV_CRIT(device,
2453 "Unhandled interrupt bit %x\n", i);
2454 }
2455 }
2456
2457 tmp >>= 1;
2458 }
2459
2460 if (status)
2461 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2462 status);
2463 return ret;
2464}
2465
2466static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2467{
2468 struct kgsl_device *device = &adreno_dev->dev;
2469
2470 if (state)
2471 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2472 else
2473 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2474}
2475
/*
 * a3xx_busy_cycles() - sample RBBM power counter 1 (GPU busy cycles).
 *
 * Freezes the counter, reads it, then resets and re-enables it, so the
 * returned value is the busy cycle count accumulated since the previous
 * call.  The freeze/read/reset/re-enable order is required for a
 * consistent sample; do not reorder.
 */
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int reg, val;

	/* Freeze the counter */
	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
	reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Read the value */
	adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);

	/* Reset the counter */
	reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Re-enable the counter */
	reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
	reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	return val;
}
2500
/*
 * a3xx_start() - one-time A3XX hardware initialization: soft-reset the
 * core, pin VBIF traffic routing, and enable error reporting and the
 * power counters.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Reset the core */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
			0x00000001);
	/* Give the reset time to take effect before touching registers */
	msleep(20);

	/*
	 * enable fixed master AXI port of 0x0 for all clients to keep
	 * traffic from going to random places
	 */

	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable the RBBM error reporting bits. This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);
}
2533
/*
 * A3XX implementation of the adreno_gpudev interface: register offsets
 * plus the context, ringbuffer, IRQ and power entry points defined above.
 */
struct adreno_gpudev adreno_a3xx_gpudev = {
	.reg_rbbm_status = A3XX_RBBM_STATUS,
	.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
	.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,

	.ctxt_create = a3xx_drawctxt_create,
	.ctxt_save = a3xx_drawctxt_save,
	.ctxt_restore = a3xx_drawctxt_restore,
	.rb_init = a3xx_rb_init,
	.irq_control = a3xx_irq_control,
	.irq_handler = a3xx_irq_handler,
	.busy_cycles = a3xx_busy_cycles,
	.start = a3xx_start,
};