blob: 1bad811384447956d397823ae5b77e754acc0967 [file] [log] [blame]
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/delay.h>
15
16#include "kgsl.h"
17#include "adreno.h"
18#include "kgsl_sharedmem.h"
19#include "kgsl_cffdump.h"
20#include "a3xx_reg.h"
21
Jordan Crouse0c2761a2012-02-01 22:11:12 -070022/*
23 * Set of registers to dump for A3XX on postmortem and snapshot.
24 * Registers in pairs - first value is the start offset, second
25 * is the stop offset (inclusive)
26 */
27
28const unsigned int a3xx_registers[] = {
29 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
30 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
31 0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
32 0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
33 0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
34 0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
35 0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
36 0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
37 0x0579, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
38 0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
39 0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
40 0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
41 0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
42 0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
43 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
44 0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
45 0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
46 0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
47 0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
48 0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
49 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
50 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
51 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
52 0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
53 0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
54 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
55 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
56 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
57 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
58 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
59 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
60 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
61 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
62 0x2750, 0x2756, 0x2760, 0x2760,
63};
64
/*
 * Number of (start, stop) pairs in a3xx_registers[], i.e. half the number
 * of array elements.
 */
65const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
66
/*
 * Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions: places the field value _val at bit position _shift.
 *
 * The value is converted to unsigned int before the shift so that moving
 * bits into (or past) bit 31 is well defined; shifting a plain signed int
 * constant such as 0xFFFF left by 16 would otherwise be signed overflow.
 * The result therefore always has type unsigned int.
 */

#define _SET(_shift, _val) ((unsigned int)(_val) << (_shift))
72
73/*
74 ****************************************************************************
75 *
76 * Context state shadow structure:
77 *
78 * +---------------------+------------+-------------+---------------------+---+
79 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
80 * +---------------------+------------+-------------+---------------------+---+
81 *
82 * 8K - ALU Constant Shadow (8K aligned)
83 * 4K - H/W Register Shadow (8K aligned)
84 * 5K - Command and Vertex Buffers
85 * 8K - Shader Instruction Shadow
86 * ~6K - Texture Constant Shadow
87 *
88 *
89 ***************************************************************************
90 */
91
/* Byte sizes of the regions that make up the state shadow memory */
#define ALU_SHADOW_SIZE		(8 * 1024)	/* ALU constants, 8KB */
#define REG_SHADOW_SIZE		(4 * 1024)	/* H/W registers, 4KB */
#define CMD_BUFFER_SIZE		(5 * 1024)	/* command/vertex buffers, 5KB */
#define SHADER_SHADOW_SIZE	(8 * 1024)	/* shader instructions, 8KB */

/* Texture state pieces, in bytes */
#define TEX_SIZE_MEM_OBJECTS	896
#define TEX_SIZE_MIPMAP		1936
#define TEX_SIZE_SAMPLER_OBJ	256

/* Two sets of texture state (one VS, one FS), ~6KB in total */
#define TEX_SHADOW_SIZE \
	(2 * (TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + TEX_SIZE_SAMPLER_OBJ))

/* Size of the whole context shadow; the GMEM shadow is not counted */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE + REG_SHADOW_SIZE + CMD_BUFFER_SIZE + \
	 SHADER_SHADOW_SIZE + TEX_SHADOW_SIZE)

/*
 * Byte offset of every region from the start of the context shadow.
 * The regions are laid out back to back in the order listed here.
 */
#define REG_OFFSET		ALU_SHADOW_SIZE
#define CMD_OFFSET		(REG_OFFSET + REG_SHADOW_SIZE)
#define SHADER_OFFSET		(CMD_OFFSET + CMD_BUFFER_SIZE)
#define TEX_OFFSET		(SHADER_OFFSET + SHADER_SHADOW_SIZE)

/* Vertex shader texture state comes first ... */
#define VS_TEX_OFFSET_MEM_OBJECTS	TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP \
	(VS_TEX_OFFSET_MEM_OBJECTS + TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ \
	(VS_TEX_OFFSET_MIPMAP + TEX_SIZE_MIPMAP)

/* ... immediately followed by the fragment shader texture state */
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ + TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP \
	(FS_TEX_OFFSET_MEM_OBJECTS + TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ \
	(FS_TEX_OFFSET_MIPMAP + TEX_SIZE_MIPMAP)
122
/* Byte offset of the fragment shader data within the HLSQ context */
#define SSIZE			(16 * 1024)

/* Byte offsets of the texture state tables, relative to HLSQ_SHADOW_BASE */
#define HLSQ_SAMPLER_OFFSET	0x000
#define HLSQ_MEMOBJ_OFFSET	0x400
#define HLSQ_MIPMAP_OFFSET	0x800

/*
 * Base of the HLSQ RAM that is saved: working RAM by default, shadow RAM
 * when GSL_USE_A3XX_HLSQ_SHADOW_RAM is defined.
 */
#ifndef GSL_USE_A3XX_HLSQ_SHADOW_RAM
#define HLSQ_SHADOW_BASE	0x10000
#else
#define HLSQ_SHADOW_BASE	(0x10000+SSIZE*2)
#endif

/* Bit position of the loop (dword) count in a CP_REG_TO_MEM source dword */
#define REG_TO_MEM_LOOP_COUNT_SHIFT	15
139
/*
 * Compose a PC draw initiator dword from its fields.
 *
 * index_size is split over two fields: its bit 0 is placed at
 * PC_DRAW_INITIATOR_INDEX_SIZE and its remaining upper bits at
 * PC_DRAW_INITIATOR_SMALL_INDEX. The PRE_DRAW_INITIATOR_ENABLE bit is
 * always set.
 *
 * Every argument is parenthesized in the expansion, so an expression
 * argument (for example "a | b") is evaluated as a whole before it is
 * masked or shifted. index_size is expanded twice and must therefore be
 * free of side effects.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
148
149/*
 * List of context registers (starting from dword offset 0x2000).
 * Each row holds one range: the first register and the last register,
 * both included. A single register is written as a range whose two ends
 * are the same register.
 */
153static const unsigned int context_register_ranges[] = {
154 A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
155 A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
156 A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
157 A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
158 A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
159 A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
160 A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
161 A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
162 A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
163 A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
164 A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
165 A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
166 A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
167 A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
168 A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
169 A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
170 A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
171 A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
172 A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
173 A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
174 A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
175 A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
176 A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
177 A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
178 A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
179 A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
180 A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
181 A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
182 A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
183 A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
184 A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
185};
186
187/*
 * Global registers that need to be saved separately. Unlike
 * context_register_ranges[] this is a flat list: every entry names one
 * register, not one end of a range.
 */
188static const unsigned int global_registers[] = {
189 A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
190 A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
191 A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
192 A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
193 A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
194 A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
195 A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
196 A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
197 A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
198 A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
199 A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
200 A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
201 A3XX_VSC_BIN_SIZE,
202 A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
203 A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
204 A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
205 A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
206 A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
207 A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
208 A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
209 A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
210 A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
211 A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
212 A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
213 A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
214 A3XX_VSC_SIZE_ADDRESS
215};
216
/* Number of entries in global_registers[] */
217#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
218
219/* A scratchpad used to build commands during context create */
220static struct tmp_ctx {
221 unsigned int *cmd; /* Next available dword in C&V buffer */
222
223 /*
    * Addresses in command buffer where registers are saved; entry i is
    * the save destination used for global_registers[i]
    */
224 uint32_t reg_values[GLOBAL_REGISTER_COUNT];
225 uint32_t gmem_base; /* Base GPU address of GMEM */
226} tmp_ctx;
227
228#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
229/*
230 * Function for executing dest = ( (reg & and) ROL rol ) | or
231 */
232static unsigned int *rmw_regtomem(unsigned int *cmd,
233 unsigned int reg, unsigned int and,
234 unsigned int rol, unsigned int or,
235 unsigned int dest)
236{
237 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
238 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
239 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
240 *cmd++ = 0x00000000; /* AND value */
241 *cmd++ = reg; /* OR address */
242
243 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
244 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
245 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
246 *cmd++ = and; /* AND value */
247 *cmd++ = or; /* OR value */
248
249 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
250 *cmd++ = A3XX_CP_SCRATCH_REG2;
251 *cmd++ = dest;
252
253 return cmd;
254}
255#endif
256
257static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
258 struct adreno_context *drawctxt)
259{
260 unsigned int *cmd = tmp_ctx.cmd;
261 unsigned int *start = cmd;
262 unsigned int i;
263
264 drawctxt->constant_save_commands[0].hostptr = cmd;
265 drawctxt->constant_save_commands[0].gpuaddr =
266 virt2gpu(cmd, &drawctxt->gpustate);
267 cmd++;
268
269 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
270 *cmd++ = 0;
271
272#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
273 /*
274 * Context registers are already shadowed; just need to
275 * disable shadowing to prevent corruption.
276 */
277
278 *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
279 *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
280 *cmd++ = 4 << 16; /* regs, start=0 */
281 *cmd++ = 0x0; /* count = 0 */
282
283#else
284 /*
285 * Make sure the HW context has the correct register values before
286 * reading them.
287 */
288
289 /* Write context registers into shadow */
290 for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
291 unsigned int start = context_register_ranges[i * 2];
292 unsigned int end = context_register_ranges[i * 2 + 1];
293 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
294 *cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
295 start;
296 *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
297 & 0xFFFFE000) + (start - 0x2000) * 4;
298 }
299#endif
300
301 /* Need to handle some of the global registers separately */
302 for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
303 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
304 *cmd++ = global_registers[i];
305 *cmd++ = tmp_ctx.reg_values[i];
306 }
307
308 /* Save vertex shader constants */
309 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
310 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
311 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
312 *cmd++ = 0x0000FFFF;
313 *cmd++ = 3; /* EXEC_COUNT */
314 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
315 drawctxt->constant_save_commands[1].hostptr = cmd;
316 drawctxt->constant_save_commands[1].gpuaddr =
317 virt2gpu(cmd, &drawctxt->gpustate);
318 /*
319 From fixup:
320
321 dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
322 src = (HLSQ_SHADOW_BASE + 0x2000) / 4
323
324 From register spec:
325 SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
326 */
327 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
328 /* ALU constant shadow base */
329 *cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;
330
331 /* Save fragment shader constants */
332 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
333 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
334 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
335 *cmd++ = 0x0000FFFF;
336 *cmd++ = 3; /* EXEC_COUNT */
337 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
338 drawctxt->constant_save_commands[2].hostptr = cmd;
339 drawctxt->constant_save_commands[2].gpuaddr =
340 virt2gpu(cmd, &drawctxt->gpustate);
341 /*
342 From fixup:
343
344 dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
345 src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4
346
347 From register spec:
348 SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
349 */
350 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
351
352 /*
353 From fixup:
354
355 base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
356 offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET
357
358 From register spec:
359 SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
360 start offset in on chip RAM,
361 128bit aligned
362
363 dst = base + offset
364 Because of the base alignment we can use
365 dst = base | offset
366 */
367 *cmd++ = 0; /* dst */
368
369 /* Save VS texture memory objects */
370 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
371 *cmd++ =
372 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
373 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
374 *cmd++ =
375 (drawctxt->gpustate.gpuaddr +
376 VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
377
378 /* Save VS texture mipmap pointers */
379 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
380 *cmd++ =
381 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
382 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
383 *cmd++ =
384 (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
385
386 /* Save VS texture sampler objects */
387 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
388 *cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
389 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
390 *cmd++ =
391 (drawctxt->gpustate.gpuaddr +
392 VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
393
394 /* Save FS texture memory objects */
395 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
396 *cmd++ =
397 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
398 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
399 *cmd++ =
400 (drawctxt->gpustate.gpuaddr +
401 FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
402
403 /* Save FS texture mipmap pointers */
404 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
405 *cmd++ =
406 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
407 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
408 *cmd++ =
409 (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
410
411 /* Save FS texture sampler objects */
412 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
413 *cmd++ =
414 ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
415 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
416 *cmd++ =
417 (drawctxt->gpustate.gpuaddr +
418 FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
419
420 /* Create indirect buffer command for above command sequence */
421 create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);
422
423 tmp_ctx.cmd = cmd;
424}
425
426/* Copy GMEM contents to system memory shadow. */
427static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
428 struct adreno_context *drawctxt,
429 struct gmem_shadow_t *shadow)
430{
431 unsigned int *cmds = tmp_ctx.cmd;
432 unsigned int *start = cmds;
433
434 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
435 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
436
437 /* RB_MODE_CONTROL */
438 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
439 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
440 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
441 /* RB_RENDER_CONTROL */
442 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
443 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
444
445 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
446 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
447 /* RB_COPY_CONTROL */
448 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
449 RB_CLEAR_MODE_RESOLVE) |
450 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
451 tmp_ctx.gmem_base >> 14);
452 /* RB_COPY_DEST_BASE */
453 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
454 shadow->gmemshadow.gpuaddr >> 5);
455 /* RB_COPY_DEST_PITCH */
456 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
457 (shadow->pitch * 4) / 32);
458 /* RB_COPY_DEST_INFO */
459 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
460 RB_TILINGMODE_LINEAR) |
461 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
462 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
463 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
464
465 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
466 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
467 /* GRAS_SC_CONTROL */
468 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
469
470 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
471 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
472 /* VFD_CONTROL_0 */
473 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
474 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
475 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
476 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
477 /* VFD_CONTROL_1 */
478 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
479 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
480 _SET(VFD_CTRLREG1_REGID4INST, 252);
481
482 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
483 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
484 /* VFD_FETCH_INSTR_0_0 */
485 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
486 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
487 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
488 /* VFD_FETCH_INSTR_1_0 */
489 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
490 shadow->quad_vertices.gpuaddr);
491
492 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
493 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
494 /* VFD_DECODE_INSTR_0 */
495 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
496 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
497 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
498 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
499 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
500 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
501
502 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
503 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
504 /* HLSQ_CONTROL_0_REG */
505 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
506 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
507 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
508 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
509 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
510 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
511 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
512 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
513 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
514 /* HLSQ_CONTROL_1_REG */
515 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
516 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
517 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
518 /* HLSQ_CONTROL_2_REG */
519 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
520 /* HLSQ_CONTROL_3_REG */
521 *cmds++ = 0x00000000;
522
523 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
524 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
525 /* HLSQ_VS_CONTROL_REG */
526 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
527 /* HLSQ_FS_CONTROL_REG */
528 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
529 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
530 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
531 /* HLSQ_CONST_VSPRESV_RANGE_REG */
532 *cmds++ = 0x00000000;
533 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
534 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
535 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
536
537 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
538 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
539 /* SP_FS_LENGTH_REG */
540 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
541
542 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
543 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
544 /* SP_SP_CTRL_REG */
545 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
546 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
547
548 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
549 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
550 /* SP_VS_CTRL_REG0 */
551 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
552 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
553 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
554 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
555 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
556 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
557 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
558 /* SP_VS_CTRL_REG1 */
559 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
560 /* SP_VS_PARAM_REG */
561 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
562 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
563 /* SP_VS_OUT_REG_0 */
564 *cmds++ = 0x00000000;
565 /* SP_VS_OUT_REG_1 */
566 *cmds++ = 0x00000000;
567 /* SP_VS_OUT_REG_2 */
568 *cmds++ = 0x00000000;
569 /* SP_VS_OUT_REG_3 */
570 *cmds++ = 0x00000000;
571 /* SP_VS_OUT_REG_4 */
572 *cmds++ = 0x00000000;
573 /* SP_VS_OUT_REG_5 */
574 *cmds++ = 0x00000000;
575 /* SP_VS_OUT_REG_6 */
576 *cmds++ = 0x00000000;
577 /* SP_VS_OUT_REG_7 */
578 *cmds++ = 0x00000000;
579
580 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
581 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
582 /* SP_VS_VPC_DST_REG_0 */
583 *cmds++ = 0x00000000;
584 /* SP_VS_VPC_DST_REG_1 */
585 *cmds++ = 0x00000000;
586 /* SP_VS_VPC_DST_REG_2 */
587 *cmds++ = 0x00000000;
588 /* SP_VS_VPC_DST_REG_3 */
589 *cmds++ = 0x00000000;
590 /* SP_VS_OBJ_OFFSET_REG */
591 *cmds++ = 0x00000000;
592 /* SP_VS_OBJ_START_REG */
593 *cmds++ = 0x00000000;
594
595 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
596 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
597 /* SP_VS_LENGTH_REG */
598 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
599 /* SP_FS_CTRL_REG0 */
600 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
601 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
602 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
603 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
604 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
605 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
606 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
607 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
608 /* SP_FS_CTRL_REG1 */
609 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
610 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
611 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
612 /* SP_FS_OBJ_OFFSET_REG */
613 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
614 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
615 /* SP_FS_OBJ_START_REG */
616 *cmds++ = 0x00000000;
617
618 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
619 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
620 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
621 *cmds++ = 0x00000000;
622 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
623 *cmds++ = 0x00000000;
624
625 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
626 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
627 /* SP_FS_OUTPUT_REG */
628 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
629
630 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
631 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
632 /* SP_FS_MRT_REG_0 */
633 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
634 /* SP_FS_MRT_REG_1 */
635 *cmds++ = 0x00000000;
636 /* SP_FS_MRT_REG_2 */
637 *cmds++ = 0x00000000;
638 /* SP_FS_MRT_REG_3 */
639 *cmds++ = 0x00000000;
640
641 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
642 *cmds++ = CP_REG(A3XX_VPC_ATTR);
643 /* VPC_ATTR */
644 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
645 _SET(VPC_VPCATTR_LMSIZE, 1);
646 /* VPC_PACK */
647 *cmds++ = 0x00000000;
648 /* VPC_VARRYING_INTERUPT_MODE_0 */
649 *cmds++ = 0x00000000;
650 /* VPC_VARRYING_INTERUPT_MODE_1 */
651 *cmds++ = 0x00000000;
652 /* VPC_VARRYING_INTERUPT_MODE_2 */
653 *cmds++ = 0x00000000;
654 /* VPC_VARRYING_INTERUPT_MODE_3 */
655 *cmds++ = 0x00000000;
656 /* VPC_VARYING_PS_REPL_MODE_0 */
657 *cmds++ = 0x00000000;
658 /* VPC_VARYING_PS_REPL_MODE_1 */
659 *cmds++ = 0x00000000;
660 /* VPC_VARYING_PS_REPL_MODE_2 */
661 *cmds++ = 0x00000000;
662 /* VPC_VARYING_PS_REPL_MODE_3 */
663 *cmds++ = 0x00000000;
664
665 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
666 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
667 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
668 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
669 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
670 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
671 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
672
673 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
674 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
675 /* end; */
676 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
677 /* nop; */
678 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
679 /* nop; */
680 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
681
682 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
683 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
684 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
685 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
686 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
687 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
688 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
689
690 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
691 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
692 /* end; */
693 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
694 /* nop; */
695 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
696 /* nop; */
697 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
698
699 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
700 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
701 /* RB_MSAA_CONTROL */
702 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
703 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
704
705 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
706 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
707 /* RB_DEPTH_CONTROL */
708 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
709
710 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
711 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
712 /* RB_MRT_CONTROL0 */
713 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
714 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
715 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
716 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
717
718 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
719 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
720 /* RB_MRT_BLEND_CONTROL0 */
721 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
722 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
723 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
724 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
725 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
726 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
727 /* RB_MRT_CONTROL1 */
728 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
729 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
730 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
731
732 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
733 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
734 /* RB_MRT_BLEND_CONTROL1 */
735 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
736 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
737 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
738 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
739 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
740 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
741 /* RB_MRT_CONTROL2 */
742 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
743 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
744 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
745
746 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
747 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
748 /* RB_MRT_BLEND_CONTROL2 */
749 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
750 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
751 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
752 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
753 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
754 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
755 /* RB_MRT_CONTROL3 */
756 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
757 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
758 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
759
760 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
761 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
762 /* RB_MRT_BLEND_CONTROL3 */
763 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
764 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
765 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
766 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
767 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
768 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
769
770 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
771 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
772 /* VFD_INDEX_MIN */
773 *cmds++ = 0x00000000;
774 /* VFD_INDEX_MAX */
775 *cmds++ = 0xFFFFFFFF;
776 /* VFD_INSTANCEID_OFFSET */
777 *cmds++ = 0x00000000;
778 /* VFD_INDEX_OFFSET */
779 *cmds++ = 0x00000000;
780
781 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
782 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
783 /* VFD_VS_THREADING_THRESHOLD */
784 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
785 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
786
787 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
788 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
789 /* TPL1_TP_VS_TEX_OFFSET */
790 *cmds++ = 0;
791
792 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
793 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
794 /* TPL1_TP_FS_TEX_OFFSET */
795 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
796 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
797 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
798
799 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
800 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
801 /* PC_PRIM_VTX_CNTL */
802 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
803 PC_DRAW_TRIANGLES) |
804 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
805 PC_DRAW_TRIANGLES) |
806 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
807
808 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
809 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
810 /* GRAS_SC_WINDOW_SCISSOR_TL */
811 *cmds++ = 0x00000000;
812 /* GRAS_SC_WINDOW_SCISSOR_BR */
813 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
814 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
815
816 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
817 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
818 /* GRAS_SC_SCREEN_SCISSOR_TL */
819 *cmds++ = 0x00000000;
820 /* GRAS_SC_SCREEN_SCISSOR_BR */
821 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
822 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
823
824 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
825 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
826 /* GRAS_CL_VPORT_XOFFSET */
827 *cmds++ = 0x00000000;
828 /* GRAS_CL_VPORT_XSCALE */
829 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
830 /* GRAS_CL_VPORT_YOFFSET */
831 *cmds++ = 0x00000000;
832 /* GRAS_CL_VPORT_YSCALE */
833 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
834
835 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
836 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
837 /* GRAS_CL_VPORT_ZOFFSET */
838 *cmds++ = 0x00000000;
839 /* GRAS_CL_VPORT_ZSCALE */
840 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
841
842 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
843 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
844 /* GRAS_CL_CLIP_CNTL */
845 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
846 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
847 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
848 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
849 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
850
851 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
852 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
853 /* GRAS_CL_GB_CLIP_ADJ */
854 *cmds++ = 0x00000000;
855
856 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
857 *cmds++ = 0x00000000;
858
859 /*
860 * Resolve using two draw calls with a dummy register
861 * write in between. This is a HLM workaround
862 * that should be removed later.
863 */
864 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
865 *cmds++ = 0x00000000; /* Viz query info */
866 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
867 PC_DI_SRC_SEL_IMMEDIATE,
868 PC_DI_INDEX_SIZE_32_BIT,
869 PC_DI_IGNORE_VISIBILITY);
870 *cmds++ = 0x00000003; /* Num indices */
871 *cmds++ = 0x00000000; /* Index 0 */
872 *cmds++ = 0x00000001; /* Index 1 */
873 *cmds++ = 0x00000002; /* Index 2 */
874
875 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
876 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
877 *cmds++ = 0x00000000;
878
879 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
880 *cmds++ = 0x00000000; /* Viz query info */
881 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
882 PC_DI_SRC_SEL_IMMEDIATE,
883 PC_DI_INDEX_SIZE_32_BIT,
884 PC_DI_IGNORE_VISIBILITY);
885 *cmds++ = 0x00000003; /* Num indices */
886 *cmds++ = 0x00000002; /* Index 0 */
887 *cmds++ = 0x00000001; /* Index 1 */
888 *cmds++ = 0x00000003; /* Index 2 */
889
890 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
891 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
892 *cmds++ = 0x00000000;
893
894 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
895 *cmds++ = 0x00000000;
896
897 /* Create indirect buffer command for above command sequence */
898 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
899
900 return cmds;
901}
902
903static void build_shader_save_cmds(struct adreno_device *adreno_dev,
904 struct adreno_context *drawctxt)
905{
906 unsigned int *cmd = tmp_ctx.cmd;
907 unsigned int *start;
908
909 /* Reserve space for boolean values used for COND_EXEC packet */
910 drawctxt->cond_execs[0].hostptr = cmd;
911 drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
912 *cmd++ = 0;
913 drawctxt->cond_execs[1].hostptr = cmd;
914 drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
915 *cmd++ = 0;
916
917 drawctxt->shader_save_commands[0].hostptr = cmd;
918 drawctxt->shader_save_commands[0].gpuaddr =
919 virt2gpu(cmd, &drawctxt->gpustate);
920 *cmd++ = 0;
921 drawctxt->shader_save_commands[1].hostptr = cmd;
922 drawctxt->shader_save_commands[1].gpuaddr =
923 virt2gpu(cmd, &drawctxt->gpustate);
924 *cmd++ = 0;
925
926 start = cmd;
927
928 /* Save vertex shader */
929
930 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
931 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
932 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
933 *cmd++ = 0x0000FFFF;
934 *cmd++ = 3; /* EXEC_COUNT */
935
936 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
937 drawctxt->shader_save_commands[2].hostptr = cmd;
938 drawctxt->shader_save_commands[2].gpuaddr =
939 virt2gpu(cmd, &drawctxt->gpustate);
940 /*
941 From fixup:
942
943 dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
944
945 From regspec:
946 SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
947 If bit31 is 1, it means overflow
948 or any long shader.
949
950 src = (HLSQ_SHADOW_BASE + 0x1000)/4
951 */
952 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
953 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;
954
955 /* Save fragment shader */
956 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
957 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
958 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
959 *cmd++ = 0x0000FFFF;
960 *cmd++ = 3; /* EXEC_COUNT */
961
962 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
963 drawctxt->shader_save_commands[3].hostptr = cmd;
964 drawctxt->shader_save_commands[3].gpuaddr =
965 virt2gpu(cmd, &drawctxt->gpustate);
966 /*
967 From fixup:
968
969 dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
970
971 From regspec:
972 SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
973 If bit31 is 1, it means overflow
974 or any long shader.
975
976 fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
977 From regspec:
978
979 SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
980 First instruction of the whole shader will be stored from
981 the offset in instruction cache, unit = 256bits, a cache line.
982 It can start from 0 if no VS available.
983
984 src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
985 */
986 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
987 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
988 + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;
989
990 /* Create indirect buffer command for above command sequence */
991 create_ib1(drawctxt, drawctxt->shader_save, start, cmd);
992
993 tmp_ctx.cmd = cmd;
994}
995
996/*
997 * Make an IB to modify context save IBs with the correct shader instruction
998 * and constant sizes and offsets.
999 */
1000
1001static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
1002 struct adreno_context *drawctxt)
1003{
1004 unsigned int *cmd = tmp_ctx.cmd;
1005 unsigned int *start = cmd;
1006
1007 /* Flush HLSQ lazy updates */
1008 *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
1009 *cmd++ = 0x7; /* HLSQ_FLUSH */
1010 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1011 *cmd++ = 0;
1012
1013 *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
1014 *cmd++ = 0x00000000; /* No start addr for full invalidate */
1015 *cmd++ = (unsigned int)
1016 UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
1017 UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
1018 0; /* No end addr for full invalidate */
1019
1020 /* Make sure registers are flushed */
1021 *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
1022 *cmd++ = 0;
1023
1024#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
1025
1026 /* Save shader sizes */
1027 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1028 *cmd++ = A3XX_SP_VS_CTRL_REG0;
1029 *cmd++ = drawctxt->shader_save_commands[2].gpuaddr;
1030
1031 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1032 *cmd++ = A3XX_SP_FS_CTRL_REG0;
1033 *cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1034
1035 /* Save shader offsets */
1036 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1037 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1038 *cmd++ = drawctxt->shader_save_commands[1].gpuaddr;
1039
1040 /* Save constant sizes */
1041 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1042 *cmd++ = A3XX_SP_VS_CTRL_REG1;
1043 *cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
1044 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1045 *cmd++ = A3XX_SP_FS_CTRL_REG1;
1046 *cmd++ = drawctxt->constant_save_commands[2].gpuaddr;
1047
1048 /* Save FS constant offset */
1049 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1050 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1051 *cmd++ = drawctxt->constant_save_commands[0].gpuaddr;
1052
1053
1054 /* Save VS instruction store mode */
1055 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1056 *cmd++ = A3XX_SP_VS_CTRL_REG0;
1057 *cmd++ = drawctxt->cond_execs[0].gpuaddr;
1058
1059 /* Save FS instruction store mode */
1060 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1061 *cmd++ = A3XX_SP_FS_CTRL_REG0;
1062 *cmd++ = drawctxt->cond_execs[1].gpuaddr;
1063#else
1064
1065 /* Shader save */
1066 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
1067 11+REG_TO_MEM_LOOP_COUNT_SHIFT,
1068 (HLSQ_SHADOW_BASE + 0x1000) / 4,
1069 drawctxt->shader_save_commands[2].gpuaddr);
1070
1071 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
1072 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1073 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1074 *cmd++ = 0x00000000; /* AND value */
1075 *cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
1076 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
1077 | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
1078 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1079 *cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
1080 A3XX_CP_SCRATCH_REG2;
1081 *cmd++ = 0x7f000000; /* AND value */
1082 *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */
1083
1084 /*
1085 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
1086 * SP_FS_OBJ_OFFSET_REG
1087 */
1088
1089 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1090 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
1091 *cmd++ = 0x00000000; /* AND value */
1092 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
1093 /*
1094 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
1095 * 0x00000000
1096 */
1097 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1098 *cmd++ = A3XX_CP_SCRATCH_REG3;
1099 *cmd++ = 0xfe000000; /* AND value */
1100 *cmd++ = 0x00000000; /* OR value */
1101 /*
1102 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
1103 */
1104 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1105 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1106 *cmd++ = 0xffffffff; /* AND value */
1107 *cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */
1108
1109 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1110 *cmd++ = A3XX_CP_SCRATCH_REG2;
1111 *cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1112
1113 /* Constant save */
1114 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
1115 17, (HLSQ_SHADOW_BASE + 0x2000) / 4,
1116 drawctxt->constant_save_commands[1].gpuaddr);
1117
1118 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
1119 17, (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
1120 drawctxt->constant_save_commands[2].gpuaddr);
1121
1122 cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
1123 18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
1124 drawctxt->constant_save_commands[2].gpuaddr
1125 + sizeof(unsigned int));
1126
1127 /* Modify constant save conditionals */
1128 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
1129 0, 0, drawctxt->cond_execs[2].gpuaddr);
1130
1131 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
1132 0, 0, drawctxt->cond_execs[3].gpuaddr);
1133
1134 /* Save VS instruction store mode */
1135
1136 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
1137 31, 0, drawctxt->cond_execs[0].gpuaddr);
1138
1139 /* Save FS instruction store mode */
1140 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
1141 31, 0, drawctxt->cond_execs[1].gpuaddr);
1142
1143#endif
1144
1145 create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);
1146
1147 tmp_ctx.cmd = cmd;
1148}
1149
1150/****************************************************************************/
1151/* Functions to build context restore IBs */
1152/****************************************************************************/
1153
1154static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1155 struct adreno_context *drawctxt,
1156 struct gmem_shadow_t *shadow)
1157{
1158 unsigned int *cmds = tmp_ctx.cmd;
1159 unsigned int *start = cmds;
1160
1161 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1162 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1163 /* HLSQ_CONTROL_0_REG */
1164 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1165 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1166 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1167 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1168 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1169 /* HLSQ_CONTROL_1_REG */
1170 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1171 /* HLSQ_CONTROL_2_REG */
1172 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1173 /* HLSQ_CONTROL3_REG */
1174 *cmds++ = 0x00000000;
1175
1176 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1177 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1178 /* RB_MRT_BUF_INFO0 */
1179 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1180 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1181 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1182 (shadow->gmem_pitch * 4 * 8) / 256);
1183 /* RB_MRT_BUF_BASE0 */
1184 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1185
1186 /* Texture samplers */
1187 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1188 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1189 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1190 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1191 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1192 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1193 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1194 *cmds++ = 0x00000240;
1195 *cmds++ = 0x00000000;
1196
1197 /* Texture memobjs */
1198 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1199 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1200 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1201 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1202 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1203 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1204 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1205 *cmds++ = 0x4cc06880;
1206 *cmds++ = shadow->height | (shadow->width << 14);
1207 *cmds++ = (shadow->pitch*4*8) << 9;
1208 *cmds++ = 0x00000000;
1209
1210 /* Mipmap bases */
1211 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1212 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1213 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1214 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1215 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1216 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1217 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1218 *cmds++ = shadow->gmemshadow.gpuaddr;
1219 *cmds++ = 0x00000000;
1220 *cmds++ = 0x00000000;
1221 *cmds++ = 0x00000000;
1222 *cmds++ = 0x00000000;
1223 *cmds++ = 0x00000000;
1224 *cmds++ = 0x00000000;
1225 *cmds++ = 0x00000000;
1226 *cmds++ = 0x00000000;
1227 *cmds++ = 0x00000000;
1228 *cmds++ = 0x00000000;
1229 *cmds++ = 0x00000000;
1230 *cmds++ = 0x00000000;
1231 *cmds++ = 0x00000000;
1232
1233 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1234 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1235 /* HLSQ_VS_CONTROL_REG */
1236 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1237 /* HLSQ_FS_CONTROL_REG */
1238 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1239 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1240 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1241 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1242 *cmds++ = 0x00000000;
1243 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1244 *cmds++ = 0x00000000;
1245
1246 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1247 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1248 /* SP_FS_LENGTH_REG */
1249 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1250
1251 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1252 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1253 /* SP_VS_CTRL_REG0 */
1254 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1255 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1256 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1257 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1258 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1259 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1260 /* SP_VS_CTRL_REG1 */
1261 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1262 /* SP_VS_PARAM_REG */
1263 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1264 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1265 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1266 /* SP_VS_OUT_REG0 */
1267 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1268 /* SP_VS_OUT_REG1 */
1269 *cmds++ = 0x00000000;
1270 /* SP_VS_OUT_REG2 */
1271 *cmds++ = 0x00000000;
1272 /* SP_VS_OUT_REG3 */
1273 *cmds++ = 0x00000000;
1274 /* SP_VS_OUT_REG4 */
1275 *cmds++ = 0x00000000;
1276 /* SP_VS_OUT_REG5 */
1277 *cmds++ = 0x00000000;
1278 /* SP_VS_OUT_REG6 */
1279 *cmds++ = 0x00000000;
1280 /* SP_VS_OUT_REG7 */
1281 *cmds++ = 0x00000000;
1282
1283 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1284 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1285 /* SP_VS_VPC_DST_REG0 */
1286 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1287 /* SP_VS_VPC_DST_REG1 */
1288 *cmds++ = 0x00000000;
1289 /* SP_VS_VPC_DST_REG2 */
1290 *cmds++ = 0x00000000;
1291 /* SP_VS_VPC_DST_REG3 */
1292 *cmds++ = 0x00000000;
1293 /* SP_VS_OBJ_OFFSET_REG */
1294 *cmds++ = 0x00000000;
1295 /* SP_VS_OBJ_START_REG */
1296 *cmds++ = 0x00000000;
1297
1298 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1299 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1300 /* SP_VS_LENGTH_REG */
1301 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1302 /* SP_FS_CTRL_REG0 */
1303 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1304 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1305 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1306 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1307 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1308 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1309 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1310 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1311 /* SP_FS_CTRL_REG1 */
1312 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1313 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1314 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1315 /* SP_FS_OBJ_OFFSET_REG */
1316 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128);
1317 /* SP_FS_OBJ_START_REG */
1318 *cmds++ = 0x00000000;
1319
1320 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1321 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1322 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1323 *cmds++ = 0x00000000;
1324 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1325 *cmds++ = 0x00000000;
1326
1327 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1328 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1329 /* SP_FS_OUT_REG */
1330 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1331
1332 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1333 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1334 /* SP_FS_MRT_REG0 */
1335 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1336 /* SP_FS_MRT_REG1 */
1337 *cmds++ = 0;
1338 /* SP_FS_MRT_REG2 */
1339 *cmds++ = 0;
1340 /* SP_FS_MRT_REG3 */
1341 *cmds++ = 0;
1342
1343 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1344 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1345 /* VPC_ATTR */
1346 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1347 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1348 _SET(VPC_VPCATTR_LMSIZE, 1);
1349 /* VPC_PACK */
1350 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1351 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1352 /* VPC_VARYING_INTERP_MODE_0 */
1353 *cmds++ = 0x00000000;
1354 /* VPC_VARYING_INTERP_MODE1 */
1355 *cmds++ = 0x00000000;
1356 /* VPC_VARYING_INTERP_MODE2 */
1357 *cmds++ = 0x00000000;
1358 /* VPC_VARYING_IINTERP_MODE3 */
1359 *cmds++ = 0x00000000;
1360 /* VPC_VARRYING_PS_REPL_MODE_0 */
1361 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1362 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1363 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1364 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1365 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1366 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1367 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1377 /* VPC_VARRYING_PS_REPL_MODE_1 */
1378 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1379 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1380 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1381 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1382 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1388 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1389 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1392 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1393 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1394 /* VPC_VARRYING_PS_REPL_MODE_2 */
1395 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1396 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1397 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1398 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1399 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1405 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1406 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1409 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1410 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1411 /* VPC_VARRYING_PS_REPL_MODE_3 */
1412 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1413 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1414 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1415 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1416 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1422 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1423 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1426 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1427 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1428
1429 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1430 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1431 /* SP_SP_CTRL_REG */
1432 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1433
1434 /* Load vertex shader */
1435 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1436 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1437 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1438 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1439 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1440 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1441 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1442 /* (sy)end; */
1443 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1444 /* nop; */
1445 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1446 /* nop; */
1447 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1448 /* nop; */
1449 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1450
1451 /* Load fragment shader */
1452 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1453 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1454 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1455 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1456 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1457 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1458 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1459 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1460 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1461 /* (rpt5)nop; */
1462 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1463 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1464 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1465 /* (sy)mov.f32f32 r1.x, r0.x; */
1466 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1467 /* mov.f32f32 r1.y, r0.y; */
1468 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1469 /* mov.f32f32 r1.z, r0.z; */
1470 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1471 /* mov.f32f32 r1.w, r0.w; */
1472 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1473 /* end; */
1474 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1475
1476 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1477 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1478 /* VFD_CONTROL_0 */
1479 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1480 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1481 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1482 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1483 /* VFD_CONTROL_1 */
1484 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1485 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1486 _SET(VFD_CTRLREG1_REGID4INST, 252);
1487
1488 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1489 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1490 /* VFD_FETCH_INSTR_0_0 */
1491 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1492 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1493 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1494 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1495 /* VFD_FETCH_INSTR_1_0 */
1496 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1497 shadow->quad_vertices_restore.gpuaddr);
1498 /* VFD_FETCH_INSTR_0_1 */
1499 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1500 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1501 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1502 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1503 /* VFD_FETCH_INSTR_1_1 */
1504 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1505 shadow->quad_vertices_restore.gpuaddr + 16);
1506
1507 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1508 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1509 /* VFD_DECODE_INSTR_0 */
1510 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1511 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1512 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1513 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1514 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1515 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1516 /* VFD_DECODE_INSTR_1 */
1517 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1518 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1519 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1520 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1521 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1522 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1523
1524 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1525 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1526 /* RB_DEPTH_CONTROL */
1527 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1528
1529 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1530 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1531 /* RB_STENCIL_CONTROL */
1532 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1533 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1534 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1535 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1536 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1537 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1538 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1539 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1540
1541 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1542 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1543 /* RB_MODE_CONTROL */
1544 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1545 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1546
1547 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1548 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1549 /* RB_RENDER_CONTROL */
1550 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1551 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1552
1553 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1554 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1555 /* RB_MSAA_CONTROL */
1556 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1557 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1558
1559 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1560 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1561 /* RB_MRT_CONTROL0 */
1562 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1563 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1564 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1565 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1566
1567 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1568 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1569 /* RB_MRT_BLENDCONTROL0 */
1570 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1571 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1572 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1573 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1574 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1575 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1576 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1577 /* RB_MRT_CONTROL1 */
1578 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1579 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1580 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1581
1582 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1583 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1584 /* RB_MRT_BLENDCONTROL1 */
1585 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1586 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1587 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1588 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1589 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1590 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1591 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1592 /* RB_MRT_CONTROL2 */
1593 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1594 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1595 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1596
1597 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1598 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1599 /* RB_MRT_BLENDCONTROL2 */
1600 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1601 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1602 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1603 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1604 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1605 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1606 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1607 /* RB_MRT_CONTROL3 */
1608 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1609 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1610 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1611
1612 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1613 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1614 /* RB_MRT_BLENDCONTROL3 */
1615 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1616 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1617 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1618 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1619 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1620 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1621 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1622
1623 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1624 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1625 /* VFD_INDEX_MIN */
1626 *cmds++ = 0x00000000;
1627 /* VFD_INDEX_MAX */
1628 *cmds++ = 0xFFFFFFFF;
1629 /* VFD_INDEX_OFFSET */
1630 *cmds++ = 0x00000000;
1631 /* TPL1_TP_VS_TEX_OFFSET */
1632 *cmds++ = 0x00000000;
1633
1634 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1635 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1636 /* VFD_VS_THREADING_THRESHOLD */
1637 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1638 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1639
1640 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1641 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1642 /* TPL1_TP_VS_TEX_OFFSET */
1643 *cmds++ = 0x00000000;
1644
1645 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1646 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1647 /* TPL1_TP_FS_TEX_OFFSET */
1648 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1649 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1650 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1651
1652 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1653 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1654 /* GRAS_SC_CONTROL */
1655 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1656
1657 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1658 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1659 /* GRAS_SU_MODE_CONTROL */
1660 *cmds++ = 0x00000000;
1661
1662 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1663 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1664 /* GRAS_SC_WINDOW_SCISSOR_TL */
1665 *cmds++ = 0x00000000;
1666 /* GRAS_SC_WINDOW_SCISSOR_BR */
1667 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1668 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1669
1670 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1671 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1672 /* GRAS_SC_SCREEN_SCISSOR_TL */
1673 *cmds++ = 0x00000000;
1674 /* GRAS_SC_SCREEN_SCISSOR_BR */
1675 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1676 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1677
1678 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1679 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1680 /* GRAS_CL_VPORT_XOFFSET */
1681 *cmds++ = 0x00000000;
1682 /* GRAS_CL_VPORT_XSCALE */
1683 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1684 /* GRAS_CL_VPORT_YOFFSET */
1685 *cmds++ = 0x00000000;
1686 /* GRAS_CL_VPORT_YSCALE */
1687 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1688
1689 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1690 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1691 /* GRAS_CL_VPORT_ZOFFSET */
1692 *cmds++ = 0x00000000;
1693 /* GRAS_CL_VPORT_ZSCALE */
1694 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1695
1696 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1697 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1698 /* GRAS_CL_CLIP_CNTL */
1699 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1700
1701 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1702 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1703 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1704 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1705
1706 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1707 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1708 /* PC_PRIM_VTX_CONTROL */
1709 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1710 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1711 PC_DRAW_TRIANGLES) |
1712 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1713 PC_DRAW_TRIANGLES) |
1714 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1715
1716 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1717 *cmds++ = 0x00000000; /* Viz query info */
1718 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1719 PC_DI_SRC_SEL_AUTO_INDEX,
1720 PC_DI_INDEX_SIZE_16_BIT,
1721 PC_DI_IGNORE_VISIBILITY);
1722 *cmds++ = 0x00000002; /* Num indices */
1723
1724 /* Create indirect buffer command for above command sequence */
1725 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1726
1727 return cmds;
1728}
1729
1730static void build_regrestore_cmds(struct adreno_device *adreno_dev,
1731 struct adreno_context *drawctxt)
1732{
1733 unsigned int *start = tmp_ctx.cmd;
1734 unsigned int *cmd = start;
1735 unsigned int *lcc_start;
1736
1737 int i;
1738
1739 /* Flush HLSQ lazy updates */
1740 *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
1741 *cmd++ = 0x7; /* HLSQ_FLUSH */
1742 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1743 *cmd++ = 0;
1744
1745 *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
1746 *cmd++ = 0x00000000; /* No start addr for full invalidate */
1747 *cmd++ = (unsigned int)
1748 UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
1749 UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
1750 0; /* No end addr for full invalidate */
1751
1752 lcc_start = cmd;
1753
1754 /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
1755 cmd++;
1756
1757#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
1758 /* Force mismatch */
1759 *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
1760#else
1761 *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
1762#endif
1763
1764 for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
1765 cmd = reg_range(cmd, context_register_ranges[i * 2],
1766 context_register_ranges[i * 2 + 1]);
1767 }
1768
1769 lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
1770 (cmd - lcc_start) - 1);
1771
1772#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
1773 lcc_start[2] |= (0 << 24) | (4 << 16); /* Disable shadowing. */
1774#else
1775 lcc_start[2] |= (1 << 24) | (4 << 16);
1776#endif
1777
1778 for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
1779 *cmd++ = cp_type0_packet(global_registers[i], 1);
1780 tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
1781 *cmd++ = 0x00000000;
1782 }
1783
1784 create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
1785 tmp_ctx.cmd = cmd;
1786}
1787
1788static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
1789 struct adreno_context *drawctxt)
1790{
1791 unsigned int *cmd = tmp_ctx.cmd;
1792 unsigned int *start = cmd;
1793 unsigned int mode = 4; /* Indirect mode */
1794 unsigned int stateblock;
1795 unsigned int numunits;
1796 unsigned int statetype;
1797
1798 drawctxt->cond_execs[2].hostptr = cmd;
1799 drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
1800 *cmd++ = 0;
1801 drawctxt->cond_execs[3].hostptr = cmd;
1802 drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
1803 *cmd++ = 0;
1804
1805#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
1806 *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
1807 *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
1808 *cmd++ = 4 << 16;
1809 *cmd++ = 0x0;
1810#endif
1811 /* HLSQ full update */
1812 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1813 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1814 *cmd++ = 0x68000240; /* A3XX_HLSQ_CONTROL_0_REG */
1815
1816#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
1817 /* Re-enable shadowing */
1818 *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
1819 *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
1820 *cmd++ = (4 << 16) | (1 << 24);
1821 *cmd++ = 0x0;
1822#endif
1823
1824 /* Load vertex shader constants */
1825 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
1826 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
1827 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
1828 *cmd++ = 0x0000ffff;
1829 *cmd++ = 3; /* EXEC_COUNT */
1830 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1831 drawctxt->constant_load_commands[0].hostptr = cmd;
1832 drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
1833 &drawctxt->gpustate);
1834
1835 /*
1836 From fixup:
1837
1838 mode = 4 (indirect)
1839 stateblock = 4 (Vertex constants)
1840 numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)
1841
1842 From register spec:
1843 SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
1844
1845 ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
1846 */
1847
1848 *cmd++ = 0; /* ord1 */
1849 *cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;
1850
1851 /* Load fragment shader constants */
1852 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
1853 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
1854 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
1855 *cmd++ = 0x0000ffff;
1856 *cmd++ = 3; /* EXEC_COUNT */
1857 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1858 drawctxt->constant_load_commands[1].hostptr = cmd;
1859 drawctxt->constant_load_commands[1].gpuaddr =
1860 virt2gpu(cmd, &drawctxt->gpustate);
1861 /*
1862 From fixup:
1863
1864 mode = 4 (indirect)
1865 stateblock = 6 (Fragment constants)
1866 numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)
1867
1868 From register spec:
1869 SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
1870
1871 ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
1872 */
1873
1874 *cmd++ = 0; /* ord1 */
1875 drawctxt->constant_load_commands[2].hostptr = cmd;
1876 drawctxt->constant_load_commands[2].gpuaddr =
1877 virt2gpu(cmd, &drawctxt->gpustate);
1878 /*
1879 From fixup:
1880 base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
1881 offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET
1882
1883 From register spec:
1884 SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
1885 start offset in on chip RAM,
1886 128bit aligned
1887
1888 ord2 = base + offset | 1
1889 Because of the base alignment we can use
1890 ord2 = base | offset | 1
1891 */
1892 *cmd++ = 0; /* ord2 */
1893
1894 /* Restore VS texture memory objects */
1895 stateblock = 0;
1896 statetype = 1;
1897 numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
1898
1899 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1900 *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
1901 *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
1902 & 0xfffffffc) | statetype;
1903
1904 /* Restore VS texture mipmap addresses */
1905 stateblock = 1;
1906 statetype = 1;
1907 numunits = TEX_SIZE_MIPMAP / 4;
1908 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1909 *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
1910 *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
1911 & 0xfffffffc) | statetype;
1912
1913 /* Restore VS texture sampler objects */
1914 stateblock = 0;
1915 statetype = 0;
1916 numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
1917 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1918 *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
1919 *cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
1920 & 0xfffffffc) | statetype;
1921
1922 /* Restore FS texture memory objects */
1923 stateblock = 2;
1924 statetype = 1;
1925 numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
1926 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1927 *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
1928 *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
1929 & 0xfffffffc) | statetype;
1930
1931 /* Restore FS texture mipmap addresses */
1932 stateblock = 3;
1933 statetype = 1;
1934 numunits = TEX_SIZE_MIPMAP / 4;
1935 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1936 *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
1937 *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
1938 & 0xfffffffc) | statetype;
1939
1940 /* Restore FS texture sampler objects */
1941 stateblock = 2;
1942 statetype = 0;
1943 numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
1944 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1945 *cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
1946 *cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
1947 & 0xfffffffc) | statetype;
1948
1949 create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
1950 tmp_ctx.cmd = cmd;
1951}
1952
1953static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
1954 struct adreno_context *drawctxt)
1955{
1956 unsigned int *cmd = tmp_ctx.cmd;
1957 unsigned int *start = cmd;
1958
1959 /* Vertex shader */
1960 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
1961 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
1962 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
1963 *cmd++ = 1;
1964 *cmd++ = 3; /* EXEC_COUNT */
1965
1966 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1967 drawctxt->shader_load_commands[0].hostptr = cmd;
1968 drawctxt->shader_load_commands[0].gpuaddr =
1969 virt2gpu(cmd, &drawctxt->gpustate);
1970 /*
1971 From fixup:
1972
1973 mode = 4 (indirect)
1974 stateblock = 4 (Vertex shader)
1975 numunits = SP_VS_CTRL_REG0.VS_LENGTH
1976
1977 From regspec:
1978 SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
1979 If bit31 is 1, it means overflow
1980 or any long shader.
1981
1982 ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
1983 */
1984 *cmd++ = 0; /*ord1 */
1985 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;
1986
1987 /* Fragment shader */
1988 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
1989 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
1990 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
1991 *cmd++ = 1;
1992 *cmd++ = 3; /* EXEC_COUNT */
1993
1994 *cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
1995 drawctxt->shader_load_commands[1].hostptr = cmd;
1996 drawctxt->shader_load_commands[1].gpuaddr =
1997 virt2gpu(cmd, &drawctxt->gpustate);
1998 /*
1999 From fixup:
2000
2001 mode = 4 (indirect)
2002 stateblock = 6 (Fragment shader)
2003 numunits = SP_FS_CTRL_REG0.FS_LENGTH
2004
2005 From regspec:
2006 SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
2007 If bit31 is 1, it means overflow
2008 or any long shader.
2009
2010 ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
2011 */
2012 *cmd++ = 0; /*ord1 */
2013 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
2014 + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;
2015
2016 create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
2017 tmp_ctx.cmd = cmd;
2018}
2019
2020static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
2021 struct adreno_context *drawctxt)
2022{
2023 unsigned int *cmd = tmp_ctx.cmd;
2024 unsigned int *start = cmd;
2025
2026 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
2027 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
2028 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
2029 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
2030 = virt2gpu(cmd, &drawctxt->gpustate);
2031 *cmd++ = 0;
2032
2033 /* Create indirect buffer command for above command sequence */
2034 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
2035
2036 tmp_ctx.cmd = cmd;
2037}
2038
2039/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
2040static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
2041 struct adreno_context *drawctxt)
2042{
2043 unsigned int *cmd = tmp_ctx.cmd;
2044 unsigned int *start = cmd;
2045
2046#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
2047 /* Save shader sizes */
2048 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
2049 *cmd++ = A3XX_SP_VS_CTRL_REG0;
2050 *cmd++ = drawctxt->shader_load_commands[0].gpuaddr;
2051
2052 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
2053 *cmd++ = A3XX_SP_FS_CTRL_REG0;
2054 *cmd++ = drawctxt->shader_load_commands[1].gpuaddr;
2055
2056 /* Save constant sizes */
2057 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
2058 *cmd++ = A3XX_SP_VS_CTRL_REG1;
2059 *cmd++ = drawctxt->constant_load_commands[0].gpuaddr;
2060
2061 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
2062 *cmd++ = A3XX_SP_FS_CTRL_REG1;
2063 *cmd++ = drawctxt->constant_load_commands[1].gpuaddr;
2064
2065 /* Save constant offsets */
2066 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
2067 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
2068 *cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
2069#else
2070 /* Save shader sizes */
2071 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
2072 30, (4 << 19) | (4 << 16),
2073 drawctxt->shader_load_commands[0].gpuaddr);
2074
2075 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
2076 30, (6 << 19) | (4 << 16),
2077 drawctxt->shader_load_commands[1].gpuaddr);
2078
2079 /* Save constant sizes */
2080 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
2081 23, (4 << 19) | (4 << 16),
2082 drawctxt->constant_load_commands[0].gpuaddr);
2083
2084 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
2085 23, (6 << 19) | (4 << 16),
2086 drawctxt->constant_load_commands[1].gpuaddr);
2087
2088 /* Modify constant restore conditionals */
2089 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
2090 0, 0, drawctxt->cond_execs[2].gpuaddr);
2091
2092 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
2093 0, 0, drawctxt->cond_execs[3].gpuaddr);
2094
2095 /* Save fragment constant shadow offset */
2096 cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
2097 18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
2098 drawctxt->constant_load_commands[2].gpuaddr);
2099#endif
2100
2101 /* Use mask value to avoid flushing HLSQ which would cause the HW to
2102 discard all the shader data */
2103
2104 cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
2105 0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);
2106
2107 create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);
2108
2109 tmp_ctx.cmd = cmd;
2110}
2111
2112static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
2113 struct adreno_context *drawctxt)
2114{
2115 drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
2116
2117 build_regrestore_cmds(adreno_dev, drawctxt);
2118 build_constantrestore_cmds(adreno_dev, drawctxt);
2119 build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
2120 build_regconstantsave_cmds(adreno_dev, drawctxt);
2121 build_shader_save_cmds(adreno_dev, drawctxt);
2122 build_shader_restore_cmds(adreno_dev, drawctxt);
2123 build_restore_fixup_cmds(adreno_dev, drawctxt);
2124 build_save_fixup_cmds(adreno_dev, drawctxt);
2125
2126 return 0;
2127}
2128
2129/* create buffers for saving/restoring registers, constants, & GMEM */
2130static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
2131 struct adreno_context *drawctxt)
2132{
2133 calc_gmemsize(&drawctxt->context_gmem_shadow,
2134 adreno_dev->gmemspace.sizebytes);
2135 tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;
2136
2137 if (drawctxt->flags & CTXT_FLAGS_GMEM_SHADOW) {
2138 int result =
2139 kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
2140 drawctxt->pagetable,
2141 drawctxt->context_gmem_shadow.size);
2142
2143 if (result)
2144 return result;
2145 } else {
2146 memset(&drawctxt->context_gmem_shadow.gmemshadow, 0,
2147 sizeof(drawctxt->context_gmem_shadow.gmemshadow));
2148
2149 return 0;
2150 }
2151
2152 build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
2153 &tmp_ctx.cmd);
2154
2155 /* Dow we need to idle? */
2156 /* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */
2157
2158 tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
2159 &drawctxt->context_gmem_shadow);
2160 tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
2161 &drawctxt->context_gmem_shadow);
2162
2163 kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
2164 KGSL_CACHE_OP_FLUSH);
2165
2166 return 0;
2167}
2168
2169static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
2170 struct adreno_context *drawctxt)
2171{
2172 int ret;
2173
2174 /*
2175 * Allocate memory for the GPU state and the context commands.
2176 * Despite the name, this is much more then just storage for
2177 * the gpustate. This contains command space for gmem save
2178 * and texture and vertex buffer storage too
2179 */
2180
2181 ret = kgsl_allocate(&drawctxt->gpustate,
2182 drawctxt->pagetable, CONTEXT_SIZE);
2183
2184 if (ret)
2185 return ret;
2186
2187 kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
2188 tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;
2189
2190 if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
2191 ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
2192 if (ret)
2193 goto done;
2194
2195 drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
2196 }
2197
2198 if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
2199 ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);
2200
2201done:
2202 if (ret)
2203 kgsl_sharedmem_free(&drawctxt->gpustate);
2204
2205 return ret;
2206}
2207
2208static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
2209 struct adreno_context *context)
2210{
2211 struct kgsl_device *device = &adreno_dev->dev;
2212
2213 if (context == NULL)
2214 return;
2215
2216 if (context->flags & CTXT_FLAGS_GPU_HANG)
2217 KGSL_CTXT_WARN(device,
2218 "Current active context has caused gpu hang\n");
2219
2220 if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
2221 /* Fixup self modifying IBs for save operations */
2222 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
2223 context->save_fixup, 3);
2224
2225 /* save registers and constants. */
2226 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
2227 context->regconstant_save, 3);
2228
2229 if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
2230 /* Save shader instructions */
2231 adreno_ringbuffer_issuecmds(device,
2232 KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);
2233
2234 context->flags |= CTXT_FLAGS_SHADER_RESTORE;
2235 }
2236 }
2237
2238 if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
2239 (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
2240 /*
2241 * Save GMEM (note: changes shader. shader must
2242 * already be saved.)
2243 */
2244
2245 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
2246 context->context_gmem_shadow.
2247 gmem_save, 3);
2248 context->flags |= CTXT_FLAGS_GMEM_RESTORE;
2249 }
2250}
2251
2252static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
2253 struct adreno_context *context)
2254{
2255 struct kgsl_device *device = &adreno_dev->dev;
2256 unsigned int cmds[5];
2257
2258 if (context == NULL) {
2259 /* No context - set the default pagetable and thats it */
2260 kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
2261 return;
2262 }
2263
2264 KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);
2265
2266 cmds[0] = cp_nop_packet(1);
2267 cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
2268 cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
2269 cmds[3] = device->memstore.gpuaddr +
2270 KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
2271 cmds[4] = (unsigned int)context;
2272 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
2273 kgsl_mmu_setstate(device, context->pagetable);
2274
2275 /*
2276 * Restore GMEM. (note: changes shader.
2277 * Shader must not already be restored.)
2278 */
2279
2280 if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
2281 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
2282 context->context_gmem_shadow.
2283 gmem_restore, 3);
2284 context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
2285 }
2286
2287 if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
2288 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
2289 context->reg_restore, 3);
2290
2291 /* Fixup self modifying IBs for restore operations */
2292 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
2293 context->restore_fixup, 3);
2294
2295 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
2296 context->constant_restore, 3);
2297
2298 if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
2299 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
2300 context->shader_restore, 3);
2301
2302 /* Restore HLSQ_CONTROL_0 register */
2303 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
2304 context->hlsqcontrol_restore, 3);
2305 }
2306}
2307
2308static void a3xx_rb_init(struct adreno_device *adreno_dev,
2309 struct adreno_ringbuffer *rb)
2310{
2311 unsigned int *cmds, cmds_gpu;
2312 cmds = adreno_ringbuffer_allocspace(rb, 18);
2313 cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);
2314
2315 GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
2316 GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
2317 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2318 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2319 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2320 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
2321 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
2322 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
2323 GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
2324 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
2325 GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
2326 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
2327 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
2328 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2329 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2330 /* Protected mode control - turned off for A3XX */
2331 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2332 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2333 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2334
2335 adreno_ringbuffer_submit(rb);
2336}
2337
2338static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2339{
2340 struct kgsl_device *device = &adreno_dev->dev;
2341 const char *err = "";
2342
2343 switch (bit) {
2344 case A3XX_INT_RBBM_AHB_ERROR: {
2345 unsigned int reg;
2346
2347 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2348
2349 /*
2350 * Return the word address of the erroring register so that it
2351 * matches the register specification
2352 */
2353
2354 KGSL_DRV_CRIT(device,
2355 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2356 reg & (1 << 28) ? "WRITE" : "READ",
2357 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2358 (reg >> 24) & 0x3);
2359
2360 /* Clear the error */
2361 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2362 return;
2363 }
2364 case A3XX_INT_RBBM_REG_TIMEOUT:
2365 err = "RBBM: AHB register timeout";
2366 break;
2367 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2368 err = "RBBM: ME master split timeout";
2369 break;
2370 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2371 err = "RBBM: PFP master split timeout";
2372 break;
2373 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2374 err = "RBBM: ATB bus oveflow";
2375 break;
2376 case A3XX_INT_VFD_ERROR:
2377 err = "VFD: Out of bounds access";
2378 break;
2379 case A3XX_INT_CP_T0_PACKET_IN_IB:
2380 err = "ringbuffer TO packet in IB interrupt";
2381 break;
2382 case A3XX_INT_CP_OPCODE_ERROR:
2383 err = "ringbuffer opcode error interrupt";
2384 break;
2385 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2386 err = "ringbuffer reserved bit error interrupt";
2387 break;
2388 case A3XX_INT_CP_HW_FAULT:
2389 err = "ringbuffer hardware fault";
2390 break;
2391 case A3XX_INT_CP_REG_PROTECT_FAULT:
2392 err = "ringbuffer protected mode error interrupt";
2393 break;
2394 case A3XX_INT_CP_AHB_ERROR_HALT:
2395 err = "ringbuffer AHB error interrupt";
2396 break;
2397 case A3XX_INT_MISC_HANG_DETECT:
2398 err = "MISC: GPU hang detected";
2399 break;
2400 case A3XX_INT_UCHE_OOB_ACCESS:
2401 err = "UCHE: Out of bounds access";
2402 break;
2403 }
2404
2405 KGSL_DRV_CRIT(device, "%s\n", err);
2406 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2407}
2408
2409static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
2410{
2411 struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
2412
2413 if (irq == A3XX_INT_CP_RB_INT) {
2414 kgsl_sharedmem_writel(&rb->device->memstore,
2415 KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0);
2416 wmb();
2417 KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
2418 }
2419
2420 wake_up_interruptible_all(&rb->device->wait_queue);
2421
2422 /* Schedule work to free mem and issue ibs */
2423 queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);
2424
2425 atomic_notifier_call_chain(&rb->device->ts_notifier_list,
2426 rb->device->id, NULL);
2427}
2428
/* Build one a3xx_irq_funcs[] entry that dispatches to handler _c */
#define A3XX_IRQ_CALLBACK(_c) { .func = (_c) }

/*
 * Interrupt bits written to A3XX_RBBM_INT_0_MASK when interrupts are
 * enabled: the RBBM, VFD, CP, MISC and UCHE error sources plus the CP
 * IB1/IB2/RB interrupts.
 */
#define A3XX_INT_MASK \
	(/* RBBM errors */ \
	 (1 << A3XX_INT_RBBM_AHB_ERROR) | \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) | \
	 (1 << A3XX_INT_RBBM_ME_MS_TIMEOUT) | \
	 (1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT) | \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 /* VFD error */ \
	 (1 << A3XX_INT_VFD_ERROR) | \
	 /* CP errors */ \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) | \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) | \
	 /* CP IB1/IB2/RB interrupts */ \
	 (1 << A3XX_INT_CP_IB1_INT) | \
	 (1 << A3XX_INT_CP_IB2_INT) | \
	 (1 << A3XX_INT_CP_RB_INT) | \
	 /* Remaining CP, MISC and UCHE errors */ \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \
	 (1 << A3XX_INT_MISC_HANG_DETECT) | \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2449
2450static struct {
2451 void (*func)(struct adreno_device *, int);
2452} a3xx_irq_funcs[] = {
2453 A3XX_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */
2454 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */
2455 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 2 - RBBM_REG_TIMEOUT */
2456 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 3 - RBBM_ME_MS_TIMEOUT */
2457 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 4 - RBBM_PFP_MS_TIMEOUT */
2458 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */
2459 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 6 - RBBM_VFD_ERROR */
2460 A3XX_IRQ_CALLBACK(NULL), /* 7 - CP_SW */
2461 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 8 - CP_T0_PACKET_IN_IB */
2462 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */
2463 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 10 - CP_RESERVED_BIT_ERROR */
2464 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 11 - CP_HW_FAULT */
2465 A3XX_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */
2466 A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 13 - CP_IB2_INT */
2467 A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 14 - CP_IB1_INT */
2468 A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 15 - CP_RB_INT */
2469 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 16 - CP_REG_PROTECT_FAULT */
2470 A3XX_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */
2471 A3XX_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */
2472 A3XX_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */
2473 A3XX_IRQ_CALLBACK(NULL), /* 20 - CP_CACHE_FLUSH_TS */
2474 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 21 - CP_AHB_ERROR_FAULT */
2475 A3XX_IRQ_CALLBACK(NULL), /* 22 - Unused */
2476 A3XX_IRQ_CALLBACK(NULL), /* 23 - Unused */
2477 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 24 - MISC_HANG_DETECT */
2478 A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */
2479 /* 26 to 31 - Unused */
2480};
2481
2482static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2483{
2484 struct kgsl_device *device = &adreno_dev->dev;
2485 irqreturn_t ret = IRQ_NONE;
2486 unsigned int status, tmp;
2487 int i;
2488
2489 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2490
2491 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2492 if (tmp & 1) {
2493 if (a3xx_irq_funcs[i].func != NULL) {
2494 a3xx_irq_funcs[i].func(adreno_dev, i);
2495 ret = IRQ_HANDLED;
2496 } else {
2497 KGSL_DRV_CRIT(device,
2498 "Unhandled interrupt bit %x\n", i);
2499 }
2500 }
2501
2502 tmp >>= 1;
2503 }
2504
2505 if (status)
2506 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2507 status);
2508 return ret;
2509}
2510
2511static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2512{
2513 struct kgsl_device *device = &adreno_dev->dev;
2514
2515 if (state)
2516 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2517 else
2518 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2519}
2520
2521static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
2522{
2523 struct kgsl_device *device = &adreno_dev->dev;
2524 unsigned int reg, val;
2525
2526 /* Freeze the counter */
2527 adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
2528 reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2529 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2530
2531 /* Read the value */
2532 adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
2533
2534 /* Reset the counter */
2535 reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
2536 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2537
2538 /* Re-enable the counter */
2539 reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
2540 reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2541 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2542
2543 return val;
2544}
2545
2546static void a3xx_start(struct adreno_device *adreno_dev)
2547{
2548 struct kgsl_device *device = &adreno_dev->dev;
2549
2550 /* Reset the core */
2551 adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
2552 0x00000001);
2553 msleep(20);
2554
2555 /*
2556 * enable fixed master AXI port of 0x0 for all clients to keep
2557 * traffic from going to random places
2558 */
2559
2560 adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F);
2561 adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000);
2562 adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000);
2563
2564 /* Make all blocks contribute to the GPU BUSY perf counter */
2565 adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);
2566
2567 /* Enable the RBBM error reporting bits. This lets us get
2568 useful information on failure */
2569
2570 adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);
2571
2572 /* Enable AHB error reporting */
2573 adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);
2574
2575 /* Turn on the power counters */
2576 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);
2577}
2578
/*
 * Forward declaration of the A3XX snapshot routine. The definition lives in
 * adreno_a3xx_snapshot.c.
 */
void *a3xx_snapshot(struct adreno_device *adreno_dev,
		    void *snapshot,
		    int *remain,
		    int hang);
2582
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002583struct adreno_gpudev adreno_a3xx_gpudev = {
2584 .reg_rbbm_status = A3XX_RBBM_STATUS,
2585 .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
2586 .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
2587
2588 .ctxt_create = a3xx_drawctxt_create,
2589 .ctxt_save = a3xx_drawctxt_save,
2590 .ctxt_restore = a3xx_drawctxt_restore,
2591 .rb_init = a3xx_rb_init,
2592 .irq_control = a3xx_irq_control,
2593 .irq_handler = a3xx_irq_handler,
2594 .busy_cycles = a3xx_busy_cycles,
2595 .start = a3xx_start,
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002596 .snapshot = a3xx_snapshot,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002597};