blob: 14d4e0aa4376e723bb5a992e84b1913f58ba1db1 [file] [log] [blame]
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/delay.h>
15
16#include "kgsl.h"
17#include "adreno.h"
18#include "kgsl_sharedmem.h"
19#include "kgsl_cffdump.h"
20#include "a3xx_reg.h"
21
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive)
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760,
};

/* Number of start/end range PAIRS above (each pair is two entries) */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
66
/*
 * Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.  Both arguments are parenthesized so expression operands
 * expand safely.
 */

#define _SET(_shift, _val) ((_val) << (_shift))
72
/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 * 8K - ALU Constant Shadow (8K aligned)
 * 4K - H/W Register Shadow (8K aligned)
 * 5K - Command and Vertex Buffers
 * 8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE (8*1024)	/* 8KB */
#define REG_SHADOW_SIZE (4*1024)	/* 4KB */
#define CMD_BUFFER_SIZE (5*1024)	/* 5KB */
#define TEX_SIZE_MEM_OBJECTS 896	/* bytes */
#define TEX_SIZE_MIPMAP 1936		/* bytes */
#define TEX_SIZE_SAMPLER_OBJ 256	/* bytes */
/* VS + FS copies of the three texture sections: (896+1936+256)*2 = 6176 */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2)	/* ~6KB */
#define SHADER_SHADOW_SIZE (8*1024)	/* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET ALU_SHADOW_SIZE
#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE)
#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE (16*1024)
125
/* Dword offsets of sampler/memobj/mipmap state within the HLSQ shadow */
#define HLSQ_SAMPLER_OFFSET 0x000
#define HLSQ_MEMOBJ_OFFSET 0x400
#define HLSQ_MIPMAP_OFFSET 0x800

#ifdef GSL_USE_A3XX_HLSQ_SHADOW_RAM
/* Use shadow RAM */
#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)
#else
/* Use working RAM */
#define HLSQ_SHADOW_BASE 0x10000
#endif

/* Loop count field position in a CP_REG_TO_MEM source dword */
#define REG_TO_MEM_LOOP_COUNT_SHIFT 15

/*
 * Build a PC draw initiator dword.  Bit 0 of index_size selects the
 * index size and bit 1 selects "small index" mode.  All arguments are
 * parenthesized so expression operands (e.g. "a | b") expand correctly.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
148
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains start and end of a range of registers.
 * Consumed in pairs by build_regconstantsave_cmds() when shadow
 * writes are disabled.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
186
/*
 * Global registers that need to be saved separately (they are outside
 * the 0x2000-based context range handled by context_register_ranges).
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};
216
#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)

/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where register values are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
227
228#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
229/*
230 * Function for executing dest = ( (reg & and) ROL rol ) | or
231 */
232static unsigned int *rmw_regtomem(unsigned int *cmd,
233 unsigned int reg, unsigned int and,
234 unsigned int rol, unsigned int or,
235 unsigned int dest)
236{
237 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
238 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
239 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
240 *cmd++ = 0x00000000; /* AND value */
241 *cmd++ = reg; /* OR address */
242
243 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
244 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
245 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
246 *cmd++ = and; /* AND value */
247 *cmd++ = or; /* OR value */
248
249 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
250 *cmd++ = A3XX_CP_SCRATCH_REG2;
251 *cmd++ = dest;
252
253 return cmd;
254}
255#endif
256
/*
 * Build the command stream that saves HW context registers, ALU
 * constants and texture state into the per-context shadow memory
 * (drawctxt->gpustate), then wrap it in an IB1
 * (drawctxt->regconstant_save).
 *
 * Several dwords written here are placeholders: their host/GPU
 * addresses are recorded in drawctxt->constant_save_commands[] so a
 * later fixup pass can patch in values that depend on the live shader
 * control registers (see the "From fixup" comments below).
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int i;

	/* Reserve one dword; patched later via constant_save_commands[0] */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	/* Let the pipeline drain before state is read back */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;	/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		/* NOTE(review): this 'start' shadows the outer IB start
		 * pointer above; harmless but easy to misread */
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants (conditional on cond_execs[2]) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;	/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants (conditional on cond_execs[3]) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;	/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	   From fixup:

	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   dst = base + offset
	   Because of the base alignment we can use
	   dst = base | offset
	 */
	*cmd++ = 0;	/* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS data lives SSIZE above VS) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
425
426/* Copy GMEM contents to system memory shadow. */
427static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
428 struct adreno_context *drawctxt,
429 struct gmem_shadow_t *shadow)
430{
431 unsigned int *cmds = tmp_ctx.cmd;
432 unsigned int *start = cmds;
433
434 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
435 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
436
437 /* RB_MODE_CONTROL */
438 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
439 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
440 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
441 /* RB_RENDER_CONTROL */
442 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
443 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
444
445 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
446 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
447 /* RB_COPY_CONTROL */
448 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
449 RB_CLEAR_MODE_RESOLVE) |
450 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
451 tmp_ctx.gmem_base >> 14);
452 /* RB_COPY_DEST_BASE */
453 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
454 shadow->gmemshadow.gpuaddr >> 5);
455 /* RB_COPY_DEST_PITCH */
456 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
457 (shadow->pitch * 4) / 32);
458 /* RB_COPY_DEST_INFO */
459 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
460 RB_TILINGMODE_LINEAR) |
461 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
462 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
463 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
464
465 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
466 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
467 /* GRAS_SC_CONTROL */
468 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
469
470 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
471 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
472 /* VFD_CONTROL_0 */
473 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
474 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
475 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
476 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
477 /* VFD_CONTROL_1 */
478 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
479 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
480 _SET(VFD_CTRLREG1_REGID4INST, 252);
481
482 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
483 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
484 /* VFD_FETCH_INSTR_0_0 */
485 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
486 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
487 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
488 /* VFD_FETCH_INSTR_1_0 */
489 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
490 shadow->quad_vertices.gpuaddr);
491
492 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
493 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
494 /* VFD_DECODE_INSTR_0 */
495 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
496 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
497 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
498 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
499 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
500 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
501
502 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
503 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
504 /* HLSQ_CONTROL_0_REG */
505 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
506 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
507 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
508 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
509 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
510 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
511 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
512 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
513 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
514 /* HLSQ_CONTROL_1_REG */
515 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
516 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
517 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
518 /* HLSQ_CONTROL_2_REG */
519 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
520 /* HLSQ_CONTROL_3_REG */
521 *cmds++ = 0x00000000;
522
523 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
524 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
525 /* HLSQ_VS_CONTROL_REG */
526 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
527 /* HLSQ_FS_CONTROL_REG */
528 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
529 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
530 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
531 /* HLSQ_CONST_VSPRESV_RANGE_REG */
532 *cmds++ = 0x00000000;
533 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
534 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
535 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
536
537 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
538 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
539 /* SP_FS_LENGTH_REG */
540 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
541
542 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
543 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
544 /* SP_SP_CTRL_REG */
545 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
546 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
547
548 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
549 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
550 /* SP_VS_CTRL_REG0 */
551 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
552 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
553 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
554 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
555 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
556 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
557 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
558 /* SP_VS_CTRL_REG1 */
559 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
560 /* SP_VS_PARAM_REG */
561 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
562 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
563 /* SP_VS_OUT_REG_0 */
564 *cmds++ = 0x00000000;
565 /* SP_VS_OUT_REG_1 */
566 *cmds++ = 0x00000000;
567 /* SP_VS_OUT_REG_2 */
568 *cmds++ = 0x00000000;
569 /* SP_VS_OUT_REG_3 */
570 *cmds++ = 0x00000000;
571 /* SP_VS_OUT_REG_4 */
572 *cmds++ = 0x00000000;
573 /* SP_VS_OUT_REG_5 */
574 *cmds++ = 0x00000000;
575 /* SP_VS_OUT_REG_6 */
576 *cmds++ = 0x00000000;
577 /* SP_VS_OUT_REG_7 */
578 *cmds++ = 0x00000000;
579
580 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
581 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
582 /* SP_VS_VPC_DST_REG_0 */
583 *cmds++ = 0x00000000;
584 /* SP_VS_VPC_DST_REG_1 */
585 *cmds++ = 0x00000000;
586 /* SP_VS_VPC_DST_REG_2 */
587 *cmds++ = 0x00000000;
588 /* SP_VS_VPC_DST_REG_3 */
589 *cmds++ = 0x00000000;
590 /* SP_VS_OBJ_OFFSET_REG */
591 *cmds++ = 0x00000000;
592 /* SP_VS_OBJ_START_REG */
593 *cmds++ = 0x00000000;
594
595 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
596 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
597 /* SP_VS_LENGTH_REG */
598 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
599 /* SP_FS_CTRL_REG0 */
600 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
601 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
602 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
603 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
604 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
605 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
606 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
607 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
608 /* SP_FS_CTRL_REG1 */
609 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
610 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
611 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
612 /* SP_FS_OBJ_OFFSET_REG */
613 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
614 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
615 /* SP_FS_OBJ_START_REG */
616 *cmds++ = 0x00000000;
617
618 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
619 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
620 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
621 *cmds++ = 0x00000000;
622 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
623 *cmds++ = 0x00000000;
624
625 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
626 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
627 /* SP_FS_OUTPUT_REG */
628 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
629
630 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
631 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
632 /* SP_FS_MRT_REG_0 */
633 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
634 /* SP_FS_MRT_REG_1 */
635 *cmds++ = 0x00000000;
636 /* SP_FS_MRT_REG_2 */
637 *cmds++ = 0x00000000;
638 /* SP_FS_MRT_REG_3 */
639 *cmds++ = 0x00000000;
640
641 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
642 *cmds++ = CP_REG(A3XX_VPC_ATTR);
643 /* VPC_ATTR */
644 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
645 _SET(VPC_VPCATTR_LMSIZE, 1);
646 /* VPC_PACK */
647 *cmds++ = 0x00000000;
648 /* VPC_VARRYING_INTERUPT_MODE_0 */
649 *cmds++ = 0x00000000;
650 /* VPC_VARRYING_INTERUPT_MODE_1 */
651 *cmds++ = 0x00000000;
652 /* VPC_VARRYING_INTERUPT_MODE_2 */
653 *cmds++ = 0x00000000;
654 /* VPC_VARRYING_INTERUPT_MODE_3 */
655 *cmds++ = 0x00000000;
656 /* VPC_VARYING_PS_REPL_MODE_0 */
657 *cmds++ = 0x00000000;
658 /* VPC_VARYING_PS_REPL_MODE_1 */
659 *cmds++ = 0x00000000;
660 /* VPC_VARYING_PS_REPL_MODE_2 */
661 *cmds++ = 0x00000000;
662 /* VPC_VARYING_PS_REPL_MODE_3 */
663 *cmds++ = 0x00000000;
664
665 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
666 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
667 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
668 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
669 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
670 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
671 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
672
673 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
674 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
675 /* end; */
676 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
677 /* nop; */
678 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
679 /* nop; */
680 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
681
682 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
683 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
684 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
685 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
686 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
687 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
688 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
689
690 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
691 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
692 /* end; */
693 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
694 /* nop; */
695 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
696 /* nop; */
697 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
698
699 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
700 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
701 /* RB_MSAA_CONTROL */
702 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
703 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
704
705 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
706 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
707 /* RB_DEPTH_CONTROL */
708 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
709
710 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
711 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
712 /* RB_MRT_CONTROL0 */
713 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
714 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
715 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
716 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
717
718 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
719 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
720 /* RB_MRT_BLEND_CONTROL0 */
721 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
722 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
723 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
724 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
725 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
726 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
727 /* RB_MRT_CONTROL1 */
728 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
729 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
730 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
731
732 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
733 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
734 /* RB_MRT_BLEND_CONTROL1 */
735 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
736 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
737 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
738 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
739 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
740 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
741 /* RB_MRT_CONTROL2 */
742 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
743 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
744 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
745
746 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
747 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
748 /* RB_MRT_BLEND_CONTROL2 */
749 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
750 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
751 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
752 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
753 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
754 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
755 /* RB_MRT_CONTROL3 */
756 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
757 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
758 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
759
760 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
761 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
762 /* RB_MRT_BLEND_CONTROL3 */
763 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
764 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
765 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
766 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
767 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
768 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
769
770 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
771 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
772 /* VFD_INDEX_MIN */
773 *cmds++ = 0x00000000;
774 /* VFD_INDEX_MAX */
775 *cmds++ = 0xFFFFFFFF;
776 /* VFD_INSTANCEID_OFFSET */
777 *cmds++ = 0x00000000;
778 /* VFD_INDEX_OFFSET */
779 *cmds++ = 0x00000000;
780
781 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
782 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
783 /* VFD_VS_THREADING_THRESHOLD */
784 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
785 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
786
787 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
788 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
789 /* TPL1_TP_VS_TEX_OFFSET */
790 *cmds++ = 0;
791
792 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
793 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
794 /* TPL1_TP_FS_TEX_OFFSET */
795 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
796 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
797 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
798
799 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
800 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
801 /* PC_PRIM_VTX_CNTL */
802 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
803 PC_DRAW_TRIANGLES) |
804 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
805 PC_DRAW_TRIANGLES) |
806 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
807
808 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
809 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
810 /* GRAS_SC_WINDOW_SCISSOR_TL */
811 *cmds++ = 0x00000000;
812 /* GRAS_SC_WINDOW_SCISSOR_BR */
813 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
814 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
815
816 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
817 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
818 /* GRAS_SC_SCREEN_SCISSOR_TL */
819 *cmds++ = 0x00000000;
820 /* GRAS_SC_SCREEN_SCISSOR_BR */
821 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
822 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
823
824 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
825 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
826 /* GRAS_CL_VPORT_XOFFSET */
827 *cmds++ = 0x00000000;
828 /* GRAS_CL_VPORT_XSCALE */
829 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
830 /* GRAS_CL_VPORT_YOFFSET */
831 *cmds++ = 0x00000000;
832 /* GRAS_CL_VPORT_YSCALE */
833 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
834
835 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
836 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
837 /* GRAS_CL_VPORT_ZOFFSET */
838 *cmds++ = 0x00000000;
839 /* GRAS_CL_VPORT_ZSCALE */
840 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
841
842 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
843 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
844 /* GRAS_CL_CLIP_CNTL */
845 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
846 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
847 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
848 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
849 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
850
851 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
852 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
853 /* GRAS_CL_GB_CLIP_ADJ */
854 *cmds++ = 0x00000000;
855
856 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
857 *cmds++ = 0x00000000;
858
859 /*
860 * Resolve using two draw calls with a dummy register
861 * write in between. This is a HLM workaround
862 * that should be removed later.
863 */
864 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
865 *cmds++ = 0x00000000; /* Viz query info */
866 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
867 PC_DI_SRC_SEL_IMMEDIATE,
868 PC_DI_INDEX_SIZE_32_BIT,
869 PC_DI_IGNORE_VISIBILITY);
870 *cmds++ = 0x00000003; /* Num indices */
871 *cmds++ = 0x00000000; /* Index 0 */
872 *cmds++ = 0x00000001; /* Index 1 */
873 *cmds++ = 0x00000002; /* Index 2 */
874
875 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
876 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
877 *cmds++ = 0x00000000;
878
879 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
880 *cmds++ = 0x00000000; /* Viz query info */
881 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
882 PC_DI_SRC_SEL_IMMEDIATE,
883 PC_DI_INDEX_SIZE_32_BIT,
884 PC_DI_IGNORE_VISIBILITY);
885 *cmds++ = 0x00000003; /* Num indices */
886 *cmds++ = 0x00000002; /* Index 0 */
887 *cmds++ = 0x00000001; /* Index 1 */
888 *cmds++ = 0x00000003; /* Index 2 */
889
890 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
891 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
892 *cmds++ = 0x00000000;
893
894 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
895 *cmds++ = 0x00000000;
896
897 /* Create indirect buffer command for above command sequence */
898 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
899
900 return cmds;
901}
902
/*
 * build_shader_save_cmds() - build the IB1 that saves a context's shader
 * instruction memory on context switch.
 *
 * Two kinds of placeholder dwords are reserved in the context's gpustate
 * buffer before the command stream proper begins:
 *
 *   cond_execs[0]/[1]          - boolean gates read by the CP_COND_EXEC
 *                                packets below; patched at save time by the
 *                                fixup IB (see build_save_fixup_cmds) with
 *                                the VS/FS instruction store mode.
 *   shader_save_commands[0..3] - slots the fixup IB rewrites with the real
 *                                shader sizes/source offsets; [2] and [3]
 *                                are the payload dwords of the CP_REG_TO_MEM
 *                                packets emitted below.
 *
 * The resulting IB conditionally copies the VS and FS instruction shadows
 * out of HLSQ shadow memory into the gpustate buffer at SHADER_OFFSET
 * (FS half placed at SHADER_OFFSET + SHADER_SHADOW_SIZE/2).
 *
 * @adreno_dev is unused here; kept for a uniform builder signature.
 * Command space is consumed from tmp_ctx.cmd, which is advanced on return.
 */
static void build_shader_save_cmds(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;

	/* Reserve space for boolean values used for COND_EXEC packet */
	drawctxt->cond_execs[0].hostptr = cmd;
	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[1].hostptr = cmd;
	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* Reserve two more patch slots (filled in by the save-fixup IB) */
	drawctxt->shader_save_commands[0].hostptr = cmd;
	drawctxt->shader_save_commands[0].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->shader_save_commands[1].hostptr = cmd;
	drawctxt->shader_save_commands[1].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* The IB proper starts here; the words above are data, not commands */
	start = cmd;

	/* Save vertex shader */

	/* Execute the following 3 dwords only if the gate word is set */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;	/* dword address */
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* This payload dword is patched at save time by the fixup IB: */
	drawctxt->shader_save_commands[2].hostptr = cmd;
	drawctxt->shader_save_commands[2].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   src = (HLSQ_SHADOW_BASE + 0x1000)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: VS instruction shadow in the gpustate buffer */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Save fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;	/* dword address */
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* This payload dword is patched at save time by the fixup IB: */
	drawctxt->shader_save_commands[3].hostptr = cmd;
	drawctxt->shader_save_commands[3].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
	   From regspec:

	   SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
	   First instruction of the whole shader will be stored from
	   the offset in instruction cache, unit = 256bits, a cache line.
	   It can start from 0 if no VS available.

	   src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: FS shadow lives in the second half of shader memory */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
995
996/*
997 * Make an IB to modify context save IBs with the correct shader instruction
998 * and constant sizes and offsets.
999 */
1000
1001static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
1002 struct adreno_context *drawctxt)
1003{
1004 unsigned int *cmd = tmp_ctx.cmd;
1005 unsigned int *start = cmd;
1006
1007 /* Flush HLSQ lazy updates */
1008 *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
1009 *cmd++ = 0x7; /* HLSQ_FLUSH */
1010 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1011 *cmd++ = 0;
1012
1013 *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
1014 *cmd++ = 0x00000000; /* No start addr for full invalidate */
1015 *cmd++ = (unsigned int)
1016 UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
1017 UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
1018 0; /* No end addr for full invalidate */
1019
1020 /* Make sure registers are flushed */
1021 *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
1022 *cmd++ = 0;
1023
1024#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
1025
1026 /* Save shader sizes */
1027 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1028 *cmd++ = A3XX_SP_VS_CTRL_REG0;
1029 *cmd++ = drawctxt->shader_save_commands[2].gpuaddr;
1030
1031 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1032 *cmd++ = A3XX_SP_FS_CTRL_REG0;
1033 *cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1034
1035 /* Save shader offsets */
1036 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1037 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1038 *cmd++ = drawctxt->shader_save_commands[1].gpuaddr;
1039
1040 /* Save constant sizes */
1041 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1042 *cmd++ = A3XX_SP_VS_CTRL_REG1;
1043 *cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
1044 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1045 *cmd++ = A3XX_SP_FS_CTRL_REG1;
1046 *cmd++ = drawctxt->constant_save_commands[2].gpuaddr;
1047
1048 /* Save FS constant offset */
1049 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1050 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1051 *cmd++ = drawctxt->constant_save_commands[0].gpuaddr;
1052
1053
1054 /* Save VS instruction store mode */
1055 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1056 *cmd++ = A3XX_SP_VS_CTRL_REG0;
1057 *cmd++ = drawctxt->cond_execs[0].gpuaddr;
1058
1059 /* Save FS instruction store mode */
1060 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1061 *cmd++ = A3XX_SP_FS_CTRL_REG0;
1062 *cmd++ = drawctxt->cond_execs[1].gpuaddr;
1063#else
1064
1065 /* Shader save */
1066 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
1067 11+REG_TO_MEM_LOOP_COUNT_SHIFT,
1068 (HLSQ_SHADOW_BASE + 0x1000) / 4,
1069 drawctxt->shader_save_commands[2].gpuaddr);
1070
1071 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
1072 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1073 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1074 *cmd++ = 0x00000000; /* AND value */
1075 *cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
1076 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
1077 | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
1078 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1079 *cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
1080 A3XX_CP_SCRATCH_REG2;
1081 *cmd++ = 0x7f000000; /* AND value */
1082 *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */
1083
1084 /*
1085 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
1086 * SP_FS_OBJ_OFFSET_REG
1087 */
1088
1089 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1090 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
1091 *cmd++ = 0x00000000; /* AND value */
1092 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
1093 /*
1094 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
1095 * 0x00000000
1096 */
1097 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1098 *cmd++ = A3XX_CP_SCRATCH_REG3;
1099 *cmd++ = 0xfe000000; /* AND value */
1100 *cmd++ = 0x00000000; /* OR value */
1101 /*
1102 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
1103 */
1104 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1105 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1106 *cmd++ = 0xffffffff; /* AND value */
1107 *cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */
1108
1109 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1110 *cmd++ = A3XX_CP_SCRATCH_REG2;
1111 *cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1112
1113 /* Constant save */
1114 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
1115 17, (HLSQ_SHADOW_BASE + 0x2000) / 4,
1116 drawctxt->constant_save_commands[1].gpuaddr);
1117
1118 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
1119 17, (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
1120 drawctxt->constant_save_commands[2].gpuaddr);
1121
1122 cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
1123 18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
1124 drawctxt->constant_save_commands[2].gpuaddr
1125 + sizeof(unsigned int));
1126
1127 /* Modify constant save conditionals */
1128 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
1129 0, 0, drawctxt->cond_execs[2].gpuaddr);
1130
1131 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
1132 0, 0, drawctxt->cond_execs[3].gpuaddr);
1133
1134 /* Save VS instruction store mode */
1135
1136 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
1137 31, 0, drawctxt->cond_execs[0].gpuaddr);
1138
1139 /* Save FS instruction store mode */
1140 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
1141 31, 0, drawctxt->cond_execs[1].gpuaddr);
1142
1143#endif
1144
1145 create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);
1146
1147 tmp_ctx.cmd = cmd;
1148}
1149
1150/****************************************************************************/
1151/* Functions to build context restore IBs */
1152/****************************************************************************/
1153
1154static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1155 struct adreno_context *drawctxt,
1156 struct gmem_shadow_t *shadow)
1157{
1158 unsigned int *cmds = tmp_ctx.cmd;
1159 unsigned int *start = cmds;
1160
1161 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1162 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1163 /* HLSQ_CONTROL_0_REG */
1164 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1165 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1166 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1167 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1168 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1169 /* HLSQ_CONTROL_1_REG */
1170 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1171 /* HLSQ_CONTROL_2_REG */
1172 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1173 /* HLSQ_CONTROL3_REG */
1174 *cmds++ = 0x00000000;
1175
1176 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1177 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1178 /* RB_MRT_BUF_INFO0 */
1179 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1180 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1181 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1182 (shadow->gmem_pitch * 4 * 8) / 256);
1183 /* RB_MRT_BUF_BASE0 */
1184 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1185
1186 /* Texture samplers */
1187 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1188 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1189 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1190 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1191 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1192 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1193 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1194 *cmds++ = 0x00000240;
1195 *cmds++ = 0x00000000;
1196
1197 /* Texture memobjs */
1198 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1199 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1200 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1201 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1202 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1203 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1204 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1205 *cmds++ = 0x4cc06880;
1206 *cmds++ = shadow->height | (shadow->width << 14);
1207 *cmds++ = (shadow->pitch*4*8) << 9;
1208 *cmds++ = 0x00000000;
1209
1210 /* Mipmap bases */
1211 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1212 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1213 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1214 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1215 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1216 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1217 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1218 *cmds++ = shadow->gmemshadow.gpuaddr;
1219 *cmds++ = 0x00000000;
1220 *cmds++ = 0x00000000;
1221 *cmds++ = 0x00000000;
1222 *cmds++ = 0x00000000;
1223 *cmds++ = 0x00000000;
1224 *cmds++ = 0x00000000;
1225 *cmds++ = 0x00000000;
1226 *cmds++ = 0x00000000;
1227 *cmds++ = 0x00000000;
1228 *cmds++ = 0x00000000;
1229 *cmds++ = 0x00000000;
1230 *cmds++ = 0x00000000;
1231 *cmds++ = 0x00000000;
1232
1233 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1234 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1235 /* HLSQ_VS_CONTROL_REG */
1236 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1237 /* HLSQ_FS_CONTROL_REG */
1238 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1239 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1240 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1241 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1242 *cmds++ = 0x00000000;
1243 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1244 *cmds++ = 0x00000000;
1245
1246 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1247 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1248 /* SP_FS_LENGTH_REG */
1249 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1250
1251 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1252 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1253 /* SP_VS_CTRL_REG0 */
1254 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1255 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1256 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1257 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1258 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1259 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1260 /* SP_VS_CTRL_REG1 */
1261 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1262 /* SP_VS_PARAM_REG */
1263 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1264 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1265 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1266 /* SP_VS_OUT_REG0 */
1267 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1268 /* SP_VS_OUT_REG1 */
1269 *cmds++ = 0x00000000;
1270 /* SP_VS_OUT_REG2 */
1271 *cmds++ = 0x00000000;
1272 /* SP_VS_OUT_REG3 */
1273 *cmds++ = 0x00000000;
1274 /* SP_VS_OUT_REG4 */
1275 *cmds++ = 0x00000000;
1276 /* SP_VS_OUT_REG5 */
1277 *cmds++ = 0x00000000;
1278 /* SP_VS_OUT_REG6 */
1279 *cmds++ = 0x00000000;
1280 /* SP_VS_OUT_REG7 */
1281 *cmds++ = 0x00000000;
1282
1283 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1284 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1285 /* SP_VS_VPC_DST_REG0 */
1286 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1287 /* SP_VS_VPC_DST_REG1 */
1288 *cmds++ = 0x00000000;
1289 /* SP_VS_VPC_DST_REG2 */
1290 *cmds++ = 0x00000000;
1291 /* SP_VS_VPC_DST_REG3 */
1292 *cmds++ = 0x00000000;
1293 /* SP_VS_OBJ_OFFSET_REG */
1294 *cmds++ = 0x00000000;
1295 /* SP_VS_OBJ_START_REG */
1296 *cmds++ = 0x00000000;
1297
1298 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1299 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1300 /* SP_VS_LENGTH_REG */
1301 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1302 /* SP_FS_CTRL_REG0 */
1303 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1304 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1305 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1306 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1307 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1308 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1309 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1310 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1311 /* SP_FS_CTRL_REG1 */
1312 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1313 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1314 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1315 /* SP_FS_OBJ_OFFSET_REG */
1316 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128);
1317 /* SP_FS_OBJ_START_REG */
1318 *cmds++ = 0x00000000;
1319
1320 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1321 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1322 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1323 *cmds++ = 0x00000000;
1324 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1325 *cmds++ = 0x00000000;
1326
1327 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1328 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1329 /* SP_FS_OUT_REG */
1330 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1331
1332 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1333 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1334 /* SP_FS_MRT_REG0 */
1335 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1336 /* SP_FS_MRT_REG1 */
1337 *cmds++ = 0;
1338 /* SP_FS_MRT_REG2 */
1339 *cmds++ = 0;
1340 /* SP_FS_MRT_REG3 */
1341 *cmds++ = 0;
1342
1343 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1344 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1345 /* VPC_ATTR */
1346 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1347 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1348 _SET(VPC_VPCATTR_LMSIZE, 1);
1349 /* VPC_PACK */
1350 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1351 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1352 /* VPC_VARYING_INTERP_MODE_0 */
1353 *cmds++ = 0x00000000;
1354 /* VPC_VARYING_INTERP_MODE1 */
1355 *cmds++ = 0x00000000;
1356 /* VPC_VARYING_INTERP_MODE2 */
1357 *cmds++ = 0x00000000;
1358 /* VPC_VARYING_IINTERP_MODE3 */
1359 *cmds++ = 0x00000000;
1360 /* VPC_VARRYING_PS_REPL_MODE_0 */
1361 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1362 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1363 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1364 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1365 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1366 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1367 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1377 /* VPC_VARRYING_PS_REPL_MODE_1 */
1378 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1379 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1380 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1381 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1382 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1388 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1389 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1392 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1393 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1394 /* VPC_VARRYING_PS_REPL_MODE_2 */
1395 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1396 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1397 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1398 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1399 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1405 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1406 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1409 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1410 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1411 /* VPC_VARRYING_PS_REPL_MODE_3 */
1412 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1413 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1414 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1415 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1416 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1422 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1423 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1426 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1427 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1428
1429 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1430 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1431 /* SP_SP_CTRL_REG */
1432 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1433
1434 /* Load vertex shader */
1435 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1436 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1437 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1438 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1439 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1440 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1441 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1442 /* (sy)end; */
1443 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1444 /* nop; */
1445 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1446 /* nop; */
1447 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1448 /* nop; */
1449 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1450
1451 /* Load fragment shader */
1452 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1453 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1454 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1455 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1456 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1457 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1458 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1459 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1460 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1461 /* (rpt5)nop; */
1462 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1463 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1464 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1465 /* (sy)mov.f32f32 r1.x, r0.x; */
1466 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1467 /* mov.f32f32 r1.y, r0.y; */
1468 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1469 /* mov.f32f32 r1.z, r0.z; */
1470 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1471 /* mov.f32f32 r1.w, r0.w; */
1472 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1473 /* end; */
1474 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1475
1476 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1477 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1478 /* VFD_CONTROL_0 */
1479 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1480 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1481 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1482 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1483 /* VFD_CONTROL_1 */
1484 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1485 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1486 _SET(VFD_CTRLREG1_REGID4INST, 252);
1487
1488 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1489 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1490 /* VFD_FETCH_INSTR_0_0 */
1491 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1492 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1493 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1494 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1495 /* VFD_FETCH_INSTR_1_0 */
1496 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1497 shadow->quad_vertices_restore.gpuaddr);
1498 /* VFD_FETCH_INSTR_0_1 */
1499 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1500 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1501 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1502 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1503 /* VFD_FETCH_INSTR_1_1 */
1504 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1505 shadow->quad_vertices_restore.gpuaddr + 16);
1506
1507 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1508 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1509 /* VFD_DECODE_INSTR_0 */
1510 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1511 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1512 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1513 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1514 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1515 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1516 /* VFD_DECODE_INSTR_1 */
1517 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1518 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1519 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1520 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1521 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1522 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1523
1524 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1525 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1526 /* RB_DEPTH_CONTROL */
1527 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1528
1529 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1530 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1531 /* RB_STENCIL_CONTROL */
1532 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1533 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1534 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1535 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1536 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1537 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1538 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1539 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1540
1541 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1542 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1543 /* RB_MODE_CONTROL */
1544 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1545 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1546
1547 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1548 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1549 /* RB_RENDER_CONTROL */
1550 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1551 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1552
1553 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1554 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1555 /* RB_MSAA_CONTROL */
1556 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1557 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1558
1559 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1560 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1561 /* RB_MRT_CONTROL0 */
1562 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1563 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1564 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1565 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1566
1567 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1568 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1569 /* RB_MRT_BLENDCONTROL0 */
1570 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1571 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1572 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1573 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1574 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1575 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1576 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1577 /* RB_MRT_CONTROL1 */
1578 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1579 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1580 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1581
1582 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1583 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1584 /* RB_MRT_BLENDCONTROL1 */
1585 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1586 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1587 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1588 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1589 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1590 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1591 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1592 /* RB_MRT_CONTROL2 */
1593 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1594 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1595 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1596
1597 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1598 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1599 /* RB_MRT_BLENDCONTROL2 */
1600 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1601 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1602 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1603 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1604 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1605 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1606 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1607 /* RB_MRT_CONTROL3 */
1608 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1609 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1610 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1611
1612 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1613 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1614 /* RB_MRT_BLENDCONTROL3 */
1615 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1616 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1617 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1618 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1619 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1620 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1621 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1622
1623 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1624 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1625 /* VFD_INDEX_MIN */
1626 *cmds++ = 0x00000000;
1627 /* VFD_INDEX_MAX */
1628 *cmds++ = 0xFFFFFFFF;
1629 /* VFD_INDEX_OFFSET */
1630 *cmds++ = 0x00000000;
1631 /* TPL1_TP_VS_TEX_OFFSET */
1632 *cmds++ = 0x00000000;
1633
1634 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1635 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1636 /* VFD_VS_THREADING_THRESHOLD */
1637 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1638 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1639
1640 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1641 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1642 /* TPL1_TP_VS_TEX_OFFSET */
1643 *cmds++ = 0x00000000;
1644
1645 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1646 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1647 /* TPL1_TP_FS_TEX_OFFSET */
1648 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1649 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1650 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1651
1652 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1653 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1654 /* GRAS_SC_CONTROL */
1655 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1656
1657 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1658 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1659 /* GRAS_SU_MODE_CONTROL */
1660 *cmds++ = 0x00000000;
1661
1662 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1663 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1664 /* GRAS_SC_WINDOW_SCISSOR_TL */
1665 *cmds++ = 0x00000000;
1666 /* GRAS_SC_WINDOW_SCISSOR_BR */
1667 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1668 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1669
1670 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1671 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1672 /* GRAS_SC_SCREEN_SCISSOR_TL */
1673 *cmds++ = 0x00000000;
1674 /* GRAS_SC_SCREEN_SCISSOR_BR */
1675 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1676 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1677
1678 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1679 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1680 /* GRAS_CL_VPORT_XOFFSET */
1681 *cmds++ = 0x00000000;
1682 /* GRAS_CL_VPORT_XSCALE */
1683 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1684 /* GRAS_CL_VPORT_YOFFSET */
1685 *cmds++ = 0x00000000;
1686 /* GRAS_CL_VPORT_YSCALE */
1687 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1688
1689 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1690 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1691 /* GRAS_CL_VPORT_ZOFFSET */
1692 *cmds++ = 0x00000000;
1693 /* GRAS_CL_VPORT_ZSCALE */
1694 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1695
1696 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1697 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1698 /* GRAS_CL_CLIP_CNTL */
1699 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1700
1701 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1702 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1703 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1704 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1705
1706 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1707 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1708 /* PC_PRIM_VTX_CONTROL */
1709 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1710 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1711 PC_DRAW_TRIANGLES) |
1712 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1713 PC_DRAW_TRIANGLES) |
1714 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1715
1716 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1717 *cmds++ = 0x00000000; /* Viz query info */
1718 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1719 PC_DI_SRC_SEL_AUTO_INDEX,
1720 PC_DI_INDEX_SIZE_16_BIT,
1721 PC_DI_IGNORE_VISIBILITY);
1722 *cmds++ = 0x00000002; /* Num indices */
1723
1724 /* Create indirect buffer command for above command sequence */
1725 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1726
1727 return cmds;
1728}
1729
/*
 * build_regrestore_cmds() - Build the IB that reloads context registers
 * from the register shadow at drawctxt->gpustate + REG_OFFSET.
 *
 * The restore uses one CP_LOAD_CONSTANT_CONTEXT packet whose length is
 * only known after all register ranges have been emitted, so a slot is
 * reserved (lcc_start) and the packet header is patched in afterwards.
 * Registers outside the shadowed context ranges are restored with
 * individual type-0 packets; the GPU address of each payload dword is
 * recorded in tmp_ctx.reg_values[] so the save path can update them.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Fully invalidate the UCHE before reloading state */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???);
	   header is patched below once the final packet size is known */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Emit one start/end register pair per shadowed context range */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Patch the deferred header now that the packet length is known */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	/* (1 << 24) enables shadowing; (4 << 16) presumably selects the
	   shadow mode/block — confirm against the CP packet spec */
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/* Restore the registers that live outside the context ranges */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		/* Record where the value dword lives so it can be fixed up */
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1787
/*
 * build_constantrestore_cmds() - Build the IB that restores ALU constants
 * and texture state (memory objects, mipmap addresses, sampler objects)
 * for both the vertex and fragment pipes from the context shadow.
 *
 * The CP_LOAD_STATE ord1/ord2 dwords for the constant loads depend on
 * live register values (SP_VS/FS_CTRL_REG1, SP_FS_OBJ_OFFSET_REG), so
 * their locations are recorded in drawctxt->constant_load_commands[] and
 * patched later by the fixup IBs. cond_execs[2]/[3] hold comparison
 * values (also patched externally) that gate the two constant loads.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/* Conditional-execution compare values, patched by the fixup IB */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable shadowing around the HLSQ write below */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
					&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1, patched in by the fixup IB */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1, patched in by the fixup IB */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2, patched in by the fixup IB */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1952
/*
 * build_shader_restore_cmds() - Build the IB that reloads vertex and
 * fragment shader instructions from the shader shadow memory.
 *
 * Each load is guarded by a CP_COND_EXEC on cond_execs[0]/[1], and the
 * CP_LOAD_STATE ord1 dword (which encodes the shader length) is left as
 * zero here; its GPU address is recorded in shader_load_commands[] so the
 * fixup IB can patch in the real length taken from SP_VS/FS_CTRL_REG0.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/* ord1, patched in by the fixup IB */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader (stored in the second half of the shadow) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/* ord1, patched in by the fixup IB */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2019
/*
 * build_hlsqcontrol_restore_cmds() - Build a tiny IB that writes the saved
 * HLSQ_CONTROL_0 value back to the hardware.
 *
 * The value dword is left as zero here; its GPU address is recorded in
 * hlsqcontrol_restore_commands[0] so the restore fixup IB can patch in
 * the value captured from the live register.
 */
static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
					   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
	drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
		= virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;	/* value patched in by the restore fixup IB */

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);

	tmp_ctx.cmd = cmd;
}
2038
/*
 * build_restore_fixup_cmds() - Build the IB that patches the shader and
 * constant sizes and offsets in the restore IBs.
 *
 * Run before the restore IBs, it reads the live SP_*_CTRL and offset
 * registers and writes the derived CP_LOAD_STATE ord dwords and
 * cond-exec values into the placeholder slots recorded by the
 * build_*_restore_cmds() functions. With GSL_CONTEXT_SWITCH_CPU_SYNC the
 * raw register values are copied directly with CP_REG_TO_MEM; otherwise
 * rmw_regtomem() masks/shifts/ORs them into the final encoded form.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes: extract the 256-bit-unit length fields and
	   OR in the stateblock/mode bits for the CP_LOAD_STATE ord1 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals: store the raw constant
	   lengths so the CP_COND_EXEC guards see zero when there is
	   nothing to load */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset (base | offset | 1) */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2111
/*
 * a3xx_create_gpustate_shadow() - Build all save/restore IBs for a
 * context's register, constant, and shader state.
 *
 * The build_* helpers share the tmp_ctx.cmd cursor into the gpustate
 * buffer, so they must run in sequence; the restore builders also record
 * placeholder addresses that the later fixup builders reference, so the
 * call order below must not be changed.
 *
 * Returns 0 (the builders themselves cannot fail).
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2128
/*
 * a3xx_create_gmem_shadow() - Allocate the GMEM shadow buffer and build
 * the gmem2sys (save) and sys2gmem (restore) command sequences for a
 * context.
 *
 * If the context does not use GMEM shadowing the shadow descriptor is
 * zeroed and no commands are built. Returns 0 on success or the error
 * from kgsl_allocate().
 */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	calc_gmemsize(&drawctxt->context_gmem_shadow,
		      adreno_dev->gmemspace.sizebytes);
	tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;

	if (drawctxt->flags & CTXT_FLAGS_GMEM_SHADOW) {
		int result =
		    kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
				  drawctxt->pagetable,
				  drawctxt->context_gmem_shadow.size);

		if (result)
			return result;
	} else {
		/* No shadow requested - clear the descriptor and bail out */
		memset(&drawctxt->context_gmem_shadow.gmemshadow, 0,
		       sizeof(drawctxt->context_gmem_shadow.gmemshadow));

		return 0;
	}

	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
			   &tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);

	/* Make sure the GPU sees the newly built commands */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	return 0;
}
2168
/*
 * a3xx_drawctxt_create() - Allocate and populate the per-context state
 * shadow for an A3XX draw context.
 *
 * Allocates the gpustate buffer, zeroes it, then builds the state-shadow
 * IBs (unless the context uses preambles) and the GMEM shadow (unless
 * CTXT_FLAGS_NOGMEMALLOC is set). On any failure the gpustate buffer is
 * freed before returning the error.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more then just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
			    drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	/* Command building starts at CMD_OFFSET within the allocation */
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2207
/*
 * a3xx_drawctxt_save() - Save the hardware state of the outgoing context
 * by issuing its save IBs on the ringbuffer.
 *
 * Order matters: the save fixup IB must run before the register/constant
 * save, shaders must be saved before GMEM (the GMEM save changes shader
 * state), and the SHADER_RESTORE/GMEM_RESTORE flags are set so the
 * matching restore path knows what to reload.
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			       struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			       "Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2251
/*
 * a3xx_drawctxt_restore() - Restore the hardware state of the incoming
 * context.
 *
 * Writes the context identifier to the memstore, switches the pagetable,
 * then replays the restore IBs in the required order: GMEM first (it
 * clobbers shader state), then registers, the restore fixup, constants,
 * shaders, and finally HLSQ_CONTROL_0. A NULL context just restores the
 * default pagetable.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
				  struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Record the current context pointer in the memstore */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
	/* NOTE(review): pointer truncated to 32 bits here - assumes a
	   32-bit kernel; confirm if this is ever built for 64-bit */
	cmds[4] = (unsigned int)context;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(device, context->pagetable);

	/*
	 * Restore GMEM. (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
						    context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->hlsqcontrol_restore, 3);
	}
}
2307
/*
 * a3xx_rb_init() - Submit the CP_ME_INIT packet that initializes the
 * microengine after a ringbuffer start.
 *
 * Allocates 18 dwords in the ringbuffer; cmds_gpu mirrors the GPU address
 * of the allocated space (wptr has already advanced by 18) so GSL_RB_WRITE
 * can record each dword's destination.
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2337
2338static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2339{
2340 struct kgsl_device *device = &adreno_dev->dev;
2341 const char *err = "";
2342
2343 switch (bit) {
2344 case A3XX_INT_RBBM_AHB_ERROR: {
2345 unsigned int reg;
2346
2347 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2348
2349 /*
2350 * Return the word address of the erroring register so that it
2351 * matches the register specification
2352 */
2353
2354 KGSL_DRV_CRIT(device,
2355 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2356 reg & (1 << 28) ? "WRITE" : "READ",
2357 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2358 (reg >> 24) & 0x3);
2359
2360 /* Clear the error */
2361 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2362 return;
2363 }
2364 case A3XX_INT_RBBM_REG_TIMEOUT:
2365 err = "RBBM: AHB register timeout";
2366 break;
2367 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2368 err = "RBBM: ME master split timeout";
2369 break;
2370 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2371 err = "RBBM: PFP master split timeout";
2372 break;
2373 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2374 err = "RBBM: ATB bus oveflow";
2375 break;
2376 case A3XX_INT_VFD_ERROR:
2377 err = "VFD: Out of bounds access";
2378 break;
2379 case A3XX_INT_CP_T0_PACKET_IN_IB:
2380 err = "ringbuffer TO packet in IB interrupt";
2381 break;
2382 case A3XX_INT_CP_OPCODE_ERROR:
2383 err = "ringbuffer opcode error interrupt";
2384 break;
2385 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2386 err = "ringbuffer reserved bit error interrupt";
2387 break;
2388 case A3XX_INT_CP_HW_FAULT:
2389 err = "ringbuffer hardware fault";
2390 break;
2391 case A3XX_INT_CP_REG_PROTECT_FAULT:
2392 err = "ringbuffer protected mode error interrupt";
2393 break;
2394 case A3XX_INT_CP_AHB_ERROR_HALT:
2395 err = "ringbuffer AHB error interrupt";
2396 break;
2397 case A3XX_INT_MISC_HANG_DETECT:
2398 err = "MISC: GPU hang detected";
2399 break;
2400 case A3XX_INT_UCHE_OOB_ACCESS:
2401 err = "UCHE: Out of bounds access";
2402 break;
2403 }
2404
2405 KGSL_DRV_CRIT(device, "%s\n", err);
2406 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2407}
2408
/*
 * a3xx_cp_callback() - Handle CP completion interrupts (RB, IB1, IB2).
 *
 * On an RB interrupt the ts_cmp_enable memstore flag is cleared first so
 * further timestamp-compare interrupts are suppressed. All completion
 * interrupts then wake waiters, queue the timestamp-expired worker, and
 * notify the timestamp notifier chain.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	if (irq == A3XX_INT_CP_RB_INT) {
		kgsl_sharedmem_writel(&rb->device->memstore,
			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0);
		/* Make sure the write lands before anyone is woken up */
		wmb();
		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&rb->device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);

	atomic_notifier_call_chain(&rb->device->ts_notifier_list,
				   rb->device->id, NULL);
}
2428
/* Wrap an IRQ handler function pointer for the a3xx_irq_funcs table */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupt bits enabled in A3XX_RBBM_INT_0_MASK: every error source
 * handled by a3xx_err_callback plus the CP IB1/IB2/RB completion
 * interrupts. Bits not listed here stay disabled.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) |      \
	 (1 << A3XX_INT_RBBM_ME_MS_TIMEOUT) |    \
	 (1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT) |   \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_VFD_ERROR) |             \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_MISC_HANG_DETECT) |      \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2449
/*
 * Per-bit interrupt dispatch table, indexed by bit position in
 * A3XX_RBBM_INT_0_STATUS. NULL entries are bits with no handler;
 * a3xx_irq_handler logs them as unhandled if they ever fire.
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 23 - Unused */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2481
2482static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2483{
2484 struct kgsl_device *device = &adreno_dev->dev;
2485 irqreturn_t ret = IRQ_NONE;
2486 unsigned int status, tmp;
2487 int i;
2488
2489 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2490
2491 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2492 if (tmp & 1) {
2493 if (a3xx_irq_funcs[i].func != NULL) {
2494 a3xx_irq_funcs[i].func(adreno_dev, i);
2495 ret = IRQ_HANDLED;
2496 } else {
2497 KGSL_DRV_CRIT(device,
2498 "Unhandled interrupt bit %x\n", i);
2499 }
2500 }
2501
2502 tmp >>= 1;
2503 }
2504
2505 if (status)
2506 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2507 status);
2508 return ret;
2509}
2510
2511static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2512{
2513 struct kgsl_device *device = &adreno_dev->dev;
2514
2515 if (state)
2516 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2517 else
2518 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2519}
2520
/*
 * a3xx_busy_cycles() - Sample the GPU busy-cycle power counter.
 *
 * The counter must be frozen before it is read, then reset and
 * re-enabled, so the register write sequence below is order-sensitive.
 * Returns the sampled value of RBBM_PERFCTR_PWR_1_LO.
 */
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int reg, val;

	/* Freeze the counter */
	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
	reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Read the value */
	adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);

	/* Reset the counter */
	reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Re-enable the counter */
	reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
	reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	return val;
}
2545
/*
 * a3xx_start() - One-time hardware initialization for the A3XX core
 * @adreno_dev: Pointer to the adreno device to bring up
 *
 * Soft-reset the GPU, then program the baseline VBIF and RBBM
 * configuration (AXI sort, error reporting, power counters) needed
 * before the ringbuffer is started.  The write order matters: the
 * reset must complete (msleep) before any other register is touched.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* GMEM size on A320 is 512K */
	adreno_dev->gmemspace.sizebytes = SZ_512K;

	/* Reset the core */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
		0x00000001);
	/* Give the reset time to propagate before programming registers */
	msleep(20);

	/*
	 * enable fixed master AXI port of 0x0 for all clients to keep
	 * traffic from going to random places
	 */

	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable the RBBM error reporting bits. This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters (read back by a3xx_busy_cycles) */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);
}
2581
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002582/* Defined in adreno_a3xx_snapshot.c */
2583void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2584 int *remain, int hang);
2585
/*
 * A3XX-specific function table registered with the common adreno core.
 * The core dispatches through these pointers for all generation-specific
 * operations.
 */
struct adreno_gpudev adreno_a3xx_gpudev = {
	/* Register offsets that differ between GPU generations */
	.reg_rbbm_status = A3XX_RBBM_STATUS,
	.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
	.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,

	/* Draw context management */
	.ctxt_create = a3xx_drawctxt_create,
	.ctxt_save = a3xx_drawctxt_save,
	.ctxt_restore = a3xx_drawctxt_restore,
	/* Ringbuffer, interrupts, power stats and bring-up */
	.rb_init = a3xx_rb_init,
	.irq_control = a3xx_irq_control,
	.irq_handler = a3xx_irq_handler,
	.busy_cycles = a3xx_busy_cycles,
	.start = a3xx_start,
	.snapshot = a3xx_snapshot,
};