blob: 51008ae443eb1996cd4165b6a14e0dcc5a4b0274 [file] [log] [blame]
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/delay.h>
15
16#include "kgsl.h"
17#include "adreno.h"
18#include "kgsl_sharedmem.h"
19#include "kgsl_cffdump.h"
20#include "a3xx_reg.h"
21
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive)
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760,
};

/* Number of start/stop pairs in a3xx_registers above */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */

#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 * 8K - ALU Constant Shadow (8K aligned)
 * 4K - H/W Register Shadow (8K aligned)
 * 5K - Command and Vertex Buffers
 * 8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE		(8*1024)	/* 8KB */
#define REG_SHADOW_SIZE		(4*1024)	/* 4KB */
#define CMD_BUFFER_SIZE		(5*1024)	/* 5KB */
#define TEX_SIZE_MEM_OBJECTS	896		/* bytes */
#define TEX_SIZE_MIPMAP		1936		/* bytes */
#define TEX_SIZE_SAMPLER_OBJ	256		/* bytes */
/* VS and FS each get one set of texture memobj/mipmap/sampler shadows */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2)	/* ~6KB */
#define SHADER_SHADOW_SIZE	(8*1024)	/* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET		ALU_SHADOW_SIZE
#define CMD_OFFSET		(REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET		(CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET		(SHADER_OFFSET+SHADER_SHADOW_SIZE)
/* VS texture shadow comes first, then the FS texture shadow */
#define VS_TEX_OFFSET_MEM_OBJECTS	TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP	(VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ	(VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP	(FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ	(FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE	(16*1024)

/* Offsets of sampler/memobj/mipmap state within the HLSQ state area */
#define HLSQ_SAMPLER_OFFSET	0x000
#define HLSQ_MEMOBJ_OFFSET	0x400
#define HLSQ_MIPMAP_OFFSET	0x800

#ifdef GSL_USE_A3XX_HLSQ_SHADOW_RAM
/* Use shadow RAM */
#define HLSQ_SHADOW_BASE	(0x10000+SSIZE*2)
#else
/* Use working RAM */
#define HLSQ_SHADOW_BASE	0x10000
#endif

/* Shift for the loop count field of a CP_REG_TO_MEM command dword */
#define REG_TO_MEM_LOOP_COUNT_SHIFT	15
139
/*
 * Build the PC draw initiator dword for a draw command.
 *
 * index_size is a two-bit encoding: bit 0 feeds the INDEX_SIZE field and
 * bit 1 the SMALL_INDEX field.  Every macro argument is parenthesized in
 * the expansion so that callers may pass compound expressions (e.g.
 * "a | b") without operator-precedence surprises; the original expansion
 * used the bare "index_size & 1" / "index_size >> 1", which would bind
 * incorrectly for such arguments.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
148
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains the start and end (inclusive) of a range of
 * registers that are saved/restored with the drawing context.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
186
/* Global registers that need to be saved separately (they are not part
 * of the 0x2000-based context register ranges above)
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};
216
#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)

/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where registers are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
227
228#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
229/*
230 * Function for executing dest = ( (reg & and) ROL rol ) | or
231 */
232static unsigned int *rmw_regtomem(unsigned int *cmd,
233 unsigned int reg, unsigned int and,
234 unsigned int rol, unsigned int or,
235 unsigned int dest)
236{
237 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
238 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
239 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
240 *cmd++ = 0x00000000; /* AND value */
241 *cmd++ = reg; /* OR address */
242
243 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
244 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
245 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
246 *cmd++ = and; /* AND value */
247 *cmd++ = or; /* OR value */
248
249 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
250 *cmd++ = A3XX_CP_SCRATCH_REG2;
251 *cmd++ = dest;
252
253 return cmd;
254}
255#endif
256
/*
 * Build the IB that saves context registers, VS/FS ALU constants and
 * VS/FS texture state (memory objects, mipmap pointers, sampler objects)
 * into the context shadow memory of @drawctxt.
 *
 * Commands are assembled at tmp_ctx.cmd and the resulting range is
 * wrapped into an IB1 recorded in drawctxt->regconstant_save.  Several
 * dwords (constant lengths/offsets that are only known at runtime) are
 * written as 0 here; their host/GPU addresses are recorded in
 * drawctxt->constant_save_commands[] so a later fixup step can patch
 * them before the IB executes.
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;
	unsigned int i;

	/* Reserve one dword up front for fixup; record its addresses */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	/* The IB proper begins after the reserved fixup dword */
	start = cmd;

	/* Let the pipeline go idle before reading register state */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;		/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		/* NOTE(review): this 'start' shadows the outer command
		 * pointer 'start'; legal C, but -Wshadow will flag it. */
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants (conditional on cond_execs[2]) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants (conditional on cond_execs[3]) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	   From fixup:

	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   dst = base + offset
	   Because of the base alignment we can use
	   dst = base | offset
	 */
	*cmd++ = 0;	/* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS state lives SSIZE above VS) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	/* Advance the scratch allocator past the commands we emitted */
	tmp_ctx.cmd = cmd;
}
427
428/* Copy GMEM contents to system memory shadow. */
429static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
430 struct adreno_context *drawctxt,
431 struct gmem_shadow_t *shadow)
432{
433 unsigned int *cmds = tmp_ctx.cmd;
434 unsigned int *start = cmds;
435
436 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
437 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
438
439 /* RB_MODE_CONTROL */
440 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
441 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
442 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
443 /* RB_RENDER_CONTROL */
444 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
445 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
446
447 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
448 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
449 /* RB_COPY_CONTROL */
450 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
451 RB_CLEAR_MODE_RESOLVE) |
452 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
453 tmp_ctx.gmem_base >> 14);
454 /* RB_COPY_DEST_BASE */
455 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
456 shadow->gmemshadow.gpuaddr >> 5);
457 /* RB_COPY_DEST_PITCH */
458 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
459 (shadow->pitch * 4) / 32);
460 /* RB_COPY_DEST_INFO */
461 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
462 RB_TILINGMODE_LINEAR) |
463 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
464 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
465 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
466
467 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
468 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
469 /* GRAS_SC_CONTROL */
470 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
471
472 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
473 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
474 /* VFD_CONTROL_0 */
475 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
476 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
477 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
478 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
479 /* VFD_CONTROL_1 */
480 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
481 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
482 _SET(VFD_CTRLREG1_REGID4INST, 252);
483
484 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
485 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
486 /* VFD_FETCH_INSTR_0_0 */
487 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
488 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
489 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
490 /* VFD_FETCH_INSTR_1_0 */
491 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
492 shadow->quad_vertices.gpuaddr);
493
494 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
495 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
496 /* VFD_DECODE_INSTR_0 */
497 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
498 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
499 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
500 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
501 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
502 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
503
504 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
505 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
506 /* HLSQ_CONTROL_0_REG */
507 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
508 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
509 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
510 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
511 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
512 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
513 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
514 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
515 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
516 /* HLSQ_CONTROL_1_REG */
517 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
518 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
519 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
520 /* HLSQ_CONTROL_2_REG */
521 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
522 /* HLSQ_CONTROL_3_REG */
523 *cmds++ = 0x00000000;
524
525 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
526 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
527 /* HLSQ_VS_CONTROL_REG */
528 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
529 /* HLSQ_FS_CONTROL_REG */
530 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
531 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
532 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
533 /* HLSQ_CONST_VSPRESV_RANGE_REG */
534 *cmds++ = 0x00000000;
535 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
536 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
537 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
538
539 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
540 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
541 /* SP_FS_LENGTH_REG */
542 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
543
544 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
545 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
546 /* SP_SP_CTRL_REG */
547 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
548 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
549
550 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
551 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
552 /* SP_VS_CTRL_REG0 */
553 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
554 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
555 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
556 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
557 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
558 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
559 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
560 /* SP_VS_CTRL_REG1 */
561 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
562 /* SP_VS_PARAM_REG */
563 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
564 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
565 /* SP_VS_OUT_REG_0 */
566 *cmds++ = 0x00000000;
567 /* SP_VS_OUT_REG_1 */
568 *cmds++ = 0x00000000;
569 /* SP_VS_OUT_REG_2 */
570 *cmds++ = 0x00000000;
571 /* SP_VS_OUT_REG_3 */
572 *cmds++ = 0x00000000;
573 /* SP_VS_OUT_REG_4 */
574 *cmds++ = 0x00000000;
575 /* SP_VS_OUT_REG_5 */
576 *cmds++ = 0x00000000;
577 /* SP_VS_OUT_REG_6 */
578 *cmds++ = 0x00000000;
579 /* SP_VS_OUT_REG_7 */
580 *cmds++ = 0x00000000;
581
582 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
583 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
584 /* SP_VS_VPC_DST_REG_0 */
585 *cmds++ = 0x00000000;
586 /* SP_VS_VPC_DST_REG_1 */
587 *cmds++ = 0x00000000;
588 /* SP_VS_VPC_DST_REG_2 */
589 *cmds++ = 0x00000000;
590 /* SP_VS_VPC_DST_REG_3 */
591 *cmds++ = 0x00000000;
592 /* SP_VS_OBJ_OFFSET_REG */
593 *cmds++ = 0x00000000;
594 /* SP_VS_OBJ_START_REG */
595 *cmds++ = 0x00000000;
596
597 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
598 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
599 /* SP_VS_LENGTH_REG */
600 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
601 /* SP_FS_CTRL_REG0 */
602 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
603 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
604 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
605 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
606 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
607 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
608 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
609 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
610 /* SP_FS_CTRL_REG1 */
611 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
612 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
613 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
614 /* SP_FS_OBJ_OFFSET_REG */
615 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
616 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
617 /* SP_FS_OBJ_START_REG */
618 *cmds++ = 0x00000000;
619
620 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
621 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
622 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
623 *cmds++ = 0x00000000;
624 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
625 *cmds++ = 0x00000000;
626
627 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
628 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
629 /* SP_FS_OUTPUT_REG */
630 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
631
632 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
633 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
634 /* SP_FS_MRT_REG_0 */
635 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
636 /* SP_FS_MRT_REG_1 */
637 *cmds++ = 0x00000000;
638 /* SP_FS_MRT_REG_2 */
639 *cmds++ = 0x00000000;
640 /* SP_FS_MRT_REG_3 */
641 *cmds++ = 0x00000000;
642
643 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
644 *cmds++ = CP_REG(A3XX_VPC_ATTR);
645 /* VPC_ATTR */
646 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
647 _SET(VPC_VPCATTR_LMSIZE, 1);
648 /* VPC_PACK */
649 *cmds++ = 0x00000000;
650 /* VPC_VARRYING_INTERUPT_MODE_0 */
651 *cmds++ = 0x00000000;
652 /* VPC_VARRYING_INTERUPT_MODE_1 */
653 *cmds++ = 0x00000000;
654 /* VPC_VARRYING_INTERUPT_MODE_2 */
655 *cmds++ = 0x00000000;
656 /* VPC_VARRYING_INTERUPT_MODE_3 */
657 *cmds++ = 0x00000000;
658 /* VPC_VARYING_PS_REPL_MODE_0 */
659 *cmds++ = 0x00000000;
660 /* VPC_VARYING_PS_REPL_MODE_1 */
661 *cmds++ = 0x00000000;
662 /* VPC_VARYING_PS_REPL_MODE_2 */
663 *cmds++ = 0x00000000;
664 /* VPC_VARYING_PS_REPL_MODE_3 */
665 *cmds++ = 0x00000000;
666
667 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
668 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
669 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
670 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
671 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
672 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
673 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
674
675 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
676 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
677 /* end; */
678 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
679 /* nop; */
680 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
681 /* nop; */
682 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
683
684 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
685 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
686 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
687 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
688 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
689 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
690 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
691
692 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
693 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
694 /* end; */
695 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
696 /* nop; */
697 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
698 /* nop; */
699 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
700
701 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
702 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
703 /* RB_MSAA_CONTROL */
704 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
705 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
706
707 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
708 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
709 /* RB_DEPTH_CONTROL */
710 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
711
712 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
713 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
714 /* RB_MRT_CONTROL0 */
715 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
716 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
717 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
718 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
719
720 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
721 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
722 /* RB_MRT_BLEND_CONTROL0 */
723 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
724 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
725 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
726 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
727 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
728 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
729 /* RB_MRT_CONTROL1 */
730 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
731 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
732 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
733
734 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
735 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
736 /* RB_MRT_BLEND_CONTROL1 */
737 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
738 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
739 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
740 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
741 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
742 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
743 /* RB_MRT_CONTROL2 */
744 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
745 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
746 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
747
748 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
749 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
750 /* RB_MRT_BLEND_CONTROL2 */
751 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
752 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
753 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
754 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
755 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
756 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
757 /* RB_MRT_CONTROL3 */
758 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
759 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
760 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
761
762 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
763 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
764 /* RB_MRT_BLEND_CONTROL3 */
765 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
766 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
767 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
768 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
769 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
770 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
771
772 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
773 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
774 /* VFD_INDEX_MIN */
775 *cmds++ = 0x00000000;
776 /* VFD_INDEX_MAX */
777 *cmds++ = 0xFFFFFFFF;
778 /* VFD_INSTANCEID_OFFSET */
779 *cmds++ = 0x00000000;
780 /* VFD_INDEX_OFFSET */
781 *cmds++ = 0x00000000;
782
783 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
784 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
785 /* VFD_VS_THREADING_THRESHOLD */
786 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
787 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
788
789 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
790 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
791 /* TPL1_TP_VS_TEX_OFFSET */
792 *cmds++ = 0;
793
794 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
795 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
796 /* TPL1_TP_FS_TEX_OFFSET */
797 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
798 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
799 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
800
801 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
802 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
803 /* PC_PRIM_VTX_CNTL */
804 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
805 PC_DRAW_TRIANGLES) |
806 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
807 PC_DRAW_TRIANGLES) |
808 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
809
810 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
811 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
812 /* GRAS_SC_WINDOW_SCISSOR_TL */
813 *cmds++ = 0x00000000;
814 /* GRAS_SC_WINDOW_SCISSOR_BR */
815 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
816 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
817
818 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
819 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
820 /* GRAS_SC_SCREEN_SCISSOR_TL */
821 *cmds++ = 0x00000000;
822 /* GRAS_SC_SCREEN_SCISSOR_BR */
823 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
824 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
825
826 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
827 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
828 /* GRAS_CL_VPORT_XOFFSET */
829 *cmds++ = 0x00000000;
830 /* GRAS_CL_VPORT_XSCALE */
831 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
832 /* GRAS_CL_VPORT_YOFFSET */
833 *cmds++ = 0x00000000;
834 /* GRAS_CL_VPORT_YSCALE */
835 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
836
837 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
838 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
839 /* GRAS_CL_VPORT_ZOFFSET */
840 *cmds++ = 0x00000000;
841 /* GRAS_CL_VPORT_ZSCALE */
842 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
843
844 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
845 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
846 /* GRAS_CL_CLIP_CNTL */
847 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
848 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
849 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
850 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
851 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
852
853 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
854 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
855 /* GRAS_CL_GB_CLIP_ADJ */
856 *cmds++ = 0x00000000;
857
858 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
859 *cmds++ = 0x00000000;
860
861 /*
862 * Resolve using two draw calls with a dummy register
863 * write in between. This is a HLM workaround
864 * that should be removed later.
865 */
866 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
867 *cmds++ = 0x00000000; /* Viz query info */
868 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
869 PC_DI_SRC_SEL_IMMEDIATE,
870 PC_DI_INDEX_SIZE_32_BIT,
871 PC_DI_IGNORE_VISIBILITY);
872 *cmds++ = 0x00000003; /* Num indices */
873 *cmds++ = 0x00000000; /* Index 0 */
874 *cmds++ = 0x00000001; /* Index 1 */
875 *cmds++ = 0x00000002; /* Index 2 */
876
877 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
878 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
879 *cmds++ = 0x00000000;
880
881 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
882 *cmds++ = 0x00000000; /* Viz query info */
883 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
884 PC_DI_SRC_SEL_IMMEDIATE,
885 PC_DI_INDEX_SIZE_32_BIT,
886 PC_DI_IGNORE_VISIBILITY);
887 *cmds++ = 0x00000003; /* Num indices */
888 *cmds++ = 0x00000002; /* Index 0 */
889 *cmds++ = 0x00000001; /* Index 1 */
890 *cmds++ = 0x00000003; /* Index 2 */
891
892 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
893 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
894 *cmds++ = 0x00000000;
895
896 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
897 *cmds++ = 0x00000000;
898
899 /* Create indirect buffer command for above command sequence */
900 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
901
902 return cmds;
903}
904
/*
 * Build the IB that saves the SP vertex/fragment shader instruction stores
 * to the context shadow memory.  Sizes and source offsets are not known at
 * build time, so several dwords are left as placeholders (and their GPU
 * addresses recorded in drawctxt->shader_save_commands[]) to be patched by
 * the save-fixup IB built in build_save_fixup_cmds().
 */
static void build_shader_save_cmds(struct adreno_device *adreno_dev,
	struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;

	/*
	 * Reserve space for boolean values used for COND_EXEC packet.
	 * The fixup IB writes these from SP_VS/FS_CTRL_REG0 bit 1 (see
	 * build_save_fixup_cmds) so each shader save below only runs when
	 * the corresponding instruction-store mode requires it.
	 */
	drawctxt->cond_execs[0].hostptr = cmd;
	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[1].hostptr = cmd;
	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* Two more patchable slots, filled in by the fixup IB */
	drawctxt->shader_save_commands[0].hostptr = cmd;
	drawctxt->shader_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->shader_save_commands[1].hostptr = cmd;
	drawctxt->shader_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* The IB proper starts here; the slots above are data, not commands */
	start = cmd;

	/* Save vertex shader */

	/*
	 * Conditionally execute the next EXEC_COUNT (3) dwords - i.e. the
	 * CP_REG_TO_MEM packet below - based on the boolean reserved at
	 * cond_execs[0].
	 */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Placeholder for the REG_TO_MEM count/source, patched by fixup IB */
	drawctxt->shader_save_commands[2].hostptr = cmd;
	drawctxt->shader_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   src = (HLSQ_SHADOW_BASE + 0x1000)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: VS half of the shader shadow, dword aligned */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Save fragment shader */
	/* Same conditional pattern as above, gated on cond_execs[1] */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Placeholder for the REG_TO_MEM count/source, patched by fixup IB */
	drawctxt->shader_save_commands[3].hostptr = cmd;
	drawctxt->shader_save_commands[3].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
	   From regspec:

	   SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
	   First instruction of the whole shader will be stored from
	   the offset in instruction cache, unit = 256bits, a cache line.
	   It can start from 0 if no VS available.

	   src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: FS half (upper half) of the shader shadow */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
997
998/*
999 * Make an IB to modify context save IBs with the correct shader instruction
1000 * and constant sizes and offsets.
1001 */
1002
/*
 * Build the IB that patches the context-save IBs at runtime with the
 * correct shader instruction / constant sizes and offsets.  It reads the
 * live SP_VS/FS_CTRL and SP_FS_OBJ_OFFSET registers and writes derived
 * values into the placeholder dwords reserved by build_shader_save_cmds()
 * (shader_save_commands[], cond_execs[], constant_save_commands[]).
 */
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
	struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Flush HLSQ lazy updates so the SP/HLSQ registers read below are
	 * up to date */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7; /* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Invalidate the entire UCHE (no start/end addresses needed) */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000; /* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0; /* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/*
	 * CPU-sync variant: dump the raw registers to memory and let the
	 * CPU compute the fixup values, instead of the CP_REG_RMW math
	 * used in the #else branch.
	 */

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else
	/*
	 * GPU-side variant: use rmw_regtomem()/CP_REG_RMW sequences to
	 * mask, shift and OR the register fields into the save-IB
	 * placeholder dwords directly.
	 */

	/*
	 * Shader save: patch shader_save_commands[2] with
	 * (VS_LENGTH * 8) << REG_TO_MEM_LOOP_COUNT_SHIFT | src, derived
	 * from SP_VS_CTRL_REG0[31:24] (see comment block in
	 * build_shader_save_cmds)
	 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	/* NOTE(review): bit 30 appears to make the OR operand a register
	 * address rather than an immediate - matches the "OR address" vs
	 * "OR value" comments below; confirm against CP packet spec */
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	/* Shift amount is encoded in bits [31:24] of the dword below;
	 * presumably a right-shift by (11 + LOOP_COUNT_SHIFT) - confirm */
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
		A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000; /* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000; /* AND value */
	*cmd++ = 0x00000000; /* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff; /* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */

	/* Write the assembled FS count/source into the save IB placeholder */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save: patch the VS/FS constant REG_TO_MEM placeholders
	 * from the const-length fields of SP_VS/FS_CTRL_REG1 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   17, (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   17, (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	/* FS constant destination offset, from SP_FS_OBJ_OFFSET_REG */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals: non-zero const length enables
	 * the corresponding COND_EXEC in the constant-save IB */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1151
1152/****************************************************************************/
1153/* Functions to build context restore IBs */
1154/****************************************************************************/
1155
1156static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1157 struct adreno_context *drawctxt,
1158 struct gmem_shadow_t *shadow)
1159{
1160 unsigned int *cmds = tmp_ctx.cmd;
1161 unsigned int *start = cmds;
1162
1163 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1164 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1165 /* HLSQ_CONTROL_0_REG */
1166 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1167 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1168 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1169 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1170 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1171 /* HLSQ_CONTROL_1_REG */
1172 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1173 /* HLSQ_CONTROL_2_REG */
1174 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1175 /* HLSQ_CONTROL3_REG */
1176 *cmds++ = 0x00000000;
1177
1178 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1179 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1180 /* RB_MRT_BUF_INFO0 */
1181 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1182 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1183 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1184 (shadow->gmem_pitch * 4 * 8) / 256);
1185 /* RB_MRT_BUF_BASE0 */
1186 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1187
1188 /* Texture samplers */
1189 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1190 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1191 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1192 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1193 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1194 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1195 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1196 *cmds++ = 0x00000240;
1197 *cmds++ = 0x00000000;
1198
1199 /* Texture memobjs */
1200 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1201 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1202 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1203 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1204 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1205 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1206 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1207 *cmds++ = 0x4cc06880;
1208 *cmds++ = shadow->height | (shadow->width << 14);
1209 *cmds++ = (shadow->pitch*4*8) << 9;
1210 *cmds++ = 0x00000000;
1211
1212 /* Mipmap bases */
1213 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1214 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1215 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1216 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1217 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1218 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1219 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1220 *cmds++ = shadow->gmemshadow.gpuaddr;
1221 *cmds++ = 0x00000000;
1222 *cmds++ = 0x00000000;
1223 *cmds++ = 0x00000000;
1224 *cmds++ = 0x00000000;
1225 *cmds++ = 0x00000000;
1226 *cmds++ = 0x00000000;
1227 *cmds++ = 0x00000000;
1228 *cmds++ = 0x00000000;
1229 *cmds++ = 0x00000000;
1230 *cmds++ = 0x00000000;
1231 *cmds++ = 0x00000000;
1232 *cmds++ = 0x00000000;
1233 *cmds++ = 0x00000000;
1234
1235 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1236 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1237 /* HLSQ_VS_CONTROL_REG */
1238 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1239 /* HLSQ_FS_CONTROL_REG */
1240 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1241 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1242 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1243 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1244 *cmds++ = 0x00000000;
1245 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1246 *cmds++ = 0x00000000;
1247
1248 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1249 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1250 /* SP_FS_LENGTH_REG */
1251 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1252
1253 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1254 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1255 /* SP_VS_CTRL_REG0 */
1256 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1257 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1258 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1259 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1260 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1261 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1262 /* SP_VS_CTRL_REG1 */
1263 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1264 /* SP_VS_PARAM_REG */
1265 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1266 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1267 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1268 /* SP_VS_OUT_REG0 */
1269 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1270 /* SP_VS_OUT_REG1 */
1271 *cmds++ = 0x00000000;
1272 /* SP_VS_OUT_REG2 */
1273 *cmds++ = 0x00000000;
1274 /* SP_VS_OUT_REG3 */
1275 *cmds++ = 0x00000000;
1276 /* SP_VS_OUT_REG4 */
1277 *cmds++ = 0x00000000;
1278 /* SP_VS_OUT_REG5 */
1279 *cmds++ = 0x00000000;
1280 /* SP_VS_OUT_REG6 */
1281 *cmds++ = 0x00000000;
1282 /* SP_VS_OUT_REG7 */
1283 *cmds++ = 0x00000000;
1284
1285 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1286 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1287 /* SP_VS_VPC_DST_REG0 */
1288 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1289 /* SP_VS_VPC_DST_REG1 */
1290 *cmds++ = 0x00000000;
1291 /* SP_VS_VPC_DST_REG2 */
1292 *cmds++ = 0x00000000;
1293 /* SP_VS_VPC_DST_REG3 */
1294 *cmds++ = 0x00000000;
1295 /* SP_VS_OBJ_OFFSET_REG */
1296 *cmds++ = 0x00000000;
1297 /* SP_VS_OBJ_START_REG */
1298 *cmds++ = 0x00000000;
1299
1300 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1301 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1302 /* SP_VS_LENGTH_REG */
1303 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1304 /* SP_FS_CTRL_REG0 */
1305 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1306 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1307 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1308 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1309 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1310 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1311 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1312 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1313 /* SP_FS_CTRL_REG1 */
1314 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1315 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1316 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1317 /* SP_FS_OBJ_OFFSET_REG */
1318 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128);
1319 /* SP_FS_OBJ_START_REG */
1320 *cmds++ = 0x00000000;
1321
1322 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1323 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1324 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1325 *cmds++ = 0x00000000;
1326 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1327 *cmds++ = 0x00000000;
1328
1329 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1330 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1331 /* SP_FS_OUT_REG */
1332 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1333
Jordan Crousea7ec4212012-02-04 10:23:52 -07001334 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001335 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1336 /* SP_FS_MRT_REG0 */
1337 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1338 /* SP_FS_MRT_REG1 */
1339 *cmds++ = 0;
1340 /* SP_FS_MRT_REG2 */
1341 *cmds++ = 0;
1342 /* SP_FS_MRT_REG3 */
1343 *cmds++ = 0;
1344
1345 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1346 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1347 /* VPC_ATTR */
1348 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1349 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1350 _SET(VPC_VPCATTR_LMSIZE, 1);
1351 /* VPC_PACK */
1352 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1353 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1354 /* VPC_VARYING_INTERP_MODE_0 */
1355 *cmds++ = 0x00000000;
1356 /* VPC_VARYING_INTERP_MODE1 */
1357 *cmds++ = 0x00000000;
1358 /* VPC_VARYING_INTERP_MODE2 */
1359 *cmds++ = 0x00000000;
1360 /* VPC_VARYING_IINTERP_MODE3 */
1361 *cmds++ = 0x00000000;
1362 /* VPC_VARRYING_PS_REPL_MODE_0 */
1363 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1364 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1365 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1366 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1367 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1377 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1378 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1379 /* VPC_VARRYING_PS_REPL_MODE_1 */
1380 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1381 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1382 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1388 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1389 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1392 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1393 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1394 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1395 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1396 /* VPC_VARRYING_PS_REPL_MODE_2 */
1397 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1398 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1399 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1405 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1406 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1409 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1410 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1411 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1412 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1413 /* VPC_VARRYING_PS_REPL_MODE_3 */
1414 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1415 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1416 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1422 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1423 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1426 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1427 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1428 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1429 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1430
Jordan Crousea7ec4212012-02-04 10:23:52 -07001431 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001432 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1433 /* SP_SP_CTRL_REG */
1434 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1435
1436 /* Load vertex shader */
1437 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1438 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1439 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1440 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1441 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1442 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1443 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1444 /* (sy)end; */
1445 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1446 /* nop; */
1447 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1448 /* nop; */
1449 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1450 /* nop; */
1451 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1452
1453 /* Load fragment shader */
1454 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1455 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1456 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1457 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1458 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1459 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1460 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1461 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1462 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1463 /* (rpt5)nop; */
1464 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1465 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1466 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1467 /* (sy)mov.f32f32 r1.x, r0.x; */
1468 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1469 /* mov.f32f32 r1.y, r0.y; */
1470 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1471 /* mov.f32f32 r1.z, r0.z; */
1472 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1473 /* mov.f32f32 r1.w, r0.w; */
1474 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1475 /* end; */
1476 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1477
1478 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1479 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1480 /* VFD_CONTROL_0 */
1481 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1482 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1483 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1484 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1485 /* VFD_CONTROL_1 */
1486 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1487 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1488 _SET(VFD_CTRLREG1_REGID4INST, 252);
1489
1490 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1491 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1492 /* VFD_FETCH_INSTR_0_0 */
1493 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1494 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1495 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1496 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1497 /* VFD_FETCH_INSTR_1_0 */
1498 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1499 shadow->quad_vertices_restore.gpuaddr);
1500 /* VFD_FETCH_INSTR_0_1 */
1501 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1502 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1503 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1504 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1505 /* VFD_FETCH_INSTR_1_1 */
1506 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1507 shadow->quad_vertices_restore.gpuaddr + 16);
1508
1509 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1510 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1511 /* VFD_DECODE_INSTR_0 */
1512 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1513 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1514 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1515 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1516 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1517 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1518 /* VFD_DECODE_INSTR_1 */
1519 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1520 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1521 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1522 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1523 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1524 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1525
1526 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1527 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1528 /* RB_DEPTH_CONTROL */
1529 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1530
1531 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1532 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1533 /* RB_STENCIL_CONTROL */
1534 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1535 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1536 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1537 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1538 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1539 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1540 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1541 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1542
1543 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1544 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1545 /* RB_MODE_CONTROL */
1546 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1547 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1548
1549 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1550 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1551 /* RB_RENDER_CONTROL */
1552 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1553 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1554
1555 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1556 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1557 /* RB_MSAA_CONTROL */
1558 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1559 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1560
1561 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1562 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1563 /* RB_MRT_CONTROL0 */
1564 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1565 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1566 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1567 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1568
1569 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1570 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1571 /* RB_MRT_BLENDCONTROL0 */
1572 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1573 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1574 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1575 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1576 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1577 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1578 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1579 /* RB_MRT_CONTROL1 */
1580 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1581 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1582 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1583
1584 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1585 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1586 /* RB_MRT_BLENDCONTROL1 */
1587 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1588 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1589 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1590 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1591 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1592 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1593 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1594 /* RB_MRT_CONTROL2 */
1595 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1596 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1597 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1598
1599 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1600 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1601 /* RB_MRT_BLENDCONTROL2 */
1602 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1603 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1604 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1605 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1606 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1607 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1608 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1609 /* RB_MRT_CONTROL3 */
1610 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1611 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1612 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1613
1614 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1615 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1616 /* RB_MRT_BLENDCONTROL3 */
1617 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1618 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1619 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1620 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1621 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1622 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1623 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1624
1625 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1626 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1627 /* VFD_INDEX_MIN */
1628 *cmds++ = 0x00000000;
1629 /* VFD_INDEX_MAX */
1630 *cmds++ = 0xFFFFFFFF;
1631 /* VFD_INDEX_OFFSET */
1632 *cmds++ = 0x00000000;
1633 /* TPL1_TP_VS_TEX_OFFSET */
1634 *cmds++ = 0x00000000;
1635
1636 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1637 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1638 /* VFD_VS_THREADING_THRESHOLD */
1639 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1640 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1641
1642 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1643 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1644 /* TPL1_TP_VS_TEX_OFFSET */
1645 *cmds++ = 0x00000000;
1646
1647 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1648 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1649 /* TPL1_TP_FS_TEX_OFFSET */
1650 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1651 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1652 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1653
1654 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1655 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1656 /* GRAS_SC_CONTROL */
1657 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1658
1659 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1660 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1661 /* GRAS_SU_MODE_CONTROL */
1662 *cmds++ = 0x00000000;
1663
1664 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1665 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1666 /* GRAS_SC_WINDOW_SCISSOR_TL */
1667 *cmds++ = 0x00000000;
1668 /* GRAS_SC_WINDOW_SCISSOR_BR */
1669 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1670 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1671
1672 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1673 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1674 /* GRAS_SC_SCREEN_SCISSOR_TL */
1675 *cmds++ = 0x00000000;
1676 /* GRAS_SC_SCREEN_SCISSOR_BR */
1677 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1678 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1679
1680 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1681 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1682 /* GRAS_CL_VPORT_XOFFSET */
1683 *cmds++ = 0x00000000;
1684 /* GRAS_CL_VPORT_XSCALE */
1685 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1686 /* GRAS_CL_VPORT_YOFFSET */
1687 *cmds++ = 0x00000000;
1688 /* GRAS_CL_VPORT_YSCALE */
1689 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1690
1691 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1692 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1693 /* GRAS_CL_VPORT_ZOFFSET */
1694 *cmds++ = 0x00000000;
1695 /* GRAS_CL_VPORT_ZSCALE */
1696 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1697
1698 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1699 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1700 /* GRAS_CL_CLIP_CNTL */
1701 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1702
1703 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1704 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1705 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1706 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1707
1708 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1709 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1710 /* PC_PRIM_VTX_CONTROL */
1711 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1712 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1713 PC_DRAW_TRIANGLES) |
1714 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1715 PC_DRAW_TRIANGLES) |
1716 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1717
1718 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1719 *cmds++ = 0x00000000; /* Viz query info */
1720 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1721 PC_DI_SRC_SEL_AUTO_INDEX,
1722 PC_DI_INDEX_SIZE_16_BIT,
1723 PC_DI_IGNORE_VISIBILITY);
1724 *cmds++ = 0x00000002; /* Num indices */
1725
1726 /* Create indirect buffer command for above command sequence */
1727 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1728
1729 return cmds;
1730}
1731
/*
 * build_regrestore_cmds() - build the IB that restores shadowed context
 * registers on a context switch.
 *
 * A CP_LOAD_CONSTANT_CONTEXT packet reloads the register ranges from the
 * per-context shadow memory.  The packet header is emitted last (into a
 * slot reserved at lcc_start) because its payload length is only known
 * after all register-range descriptors have been written.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;	/* slot for the deferred packet header */

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Fully invalidate the UCHE before reloading context state */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Emit one range descriptor per shadowed register range */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Payload length is now known; fill in the deferred header */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

	/* NOTE(review): lcc_start[2] is the first range descriptor word;
	   the flag bits appear to be OR'd into it — confirm against the
	   CP_LOAD_CONSTANT_CONTEXT packet spec */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * Restore the global registers with individual type0 writes and
	 * record each value slot's GPU address so the save path can patch
	 * the saved value into it later.
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1789
/*
 * build_constantrestore_cmds() - build the IB that restores shader
 * constants and texture state from the context shadow.
 *
 * The vertex/fragment constant loads are guarded by CP_COND_EXEC packets
 * whose predicate slots (cond_execs[2]/[3]) and whose CP_LOAD_STATE size
 * words (constant_load_commands[]) are left as zero placeholders here;
 * they are patched at context-switch time by the fixup IBs, which read
 * the live SP_*_CTRL_REG1 values.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/* Predicate slots for the two conditional constant loads below;
	   written at runtime by the restore fixup IB */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable register shadowing for the HLSQ write */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1954
/*
 * build_shader_restore_cmds() - build the IB that reloads vertex and
 * fragment shader instructions from the context's shader shadow.
 *
 * Each load is guarded by a CP_COND_EXEC on cond_execs[0]/[1] so it only
 * runs when a shader was actually saved.  The CP_LOAD_STATE size words
 * (shader_load_commands[]) are zero placeholders patched at runtime from
 * the live SP_*_CTRL_REG0 lengths by the restore fixup IB.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	/* FS instructions live in the second half of the shader shadow */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2021
2022static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
2023 struct adreno_context *drawctxt)
2024{
2025 unsigned int *cmd = tmp_ctx.cmd;
2026 unsigned int *start = cmd;
2027
2028 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
2029 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
2030 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
2031 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
2032 = virt2gpu(cmd, &drawctxt->gpustate);
2033 *cmd++ = 0;
2034
2035 /* Create indirect buffer command for above command sequence */
2036 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
2037
2038 tmp_ctx.cmd = cmd;
2039}
2040
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
/*
 * The placeholders left by the constant/shader restore builders are
 * filled in here.  In the CPU-sync build the raw register values are
 * copied with CP_REG_TO_MEM; otherwise rmw_regtomem() masks, shifts and
 * ORs each register field into the proper CP_LOAD_STATE ordinal before
 * storing it at the recorded gpuaddr.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2113
/*
 * a3xx_create_gpustate_shadow() - build every save/restore IB needed for
 * full (non-preamble) context switching.
 *
 * Order matters: each build_* helper appends its commands at tmp_ctx.cmd
 * and advances it, so later builders consume the command space left by
 * earlier ones.  Always returns 0; the command space was preallocated by
 * the caller.
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2130
/* Create the shadow buffer and command streams for saving/restoring GMEM */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	int result;

	/* Size the shadow buffer to cover the device's GMEM */
	calc_gmemsize(&drawctxt->context_gmem_shadow,
		adreno_dev->gmemspace.sizebytes);
	tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;

	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
		drawctxt->pagetable, drawctxt->context_gmem_shadow.size);

	if (result)
		return result;

	/* Build the quad vertex buffer used by the GMEM copy commands */
	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
		&tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	/* Build both copy directions: GMEM->memory (save) and back */
	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);

	/* Flush the CPU-written commands out to memory for the GPU */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;

	return 0;
}
2165
/*
 * a3xx_drawctxt_create() - allocate and initialize the per-context GPU
 * state shadow and command buffers.
 *
 * Returns 0 on success or a negative error code; on failure the gpustate
 * allocation is released before returning.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more than just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
		drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	/* Preamble contexts restore themselves; only legacy contexts need
	   the full shadow/save-restore machinery */
	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	/* Roll back the gpustate allocation on any failure */
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2204
/*
 * a3xx_drawctxt_save() - submit the IBs that save the outgoing context's
 * registers, constants, shaders and (optionally) GMEM contents.
 *
 * A NULL context is a no-op.  Flags are updated so the matching restore
 * path knows what was actually saved.
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			       struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			"Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			/* Mark that a restore is now required */
			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2248
/*
 * a3xx_drawctxt_restore() - make the given context current: record it in
 * the memstore, switch the pagetable, and submit the restore IBs.
 *
 * A NULL context just installs the default pagetable.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
				  struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Publish the new current context pointer to the memstore */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
	cmds[4] = (unsigned int)context;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(device, context->pagetable);

	/*
	 * Restore GMEM. (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2304
/*
 * a3xx_rb_init() - submit the CP_ME_INIT packet that initializes the
 * microengine after ringbuffer start.
 *
 * The 17 payload words are hardware-prescribed initialization values
 * for the A3XX CP (presumably from the vendor ME init sequence — they
 * are opaque magic numbers from this code's point of view).
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	/* 18 dwords: the type3 header plus 17 payload words */
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2334
2335static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2336{
2337 struct kgsl_device *device = &adreno_dev->dev;
2338 const char *err = "";
2339
2340 switch (bit) {
2341 case A3XX_INT_RBBM_AHB_ERROR: {
2342 unsigned int reg;
2343
2344 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2345
2346 /*
2347 * Return the word address of the erroring register so that it
2348 * matches the register specification
2349 */
2350
2351 KGSL_DRV_CRIT(device,
2352 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2353 reg & (1 << 28) ? "WRITE" : "READ",
2354 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2355 (reg >> 24) & 0x3);
2356
2357 /* Clear the error */
2358 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2359 return;
2360 }
2361 case A3XX_INT_RBBM_REG_TIMEOUT:
2362 err = "RBBM: AHB register timeout";
2363 break;
2364 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2365 err = "RBBM: ME master split timeout";
2366 break;
2367 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2368 err = "RBBM: PFP master split timeout";
2369 break;
2370 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2371 err = "RBBM: ATB bus oveflow";
2372 break;
2373 case A3XX_INT_VFD_ERROR:
2374 err = "VFD: Out of bounds access";
2375 break;
2376 case A3XX_INT_CP_T0_PACKET_IN_IB:
2377 err = "ringbuffer TO packet in IB interrupt";
2378 break;
2379 case A3XX_INT_CP_OPCODE_ERROR:
2380 err = "ringbuffer opcode error interrupt";
2381 break;
2382 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2383 err = "ringbuffer reserved bit error interrupt";
2384 break;
2385 case A3XX_INT_CP_HW_FAULT:
2386 err = "ringbuffer hardware fault";
2387 break;
2388 case A3XX_INT_CP_REG_PROTECT_FAULT:
2389 err = "ringbuffer protected mode error interrupt";
2390 break;
2391 case A3XX_INT_CP_AHB_ERROR_HALT:
2392 err = "ringbuffer AHB error interrupt";
2393 break;
2394 case A3XX_INT_MISC_HANG_DETECT:
2395 err = "MISC: GPU hang detected";
2396 break;
2397 case A3XX_INT_UCHE_OOB_ACCESS:
2398 err = "UCHE: Out of bounds access";
2399 break;
2400 }
2401
2402 KGSL_DRV_CRIT(device, "%s\n", err);
2403 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2404}
2405
/*
 * a3xx_cp_callback() - handle CP completion interrupts (IB1/IB2/RB).
 *
 * Wakes waiters, schedules the timestamp-expiry worker and notifies the
 * timestamp listener chain; the RB interrupt additionally clears the
 * ts_cmp_enable flag in the memstore.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	if (irq == A3XX_INT_CP_RB_INT) {
		kgsl_sharedmem_writel(&rb->device->memstore,
			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0);
		/* make the memstore write visible before waking waiters */
		wmb();
		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&rb->device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);

	atomic_notifier_call_chain(&rb->device->ts_notifier_list,
				   rb->device->id, NULL);
}
2425
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/* All interrupt sources that the driver enables on A3XX */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) |      \
	 (1 << A3XX_INT_RBBM_ME_MS_TIMEOUT) |    \
	 (1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT) |   \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_VFD_ERROR) |             \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_MISC_HANG_DETECT) |      \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))

/*
 * Per-bit interrupt dispatch table, indexed by interrupt bit number.
 * NULL entries are interrupt sources with no handler; a3xx_irq_handler
 * logs those as unhandled if they ever fire.
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	       /* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	       /* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	       /* 23 - Unused */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2478
2479static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2480{
2481 struct kgsl_device *device = &adreno_dev->dev;
2482 irqreturn_t ret = IRQ_NONE;
2483 unsigned int status, tmp;
2484 int i;
2485
2486 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2487
2488 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2489 if (tmp & 1) {
2490 if (a3xx_irq_funcs[i].func != NULL) {
2491 a3xx_irq_funcs[i].func(adreno_dev, i);
2492 ret = IRQ_HANDLED;
2493 } else {
2494 KGSL_DRV_CRIT(device,
2495 "Unhandled interrupt bit %x\n", i);
2496 }
2497 }
2498
2499 tmp >>= 1;
2500 }
2501
2502 if (status)
2503 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2504 status);
2505 return ret;
2506}
2507
2508static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2509{
2510 struct kgsl_device *device = &adreno_dev->dev;
2511
2512 if (state)
2513 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2514 else
2515 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2516}
2517
/*
 * a3xx_busy_cycles() - sample the power counter used for GPU busy stats.
 *
 * Freezes PWR counter 1, reads its low word, then resets and re-enables
 * it so the next sample starts from zero.  The register write ordering
 * is deliberate; do not reorder.
 */
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int reg, val;

	/* Freeze the counter */
	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
	reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Read the value */
	adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);

	/* Reset the counter */
	reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Re-enable the counter */
	reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
	reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	return val;
}
2542
/*
 * a3xx_start() - One-time hardware bring-up for the A3XX core
 * @adreno_dev: Pointer to the adreno device being started
 *
 * Resets the core, configures VBIF arbitration, and arms the error
 * reporting and power counter machinery.  The register writes below are
 * an ordered init sequence - do not reorder them.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* GMEM size on A320 is 512K */
	/* NOTE(review): this hardcodes 512K for every A3xx part that
	 * reaches this function - confirm for non-A320 chips */
	adreno_dev->gmemspace.sizebytes = SZ_512K;

	/* Reset the core */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
		0x00000001);
	msleep(20);

	/*
	 * enable fixed master AXI port of 0x0 for all clients to keep
	 * traffic from going to random places
	 */

	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable the RBBM error reporting bits. This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);
}
2578
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002579/* Defined in adreno_a3xx_snapshot.c */
2580void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2581 int *remain, int hang);
2582
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002583struct adreno_gpudev adreno_a3xx_gpudev = {
2584 .reg_rbbm_status = A3XX_RBBM_STATUS,
2585 .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
2586 .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
2587
2588 .ctxt_create = a3xx_drawctxt_create,
2589 .ctxt_save = a3xx_drawctxt_save,
2590 .ctxt_restore = a3xx_drawctxt_restore,
2591 .rb_init = a3xx_rb_init,
2592 .irq_control = a3xx_irq_control,
2593 .irq_handler = a3xx_irq_handler,
2594 .busy_cycles = a3xx_busy_cycles,
2595 .start = a3xx_start,
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002596 .snapshot = a3xx_snapshot,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002597};