/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
13
14#include <linux/delay.h>
15
16#include "kgsl.h"
17#include "adreno.h"
18#include "kgsl_sharedmem.h"
19#include "kgsl_cffdump.h"
20#include "a3xx_reg.h"
21
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive)
 */
27
/*
 * Dump list for A3XX postmortem/snapshot: flat array of
 * (first, last) register-offset pairs, both ends inclusive.
 */
const unsigned int a3xx_registers[] = {
	/* CP / RBBM / global block */
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	/* Context registers, pipe 0 (0x2000 range) */
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360,
	/* Context registers, pipe 1 (0x2400 range) */
	0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760,
};
64
65const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
66
/*
 * Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions: place _val at bit offset _shift when composing a register
 * dword.  Both arguments are parenthesized, so expressions are safe.
 */

#define _SET(_shift, _val) ((_val) << (_shift))
72
/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 *  8K - ALU Constant Shadow (8K aligned)
 *  4K - H/W Register Shadow (8K aligned)
 *  5K - Command and Vertex Buffers
 *  8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */
91
/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE		(8*1024)	/* 8KB */
#define REG_SHADOW_SIZE		(4*1024)	/* 4KB */
#define CMD_BUFFER_SIZE		(5*1024)	/* 5KB */
#define TEX_SIZE_MEM_OBJECTS	896		/* bytes */
#define TEX_SIZE_MIPMAP		1936		/* bytes */
#define TEX_SIZE_SAMPLER_OBJ	256		/* bytes */
/* VS copy + FS copy of the three texture state sections (~6KB total) */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2)	/* ~6KB */
#define SHADER_SHADOW_SIZE	(8*1024)	/* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET		ALU_SHADOW_SIZE
#define CMD_OFFSET		(REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET		(CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET		(SHADER_OFFSET+SHADER_SHADOW_SIZE)
/* Vertex-shader texture state sections come first ... */
#define VS_TEX_OFFSET_MEM_OBJECTS	TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP	(VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ	(VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
/* ... immediately followed by the fragment-shader copies */
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP	(FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ	(FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/*
 * The offset for fragment shader data in the HLSQ context: fragment
 * shader state is read from SSIZE bytes past the matching vertex
 * shader state (see the "+ SSIZE" FS saves below).
 */
#define SSIZE	(16*1024)

/* Offsets of the texture state sections within one HLSQ shadow stride */
#define HLSQ_SAMPLER_OFFSET	0x000
#define HLSQ_MEMOBJ_OFFSET	0x400
#define HLSQ_MIPMAP_OFFSET	0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE	(0x10000+SSIZE*2)

/* Shift of the loop-count field in a CP_REG_TO_MEM count/source dword */
#define REG_TO_MEM_LOOP_COUNT_SHIFT	15
134
/*
 * Compose a PC draw initiator dword.  index_size is split across two
 * fields: bit 0 selects INDEX_SIZE and bit 1 selects SMALL_INDEX.
 * Fix: parenthesize index_size (and every other argument) so callers
 * may safely pass expressions such as "a + b".
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
143
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains start and end of a range of registers; used by
 * build_regconstantsave_cmds() to copy HW state into the register
 * shadow when shadow writes are disabled.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
181
/*
 * Global (non-context) registers that need to be saved separately;
 * each is copied one at a time into tmp_ctx.reg_values[] slots by
 * build_regconstantsave_cmds().
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};

/* Number of global registers saved per context */
#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
213
/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where registers are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
222
#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
/*
 * Emit CP commands that execute, on the GPU:
 *
 *	dest = ( (reg & and) ROL rol ) | or
 *
 * cmd:  next free dword in the command buffer being built
 * reg:  register offset whose value is read (via CP_REG_RMW)
 * and:  mask applied to the register value
 * rol:  rotate-left amount applied after masking
 * or:   value OR'ed into the result
 * dest: GPU memory address the final value is written to
 *
 * Returns the advanced command-buffer pointer.
 * NOTE: A3XX_CP_SCRATCH_REG2 is clobbered as a temporary.
 */
static unsigned int *rmw_regtomem(unsigned int *cmd,
				  unsigned int reg, unsigned int and,
				  unsigned int rol, unsigned int or,
				  unsigned int dest)
{
	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	/* bit 30 selects "OR with register" addressing for the operand */
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = reg;		/* OR address */

	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = and;		/* AND value */
	*cmd++ = or;		/* OR value */

	/* Store the computed scratch value to dest */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = dest;

	return cmd;
}
#endif
251
/*
 * Build the command stream that saves context registers, shader
 * constants and VS/FS texture state into this context's shadow memory.
 * Commands are emitted at tmp_ctx.cmd and wrapped into the
 * drawctxt->regconstant_save IB.  Several dwords are written as 0 and
 * recorded (via constant_save_commands[].hostptr/gpuaddr) so they can
 * be patched later with values only known at runtime — see the
 * "From fixup" notes below.  adreno_dev is currently unused here.
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;
	unsigned int i;

	/* Reserve one dword (skipped by cmd++) for a later fixup */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	/* The IB starts after the reserved dword */
	start = cmd;

	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;		/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		/* NOTE: this 'start' intentionally shadows the outer
		 * command-buffer pointer; it is a register offset here. */
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		/* Destination: matching slot in the 8K-aligned reg shadow */
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants (only when cond_execs[2] says so) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants (only when cond_execs[3] says so) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	   From fixup:

	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   dst = base + offset
	   Because of the base alignment we can use
	   dst = base | offset
	 */
	*cmd++ = 0;	/* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS state lives SSIZE further) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
422
423/* Copy GMEM contents to system memory shadow. */
424static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
425 struct adreno_context *drawctxt,
426 struct gmem_shadow_t *shadow)
427{
428 unsigned int *cmds = tmp_ctx.cmd;
429 unsigned int *start = cmds;
430
431 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
432 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
433
434 /* RB_MODE_CONTROL */
435 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
436 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
437 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
438 /* RB_RENDER_CONTROL */
439 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
440 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
441
442 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
443 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
444 /* RB_COPY_CONTROL */
445 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
446 RB_CLEAR_MODE_RESOLVE) |
447 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
448 tmp_ctx.gmem_base >> 14);
449 /* RB_COPY_DEST_BASE */
450 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
451 shadow->gmemshadow.gpuaddr >> 5);
452 /* RB_COPY_DEST_PITCH */
453 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
454 (shadow->pitch * 4) / 32);
455 /* RB_COPY_DEST_INFO */
456 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
457 RB_TILINGMODE_LINEAR) |
458 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
459 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
460 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
461
462 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
463 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
464 /* GRAS_SC_CONTROL */
465 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
466
467 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
468 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
469 /* VFD_CONTROL_0 */
470 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
471 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
472 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
473 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
474 /* VFD_CONTROL_1 */
475 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
476 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
477 _SET(VFD_CTRLREG1_REGID4INST, 252);
478
479 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
480 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
481 /* VFD_FETCH_INSTR_0_0 */
482 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
483 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
484 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
485 /* VFD_FETCH_INSTR_1_0 */
486 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
487 shadow->quad_vertices.gpuaddr);
488
489 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
490 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
491 /* VFD_DECODE_INSTR_0 */
492 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
493 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
494 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
495 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
496 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
497 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
498
499 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
500 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
501 /* HLSQ_CONTROL_0_REG */
502 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
503 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
504 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
505 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
506 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
507 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
508 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
509 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
510 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
511 /* HLSQ_CONTROL_1_REG */
512 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
513 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
514 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
515 /* HLSQ_CONTROL_2_REG */
516 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
517 /* HLSQ_CONTROL_3_REG */
518 *cmds++ = 0x00000000;
519
520 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
521 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
522 /* HLSQ_VS_CONTROL_REG */
523 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
524 /* HLSQ_FS_CONTROL_REG */
525 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
526 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
527 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
528 /* HLSQ_CONST_VSPRESV_RANGE_REG */
529 *cmds++ = 0x00000000;
530 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
531 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
532 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
533
534 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
535 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
536 /* SP_FS_LENGTH_REG */
537 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
538
539 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
540 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
541 /* SP_SP_CTRL_REG */
542 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
543 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
544
545 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
546 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
547 /* SP_VS_CTRL_REG0 */
548 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
549 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
550 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
551 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
552 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
553 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
554 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
555 /* SP_VS_CTRL_REG1 */
556 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
557 /* SP_VS_PARAM_REG */
558 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
559 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
560 /* SP_VS_OUT_REG_0 */
561 *cmds++ = 0x00000000;
562 /* SP_VS_OUT_REG_1 */
563 *cmds++ = 0x00000000;
564 /* SP_VS_OUT_REG_2 */
565 *cmds++ = 0x00000000;
566 /* SP_VS_OUT_REG_3 */
567 *cmds++ = 0x00000000;
568 /* SP_VS_OUT_REG_4 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_5 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_6 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_7 */
575 *cmds++ = 0x00000000;
576
577 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
578 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
579 /* SP_VS_VPC_DST_REG_0 */
580 *cmds++ = 0x00000000;
581 /* SP_VS_VPC_DST_REG_1 */
582 *cmds++ = 0x00000000;
583 /* SP_VS_VPC_DST_REG_2 */
584 *cmds++ = 0x00000000;
585 /* SP_VS_VPC_DST_REG_3 */
586 *cmds++ = 0x00000000;
587 /* SP_VS_OBJ_OFFSET_REG */
588 *cmds++ = 0x00000000;
589 /* SP_VS_OBJ_START_REG */
590 *cmds++ = 0x00000000;
591
592 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
593 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
594 /* SP_VS_LENGTH_REG */
595 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
596 /* SP_FS_CTRL_REG0 */
597 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
598 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
599 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
600 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
601 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
602 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
603 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
604 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
605 /* SP_FS_CTRL_REG1 */
606 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
607 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
608 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
609 /* SP_FS_OBJ_OFFSET_REG */
610 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
611 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
612 /* SP_FS_OBJ_START_REG */
613 *cmds++ = 0x00000000;
614
615 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
616 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
617 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
618 *cmds++ = 0x00000000;
619 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
620 *cmds++ = 0x00000000;
621
622 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
623 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
624 /* SP_FS_OUTPUT_REG */
625 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
626
627 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
628 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
629 /* SP_FS_MRT_REG_0 */
630 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
631 /* SP_FS_MRT_REG_1 */
632 *cmds++ = 0x00000000;
633 /* SP_FS_MRT_REG_2 */
634 *cmds++ = 0x00000000;
635 /* SP_FS_MRT_REG_3 */
636 *cmds++ = 0x00000000;
637
638 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
639 *cmds++ = CP_REG(A3XX_VPC_ATTR);
640 /* VPC_ATTR */
641 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
642 _SET(VPC_VPCATTR_LMSIZE, 1);
643 /* VPC_PACK */
644 *cmds++ = 0x00000000;
645 /* VPC_VARRYING_INTERUPT_MODE_0 */
646 *cmds++ = 0x00000000;
647 /* VPC_VARRYING_INTERUPT_MODE_1 */
648 *cmds++ = 0x00000000;
649 /* VPC_VARRYING_INTERUPT_MODE_2 */
650 *cmds++ = 0x00000000;
651 /* VPC_VARRYING_INTERUPT_MODE_3 */
652 *cmds++ = 0x00000000;
653 /* VPC_VARYING_PS_REPL_MODE_0 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARYING_PS_REPL_MODE_1 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARYING_PS_REPL_MODE_2 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARYING_PS_REPL_MODE_3 */
660 *cmds++ = 0x00000000;
661
662 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
663 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
664 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
665 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
666 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
667 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
668 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
669
670 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
671 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
672 /* end; */
673 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
674 /* nop; */
675 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
676 /* nop; */
677 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
678
679 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
680 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
681 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
682 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
683 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
684 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
685 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
686
687 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
688 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
689 /* end; */
690 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
691 /* nop; */
692 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
693 /* nop; */
694 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
695
696 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
697 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
698 /* RB_MSAA_CONTROL */
699 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
700 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
701
702 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
703 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
704 /* RB_DEPTH_CONTROL */
705 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
706
707 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
708 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
709 /* RB_MRT_CONTROL0 */
710 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
711 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
712 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
713 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
714
715 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
716 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
717 /* RB_MRT_BLEND_CONTROL0 */
718 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
719 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
720 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
721 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
722 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
723 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
724 /* RB_MRT_CONTROL1 */
725 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
726 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
727 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
728
729 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
730 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
731 /* RB_MRT_BLEND_CONTROL1 */
732 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
733 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
734 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
735 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
736 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
737 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
738 /* RB_MRT_CONTROL2 */
739 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
740 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
741 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
742
743 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
744 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
745 /* RB_MRT_BLEND_CONTROL2 */
746 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
747 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
748 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
749 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
750 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
751 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
752 /* RB_MRT_CONTROL3 */
753 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
754 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
755 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
756
757 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
758 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
759 /* RB_MRT_BLEND_CONTROL3 */
760 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
761 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
762 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
763 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
764 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
765 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
766
767 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
768 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
769 /* VFD_INDEX_MIN */
770 *cmds++ = 0x00000000;
771 /* VFD_INDEX_MAX */
772 *cmds++ = 0xFFFFFFFF;
773 /* VFD_INSTANCEID_OFFSET */
774 *cmds++ = 0x00000000;
775 /* VFD_INDEX_OFFSET */
776 *cmds++ = 0x00000000;
777
778 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
779 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
780 /* VFD_VS_THREADING_THRESHOLD */
781 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
782 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
783
784 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
785 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
786 /* TPL1_TP_VS_TEX_OFFSET */
787 *cmds++ = 0;
788
789 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
790 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
791 /* TPL1_TP_FS_TEX_OFFSET */
792 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
793 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
794 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
795
796 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
797 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
798 /* PC_PRIM_VTX_CNTL */
799 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
800 PC_DRAW_TRIANGLES) |
801 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
802 PC_DRAW_TRIANGLES) |
803 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
804
805 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
806 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
807 /* GRAS_SC_WINDOW_SCISSOR_TL */
808 *cmds++ = 0x00000000;
809 /* GRAS_SC_WINDOW_SCISSOR_BR */
810 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
811 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
812
813 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
814 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
815 /* GRAS_SC_SCREEN_SCISSOR_TL */
816 *cmds++ = 0x00000000;
817 /* GRAS_SC_SCREEN_SCISSOR_BR */
818 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
819 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
820
821 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
822 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
823 /* GRAS_CL_VPORT_XOFFSET */
824 *cmds++ = 0x00000000;
825 /* GRAS_CL_VPORT_XSCALE */
826 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
827 /* GRAS_CL_VPORT_YOFFSET */
828 *cmds++ = 0x00000000;
829 /* GRAS_CL_VPORT_YSCALE */
830 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
831
832 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
833 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
834 /* GRAS_CL_VPORT_ZOFFSET */
835 *cmds++ = 0x00000000;
836 /* GRAS_CL_VPORT_ZSCALE */
837 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
838
839 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
840 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
841 /* GRAS_CL_CLIP_CNTL */
842 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
843 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
844 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
845 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
846 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
847
848 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
849 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
850 /* GRAS_CL_GB_CLIP_ADJ */
851 *cmds++ = 0x00000000;
852
853 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
854 *cmds++ = 0x00000000;
855
856 /*
857 * Resolve using two draw calls with a dummy register
858 * write in between. This is a HLM workaround
859 * that should be removed later.
860 */
861 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
862 *cmds++ = 0x00000000; /* Viz query info */
863 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
864 PC_DI_SRC_SEL_IMMEDIATE,
865 PC_DI_INDEX_SIZE_32_BIT,
866 PC_DI_IGNORE_VISIBILITY);
867 *cmds++ = 0x00000003; /* Num indices */
868 *cmds++ = 0x00000000; /* Index 0 */
869 *cmds++ = 0x00000001; /* Index 1 */
870 *cmds++ = 0x00000002; /* Index 2 */
871
872 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
873 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
874 *cmds++ = 0x00000000;
875
876 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
877 *cmds++ = 0x00000000; /* Viz query info */
878 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
879 PC_DI_SRC_SEL_IMMEDIATE,
880 PC_DI_INDEX_SIZE_32_BIT,
881 PC_DI_IGNORE_VISIBILITY);
882 *cmds++ = 0x00000003; /* Num indices */
883 *cmds++ = 0x00000002; /* Index 0 */
884 *cmds++ = 0x00000001; /* Index 1 */
885 *cmds++ = 0x00000003; /* Index 2 */
886
887 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
888 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
889 *cmds++ = 0x00000000;
890
891 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
892 *cmds++ = 0x00000000;
893
894 /* Create indirect buffer command for above command sequence */
895 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
896
897 return cmds;
898}
899
/*
 * build_shader_save_cmds - build the IB that saves SP shader instructions
 *
 * Emits a command stream that conditionally copies the vertex and
 * fragment shader programs out of the HLSQ shadow area into the
 * context's gpustate buffer at SHADER_OFFSET (VS in the first half,
 * FS in the second half of SHADER_SHADOW_SIZE).
 *
 * The CP_REG_TO_MEM "count | src" dwords are written as 0 placeholders
 * here (shader_save_commands[2]/[3]); they are patched with the real
 * shader lengths and source offsets at save time by the fixup IB built
 * in build_save_fixup_cmds().  Likewise cond_execs[0]/[1] are boolean
 * slots that the fixup IB fills from the SP_{VS,FS}_CTRL_REG0
 * instruction-store-mode bit, so each copy only runs when that shader
 * stage actually has something to save.
 */
900static void build_shader_save_cmds(struct adreno_device *adreno_dev,
901			      struct adreno_context *drawctxt)
902{
903	unsigned int *cmd = tmp_ctx.cmd;
904	unsigned int *start;
905
906	/* Reserve space for boolean values used for COND_EXEC packet */
907	drawctxt->cond_execs[0].hostptr = cmd;
908	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
909	*cmd++ = 0;
910	drawctxt->cond_execs[1].hostptr = cmd;
911	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
912	*cmd++ = 0;
913
	/* Placeholder slots rewritten later by the save fixup IB */
914	drawctxt->shader_save_commands[0].hostptr = cmd;
915	drawctxt->shader_save_commands[0].gpuaddr =
916		virt2gpu(cmd, &drawctxt->gpustate);
917	*cmd++ = 0;
918	drawctxt->shader_save_commands[1].hostptr = cmd;
919	drawctxt->shader_save_commands[1].gpuaddr =
920		virt2gpu(cmd, &drawctxt->gpustate);
921	*cmd++ = 0;
922
	/* Actual IB starts here; the words above are data, not commands */
923	start = cmd;
924
925	/* Save vertex shader */
926
	/*
	 * CP_COND_EXEC: execute the following EXEC_COUNT (3) dwords --
	 * the CP_REG_TO_MEM below -- only if the masked value at
	 * cond_execs[0] is set (filled in by the fixup IB from
	 * SP_VS_CTRL_REG0's instruction store mode bit).
	 */
927	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
928	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
929	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
930	*cmd++ = 0x0000FFFF;
931	*cmd++ = 3; /* EXEC_COUNT */
932
933	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
934	drawctxt->shader_save_commands[2].hostptr = cmd;
935	drawctxt->shader_save_commands[2].gpuaddr =
936		virt2gpu(cmd, &drawctxt->gpustate);
937	/*
938	   From fixup:
939
940	   dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
941
942	   From regspec:
943	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
944	   If bit31 is 1, it means overflow
945	   or any long shader.
946
947	   src = (HLSQ_SHADOW_BASE + 0x1000)/4
948	 */
949	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
950	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;
951
952	/* Save fragment shader */
	/* Same conditional pattern as the VS save, gated on cond_execs[1] */
953	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
954	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
955	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
956	*cmd++ = 0x0000FFFF;
957	*cmd++ = 3; /* EXEC_COUNT */
958
959	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
960	drawctxt->shader_save_commands[3].hostptr = cmd;
961	drawctxt->shader_save_commands[3].gpuaddr =
962		virt2gpu(cmd, &drawctxt->gpustate);
963	/*
964	   From fixup:
965
966	   dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
967
968	   From regspec:
969	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
970	   If bit31 is 1, it means overflow
971	   or any long shader.
972
973	   fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
974	   From regspec:
975
976	   SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
977	   First instruction of the whole shader will be stored from
978	   the offset in instruction cache, unit = 256bits, a cache line.
979	   It can start from 0 if no VS available.
980
981	   src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
982	 */
983	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* FS shadow occupies the second half of the shader save area */
984	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
985		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;
986
987	/* Create indirect buffer command for above command sequence */
988	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);
989
990	tmp_ctx.cmd = cmd;
991}
992
993/*
994 * Make an IB to modify context save IBs with the correct shader instruction
995 * and constant sizes and offsets.
996 */
997
/*
 * build_save_fixup_cmds - build the IB that patches the save IBs
 *
 * Run before the shader/constant save IBs, this IB reads the live
 * SP/HLSQ registers and rewrites the placeholder dwords those IBs
 * contain (CP_REG_TO_MEM loop-count/source words and the COND_EXEC
 * booleans), so the saves copy exactly the shader instruction and
 * constant data the current context uses.
 */
998static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
999				   struct adreno_context *drawctxt)
1000{
1001	unsigned int *cmd = tmp_ctx.cmd;
1002	unsigned int *start = cmd;
1003
1004	/* Flush HLSQ lazy updates */
1005	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
1006	*cmd++ = 0x7;		/* HLSQ_FLUSH */
1007	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1008	*cmd++ = 0;
1009
	/* Full UCHE invalidate: zero start address, "entire cache" op */
1010	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
1011	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
1012	*cmd++ = (unsigned int)
1013		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
1014		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
1015		0;	/* No end addr for full invalidate */
1016
1017	/* Make sure registers are flushed */
1018	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
1019	*cmd++ = 0;
1020
1021#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
1022
	/*
	 * CPU-sync variant: dump the raw control/offset register values
	 * straight into the save-command slots; presumably the CPU then
	 * computes the final fixups from them at save time --
	 * TODO(review): confirm against the CPU-sync save path.
	 */
1023	/* Save shader sizes */
1024	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1025	*cmd++ = A3XX_SP_VS_CTRL_REG0;
1026	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;
1027
1028	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1029	*cmd++ = A3XX_SP_FS_CTRL_REG0;
1030	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1031
1032	/* Save shader offsets */
1033	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1034	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1035	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;
1036
1037	/* Save constant sizes */
1038	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1039	*cmd++ = A3XX_SP_VS_CTRL_REG1;
1040	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
1041	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1042	*cmd++ = A3XX_SP_FS_CTRL_REG1;
1043	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;
1044
1045	/* Save FS constant offset */
1046	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1047	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1048	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;
1049
1050
1051	/* Save VS instruction store mode */
1052	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1053	*cmd++ = A3XX_SP_VS_CTRL_REG0;
1054	*cmd++ = drawctxt->cond_execs[0].gpuaddr;
1055
1056	/* Save FS instruction store mode */
1057	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1058	*cmd++ = A3XX_SP_FS_CTRL_REG0;
1059	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
1060#else
1061
	/*
	 * GPU-only fixup: rmw_regtomem() (defined elsewhere in this
	 * file) presumably emits a CP_REG_RMW mask/shift of the named
	 * register followed by a CP_REG_TO_MEM of the result into the
	 * given address -- the same pattern that is written out by hand
	 * for SP_FS_CTRL_REG0 below.  TODO(review): confirm against its
	 * definition.
	 */
1062	/* Shader save */
	/* VS: (VS_LENGTH * 8) << loop-count shift | VS shadow source */
1063	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
1064		11+REG_TO_MEM_LOOP_COUNT_SHIFT,
1065		(HLSQ_SHADOW_BASE + 0x1000) / 4,
1066		drawctxt->shader_save_commands[2].gpuaddr);
1067
	/*
	 * FS needs two register reads (length and object offset), so
	 * the combination is built by hand in CP scratch registers.
	 */
1068	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
1069	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1070	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1071	*cmd++ = 0x00000000;	/* AND value */
1072	*cmd++ = A3XX_SP_FS_CTRL_REG0;	/* OR address */
1073	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
1074	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
1075	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1076	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
1077		A3XX_CP_SCRATCH_REG2;
1078	*cmd++ = 0x7f000000;	/* AND value */
1079	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4;	/* OR value */
1080
1081	/*
1082	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
1083	 * SP_FS_OBJ_OFFSET_REG
1084	 */
1085
1086	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1087	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
1088	*cmd++ = 0x00000000;	/* AND value */
1089	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;	/* OR address */
1090	/*
1091	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
1092	 * 0x00000000
1093	 */
1094	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1095	*cmd++ = A3XX_CP_SCRATCH_REG3;
1096	*cmd++ = 0xfe000000;	/* AND value */
1097	*cmd++ = 0x00000000;	/* OR value */
1098	/*
1099	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
1100	 */
1101	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1102	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1103	*cmd++ = 0xffffffff;	/* AND value */
1104	*cmd++ = A3XX_CP_SCRATCH_REG3;	/* OR address */
1105
	/* Write the composed count|src word into the FS save command */
1106	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1107	*cmd++ = A3XX_CP_SCRATCH_REG2;
1108	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1109
1110	/* Constant save */
1111	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
1112		17, (HLSQ_SHADOW_BASE + 0x2000) / 4,
1113		drawctxt->constant_save_commands[1].gpuaddr);
1114
1115	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
1116		17, (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
1117		drawctxt->constant_save_commands[2].gpuaddr);
1118
1119	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
1120		18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
1121		drawctxt->constant_save_commands[2].gpuaddr
1122		+ sizeof(unsigned int));
1123
	/* Non-zero constant length at cond_execs[2]/[3] enables the save */
1124	/* Modify constant save conditionals */
1125	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
1126		0, 0, drawctxt->cond_execs[2].gpuaddr);
1127
1128	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
1129		0, 0, drawctxt->cond_execs[3].gpuaddr);
1130
1131	/* Save VS instruction store mode */
1132
1133	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
1134		31, 0, drawctxt->cond_execs[0].gpuaddr);
1135
1136	/* Save FS instruction store mode */
1137	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
1138		31, 0, drawctxt->cond_execs[1].gpuaddr);
1139
1140#endif
1141
1142	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);
1143
1144	tmp_ctx.cmd = cmd;
1145}
1146
1147/****************************************************************************/
1148/* Functions to build context restore IBs */
1149/****************************************************************************/
1150
1151static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1152 struct adreno_context *drawctxt,
1153 struct gmem_shadow_t *shadow)
1154{
1155 unsigned int *cmds = tmp_ctx.cmd;
1156 unsigned int *start = cmds;
1157
1158 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1159 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1160 /* HLSQ_CONTROL_0_REG */
1161 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1162 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1163 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1164 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1165 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1166 /* HLSQ_CONTROL_1_REG */
1167 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1168 /* HLSQ_CONTROL_2_REG */
1169 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1170 /* HLSQ_CONTROL3_REG */
1171 *cmds++ = 0x00000000;
1172
1173 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1174 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1175 /* RB_MRT_BUF_INFO0 */
1176 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1177 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1178 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1179 (shadow->gmem_pitch * 4 * 8) / 256);
1180 /* RB_MRT_BUF_BASE0 */
1181 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1182
1183 /* Texture samplers */
1184 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1185 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1186 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1187 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1188 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1189 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1190 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1191 *cmds++ = 0x00000240;
1192 *cmds++ = 0x00000000;
1193
1194 /* Texture memobjs */
1195 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1196 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1197 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1198 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1199 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1200 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1201 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1202 *cmds++ = 0x4cc06880;
1203 *cmds++ = shadow->height | (shadow->width << 14);
1204 *cmds++ = (shadow->pitch*4*8) << 9;
1205 *cmds++ = 0x00000000;
1206
1207 /* Mipmap bases */
1208 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1209 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1210 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1211 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1212 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1213 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1214 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1215 *cmds++ = shadow->gmemshadow.gpuaddr;
1216 *cmds++ = 0x00000000;
1217 *cmds++ = 0x00000000;
1218 *cmds++ = 0x00000000;
1219 *cmds++ = 0x00000000;
1220 *cmds++ = 0x00000000;
1221 *cmds++ = 0x00000000;
1222 *cmds++ = 0x00000000;
1223 *cmds++ = 0x00000000;
1224 *cmds++ = 0x00000000;
1225 *cmds++ = 0x00000000;
1226 *cmds++ = 0x00000000;
1227 *cmds++ = 0x00000000;
1228 *cmds++ = 0x00000000;
1229
1230 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1231 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1232 /* HLSQ_VS_CONTROL_REG */
1233 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1234 /* HLSQ_FS_CONTROL_REG */
1235 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1236 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1237 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1238 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1239 *cmds++ = 0x00000000;
1240 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1241 *cmds++ = 0x00000000;
1242
1243 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1244 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1245 /* SP_FS_LENGTH_REG */
1246 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1247
1248 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1249 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1250 /* SP_VS_CTRL_REG0 */
1251 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1252 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1253 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1254 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1255 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1256 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1257 /* SP_VS_CTRL_REG1 */
1258 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1259 /* SP_VS_PARAM_REG */
1260 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1261 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1262 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1263 /* SP_VS_OUT_REG0 */
1264 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1265 /* SP_VS_OUT_REG1 */
1266 *cmds++ = 0x00000000;
1267 /* SP_VS_OUT_REG2 */
1268 *cmds++ = 0x00000000;
1269 /* SP_VS_OUT_REG3 */
1270 *cmds++ = 0x00000000;
1271 /* SP_VS_OUT_REG4 */
1272 *cmds++ = 0x00000000;
1273 /* SP_VS_OUT_REG5 */
1274 *cmds++ = 0x00000000;
1275 /* SP_VS_OUT_REG6 */
1276 *cmds++ = 0x00000000;
1277 /* SP_VS_OUT_REG7 */
1278 *cmds++ = 0x00000000;
1279
1280 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1281 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1282 /* SP_VS_VPC_DST_REG0 */
1283 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1284 /* SP_VS_VPC_DST_REG1 */
1285 *cmds++ = 0x00000000;
1286 /* SP_VS_VPC_DST_REG2 */
1287 *cmds++ = 0x00000000;
1288 /* SP_VS_VPC_DST_REG3 */
1289 *cmds++ = 0x00000000;
1290 /* SP_VS_OBJ_OFFSET_REG */
1291 *cmds++ = 0x00000000;
1292 /* SP_VS_OBJ_START_REG */
1293 *cmds++ = 0x00000000;
1294
1295 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1296 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1297 /* SP_VS_LENGTH_REG */
1298 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1299 /* SP_FS_CTRL_REG0 */
1300 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1301 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1302 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1303 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1304 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1305 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1306 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1307 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1308 /* SP_FS_CTRL_REG1 */
1309 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1310 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1311 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1312 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001313 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
1314 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001315 /* SP_FS_OBJ_START_REG */
1316 *cmds++ = 0x00000000;
1317
1318 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1319 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1320 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1321 *cmds++ = 0x00000000;
1322 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1323 *cmds++ = 0x00000000;
1324
1325 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1326 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1327 /* SP_FS_OUT_REG */
1328 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1329
Jordan Crousea7ec4212012-02-04 10:23:52 -07001330 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001331 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1332 /* SP_FS_MRT_REG0 */
1333 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1334 /* SP_FS_MRT_REG1 */
1335 *cmds++ = 0;
1336 /* SP_FS_MRT_REG2 */
1337 *cmds++ = 0;
1338 /* SP_FS_MRT_REG3 */
1339 *cmds++ = 0;
1340
1341 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1342 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1343 /* VPC_ATTR */
1344 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1345 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1346 _SET(VPC_VPCATTR_LMSIZE, 1);
1347 /* VPC_PACK */
1348 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1349 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1350 /* VPC_VARYING_INTERP_MODE_0 */
1351 *cmds++ = 0x00000000;
1352 /* VPC_VARYING_INTERP_MODE1 */
1353 *cmds++ = 0x00000000;
1354 /* VPC_VARYING_INTERP_MODE2 */
1355 *cmds++ = 0x00000000;
1356 /* VPC_VARYING_IINTERP_MODE3 */
1357 *cmds++ = 0x00000000;
1358 /* VPC_VARRYING_PS_REPL_MODE_0 */
1359 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1360 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1361 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1362 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1363 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1364 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1365 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1366 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1367 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1375 /* VPC_VARRYING_PS_REPL_MODE_1 */
1376 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1377 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1378 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1379 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1380 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1381 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1382 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1388 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1389 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1392 /* VPC_VARRYING_PS_REPL_MODE_2 */
1393 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1394 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1395 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1396 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1397 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1398 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1399 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1405 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1406 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1409 /* VPC_VARRYING_PS_REPL_MODE_3 */
1410 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1411 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1412 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1413 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1414 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1415 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1416 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1422 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1423 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1426
Jordan Crousea7ec4212012-02-04 10:23:52 -07001427 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001428 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1429 /* SP_SP_CTRL_REG */
1430 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1431
1432 /* Load vertex shader */
1433 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1434 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1435 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1436 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1437 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1438 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1439 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1440 /* (sy)end; */
1441 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1442 /* nop; */
1443 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1444 /* nop; */
1445 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1446 /* nop; */
1447 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1448
1449 /* Load fragment shader */
1450 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1451 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1452 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1453 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1454 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1455 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1456 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1457 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1458 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1459 /* (rpt5)nop; */
1460 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1461 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1462 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1463 /* (sy)mov.f32f32 r1.x, r0.x; */
1464 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1465 /* mov.f32f32 r1.y, r0.y; */
1466 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1467 /* mov.f32f32 r1.z, r0.z; */
1468 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1469 /* mov.f32f32 r1.w, r0.w; */
1470 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1471 /* end; */
1472 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1473
1474 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1475 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1476 /* VFD_CONTROL_0 */
1477 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1478 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1479 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1480 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1481 /* VFD_CONTROL_1 */
1482 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1483 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1484 _SET(VFD_CTRLREG1_REGID4INST, 252);
1485
1486 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1487 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1488 /* VFD_FETCH_INSTR_0_0 */
1489 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1490 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1491 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1492 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1493 /* VFD_FETCH_INSTR_1_0 */
1494 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1495 shadow->quad_vertices_restore.gpuaddr);
1496 /* VFD_FETCH_INSTR_0_1 */
1497 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1498 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1499 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1500 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1501 /* VFD_FETCH_INSTR_1_1 */
1502 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1503 shadow->quad_vertices_restore.gpuaddr + 16);
1504
1505 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1506 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1507 /* VFD_DECODE_INSTR_0 */
1508 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1509 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1510 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1511 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1512 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1513 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1514 /* VFD_DECODE_INSTR_1 */
1515 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1516 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1517 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1518 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1519 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1520 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1521
1522 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1523 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1524 /* RB_DEPTH_CONTROL */
1525 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1526
1527 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1528 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1529 /* RB_STENCIL_CONTROL */
1530 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1531 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1532 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1533 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1534 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1535 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1536 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1537 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1538
1539 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1540 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1541 /* RB_MODE_CONTROL */
1542 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1543 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1544
1545 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1546 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1547 /* RB_RENDER_CONTROL */
1548 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1549 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1550
1551 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1552 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1553 /* RB_MSAA_CONTROL */
1554 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1555 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1556
1557 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1558 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1559 /* RB_MRT_CONTROL0 */
1560 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1561 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1562 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1563 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1564
1565 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1566 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1567 /* RB_MRT_BLENDCONTROL0 */
1568 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1569 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1570 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1571 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1572 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1573 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1574 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1575 /* RB_MRT_CONTROL1 */
1576 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1577 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1578 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1579
1580 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1581 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1582 /* RB_MRT_BLENDCONTROL1 */
1583 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1584 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1585 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1586 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1587 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1588 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1589 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1590 /* RB_MRT_CONTROL2 */
1591 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1592 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1593 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1594
1595 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1596 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1597 /* RB_MRT_BLENDCONTROL2 */
1598 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1599 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1600 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1601 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1602 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1603 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1604 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1605 /* RB_MRT_CONTROL3 */
1606 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1607 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1608 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1609
1610 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1611 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1612 /* RB_MRT_BLENDCONTROL3 */
1613 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1614 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1615 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1616 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1617 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1618 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1619 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1620
1621 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1622 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1623 /* VFD_INDEX_MIN */
1624 *cmds++ = 0x00000000;
1625 /* VFD_INDEX_MAX */
1626 *cmds++ = 0xFFFFFFFF;
1627 /* VFD_INDEX_OFFSET */
1628 *cmds++ = 0x00000000;
1629 /* TPL1_TP_VS_TEX_OFFSET */
1630 *cmds++ = 0x00000000;
1631
1632 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1633 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1634 /* VFD_VS_THREADING_THRESHOLD */
1635 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1636 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1637
1638 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1639 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1640 /* TPL1_TP_VS_TEX_OFFSET */
1641 *cmds++ = 0x00000000;
1642
1643 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1644 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1645 /* TPL1_TP_FS_TEX_OFFSET */
1646 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1647 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1648 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1649
1650 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1651 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1652 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001653 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1654 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1655 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001656
1657 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1658 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1659 /* GRAS_SU_MODE_CONTROL */
1660 *cmds++ = 0x00000000;
1661
1662 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1663 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1664 /* GRAS_SC_WINDOW_SCISSOR_TL */
1665 *cmds++ = 0x00000000;
1666 /* GRAS_SC_WINDOW_SCISSOR_BR */
1667 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1668 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1669
1670 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1671 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1672 /* GRAS_SC_SCREEN_SCISSOR_TL */
1673 *cmds++ = 0x00000000;
1674 /* GRAS_SC_SCREEN_SCISSOR_BR */
1675 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1676 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1677
1678 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1679 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1680 /* GRAS_CL_VPORT_XOFFSET */
1681 *cmds++ = 0x00000000;
1682 /* GRAS_CL_VPORT_XSCALE */
1683 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1684 /* GRAS_CL_VPORT_YOFFSET */
1685 *cmds++ = 0x00000000;
1686 /* GRAS_CL_VPORT_YSCALE */
1687 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1688
1689 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1690 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1691 /* GRAS_CL_VPORT_ZOFFSET */
1692 *cmds++ = 0x00000000;
1693 /* GRAS_CL_VPORT_ZSCALE */
1694 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1695
1696 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1697 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1698 /* GRAS_CL_CLIP_CNTL */
1699 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1700
1701 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1702 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1703 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1704 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1705
1706 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1707 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1708 /* PC_PRIM_VTX_CONTROL */
1709 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1710 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1711 PC_DRAW_TRIANGLES) |
1712 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1713 PC_DRAW_TRIANGLES) |
1714 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1715
1716 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1717 *cmds++ = 0x00000000; /* Viz query info */
1718 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1719 PC_DI_SRC_SEL_AUTO_INDEX,
1720 PC_DI_INDEX_SIZE_16_BIT,
1721 PC_DI_IGNORE_VISIBILITY);
1722 *cmds++ = 0x00000002; /* Num indices */
1723
1724 /* Create indirect buffer command for above command sequence */
1725 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1726
1727 return cmds;
1728}
1729
/*
 * build_regrestore_cmds - Build the IB that reloads shadowed context
 * registers on context restore.
 * @adreno_dev: Adreno device (unused in the body)
 * @drawctxt: Context whose register shadow is the restore source
 *
 * Emits: an HLSQ lazy-state flush, a full UCHE invalidate, one
 * CP_LOAD_CONSTANT_CONTEXT packet covering every range listed in
 * context_register_ranges, then a type0 reload of each register in
 * global_registers.  Commands are written at tmp_ctx.cmd, wrapped into
 * drawctxt->reg_restore by create_ib1(), and tmp_ctx.cmd is advanced.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	/*
	 * Remember where the CP_LOAD_CONSTANT_CONTEXT packet starts; the
	 * header is written after the register ranges are emitted, once
	 * the final packet length is known.
	 */
	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Emit one range descriptor per pair in context_register_ranges */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Patch the deferred packet header now that the length is known */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);	/* Enable shadowing. */
#endif

	/*
	 * Reload each global register via a type0 write; reg_values[i]
	 * records the GPU address of the value dword so it can be
	 * patched later (the placeholder written here is zero).
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1787
/*
 * build_constantrestore_cmds - Build the IB that restores ALU constants
 * and texture state (memory objects, mipmap tables, samplers) for both
 * vertex and fragment shaders.
 * @adreno_dev: Adreno device (unused in the body)
 * @drawctxt: Context owning the constant shadow memory
 *
 * Several dwords emitted here are placeholders ("ord1"/"ord2" and the
 * cond_execs[2]/[3] flags) whose GPU addresses are recorded in
 * drawctxt->constant_load_commands[] / cond_execs[]; they are filled in
 * at runtime by the fixup IB (see build_restore_fixup_cmds), since the
 * sizes depend on SP_*_CTRL_REG1 values captured at save time.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/*
	 * Conditional-execution flags for the two CP_COND_EXEC packets
	 * below; written at fixup time, zero-initialized here.
	 */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable register shadowing for the HLSQ write */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1952
/*
 * build_shader_restore_cmds - Build the IB that reloads vertex and
 * fragment shader instructions from the shader shadow.
 * @adreno_dev: Adreno device (unused in the body)
 * @drawctxt: Context owning the shader shadow at SHADER_OFFSET
 *
 * Each shader load is gated by a CP_COND_EXEC on cond_execs[0]/[1], and
 * the CP_LOAD_STATE "ord1" dword (which encodes the shader length) is
 * left zero here; its GPU address is recorded in shader_load_commands[]
 * so the fixup IB can patch it from SP_*_CTRL_REG0 at save time.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	/* FS shadow lives in the second half of the shader shadow buffer */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2019
/*
 * build_hlsqcontrol_restore_cmds - Build the IB that restores
 * A3XX_HLSQ_CONTROL_0_REG.
 * @adreno_dev: Adreno device (unused in the body)
 * @drawctxt: Context owning the command buffer
 *
 * The register value is left as a zero placeholder; its GPU address is
 * recorded in hlsqcontrol_restore_commands[0] so the fixup IB can write
 * the saved value there at save time.
 */
static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
					   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
	drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
		= virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;		/* patched with the saved register value */

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);

	tmp_ctx.cmd = cmd;
}
2038
/*
 * build_restore_fixup_cmds - Build the IB that patches the restore IBs.
 * @adreno_dev: Adreno device (unused in the body)
 * @drawctxt: Context whose restore IBs are being patched
 *
 * The restore IBs contain placeholder dwords (shader/constant sizes and
 * offsets) that depend on register state at save time.  This IB reads
 * the relevant SP/HLSQ registers and writes the derived values into the
 * placeholders, either directly (CPU-sync build) or via rmw_regtomem
 * read-modify-write sequences.
 */
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2111
/*
 * a3xx_create_gpustate_shadow - Build all save/restore IBs for a context.
 * @adreno_dev: Adreno device
 * @drawctxt: Context to build the shadow command streams for
 *
 * Marks the context as having a state shadow, then builds each IB in
 * turn.  The build_* functions each consume command space starting at
 * tmp_ctx.cmd and advance it, so their call order determines the layout
 * inside the gpustate buffer.  Always returns 0.
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2128
/* create buffers for saving/restoring registers, constants, & GMEM */
/*
 * a3xx_create_gmem_shadow - Allocate and initialize the GMEM shadow.
 * @adreno_dev: Adreno device (provides GMEM size and base)
 * @drawctxt: Context to attach the shadow to
 *
 * Allocates a buffer sized by calc_gmemsize(), builds the quad vertex
 * buffer plus the gmem2sys (save) and sys2gmem (restore) command
 * streams, flushes the shadow from the CPU cache, and sets
 * CTXT_FLAGS_GMEM_SHADOW on success.  Returns 0 or the kgsl_allocate()
 * error code.
 */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	int result;

	calc_gmemsize(&drawctxt->context_gmem_shadow,
		      adreno_dev->gmemspace.sizebytes);
	tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;

	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
			       drawctxt->pagetable,
			       drawctxt->context_gmem_shadow.size);

	if (result)
		return result;

	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
			   &tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);

	/* Make sure the GPU sees the CPU-written command streams */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;

	return 0;
}
2163
/*
 * a3xx_drawctxt_create - Allocate and initialize per-context state.
 * @adreno_dev: Adreno device
 * @drawctxt: Newly created context
 *
 * Allocates the gpustate buffer, then builds the gpustate shadow
 * (unless the context uses preambles) and the GMEM shadow (unless
 * CTXT_FLAGS_NOGMEMALLOC is set).  On any failure after allocation the
 * gpustate buffer is freed before returning the error.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more than just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
			    drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	/* Command space starts at CMD_OFFSET inside the gpustate buffer */
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2202
/*
 * a3xx_drawctxt_save - Queue the IBs that save the outgoing context.
 * @adreno_dev: Adreno device
 * @context: Context being switched away from (may be NULL)
 *
 * Issues the save-fixup, register/constant save, shader save (when
 * CTXT_FLAGS_SHADER_SAVE is set) and GMEM save IBs on the ringbuffer.
 * Sets the matching *_RESTORE flags so the next restore knows what to
 * reload.  GMEM save must run after the shader save because it changes
 * shader state (see comment below).
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			       struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			       "Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2246
/*
 * a3xx_drawctxt_restore - Queue the IBs that restore the incoming context.
 * @adreno_dev: Adreno device
 * @context: Context being switched to, or NULL for no context
 *
 * For NULL just installs the default pagetable.  Otherwise records the
 * new current context in memstore, switches the MMU pagetable, then
 * replays GMEM, registers, constants, shaders and HLSQ control in that
 * order.  GMEM restore must run before the shader restore because it
 * modifies shader state (see comment below).
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
				  struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Write the context pointer into the memstore current_context slot */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
	cmds[4] = (unsigned int)context;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(device, context->pagetable);

	/*
	 * Restore GMEM.  (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
						    context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->hlsqcontrol_restore, 3);
	}
}
2302
/*
 * a3xx_rb_init - Submit the CP_ME_INIT sequence on the ringbuffer.
 * @adreno_dev: Adreno device (unused in the body)
 * @rb: Ringbuffer to initialize
 *
 * Allocates 18 dwords (one CP_ME_INIT header plus 17 payload dwords)
 * and writes the microengine initialization parameters.
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	/* GPU address of the space just allocated (wptr already advanced) */
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2332
2333static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2334{
2335 struct kgsl_device *device = &adreno_dev->dev;
2336 const char *err = "";
2337
2338 switch (bit) {
2339 case A3XX_INT_RBBM_AHB_ERROR: {
2340 unsigned int reg;
2341
2342 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2343
2344 /*
2345 * Return the word address of the erroring register so that it
2346 * matches the register specification
2347 */
2348
2349 KGSL_DRV_CRIT(device,
2350 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2351 reg & (1 << 28) ? "WRITE" : "READ",
2352 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2353 (reg >> 24) & 0x3);
2354
2355 /* Clear the error */
2356 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2357 return;
2358 }
2359 case A3XX_INT_RBBM_REG_TIMEOUT:
2360 err = "RBBM: AHB register timeout";
2361 break;
2362 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2363 err = "RBBM: ME master split timeout";
2364 break;
2365 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2366 err = "RBBM: PFP master split timeout";
2367 break;
2368 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2369 err = "RBBM: ATB bus oveflow";
2370 break;
2371 case A3XX_INT_VFD_ERROR:
2372 err = "VFD: Out of bounds access";
2373 break;
2374 case A3XX_INT_CP_T0_PACKET_IN_IB:
2375 err = "ringbuffer TO packet in IB interrupt";
2376 break;
2377 case A3XX_INT_CP_OPCODE_ERROR:
2378 err = "ringbuffer opcode error interrupt";
2379 break;
2380 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2381 err = "ringbuffer reserved bit error interrupt";
2382 break;
2383 case A3XX_INT_CP_HW_FAULT:
2384 err = "ringbuffer hardware fault";
2385 break;
2386 case A3XX_INT_CP_REG_PROTECT_FAULT:
2387 err = "ringbuffer protected mode error interrupt";
2388 break;
2389 case A3XX_INT_CP_AHB_ERROR_HALT:
2390 err = "ringbuffer AHB error interrupt";
2391 break;
2392 case A3XX_INT_MISC_HANG_DETECT:
2393 err = "MISC: GPU hang detected";
2394 break;
2395 case A3XX_INT_UCHE_OOB_ACCESS:
2396 err = "UCHE: Out of bounds access";
2397 break;
2398 }
2399
2400 KGSL_DRV_CRIT(device, "%s\n", err);
2401 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2402}
2403
/*
 * a3xx_cp_callback - Handle CP completion interrupts (RB/IB1/IB2).
 * @adreno_dev: Adreno device that raised the interrupt
 * @irq: Interrupt bit number
 *
 * For ringbuffer interrupts, clears the timestamp-compare enable in
 * memstore.  In all cases wakes waiters, schedules the timestamp-
 * expired work, and notifies the timestamp notifier chain.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	if (irq == A3XX_INT_CP_RB_INT) {
		kgsl_sharedmem_writel(&rb->device->memstore,
			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0);
		/* Make the memstore write visible before waking waiters */
		wmb();
		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&rb->device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);

	atomic_notifier_call_chain(&rb->device->ts_notifier_list,
				   rb->device->id, NULL);
}
2423
/* Designated initializer shorthand for one a3xx_irq_funcs[] entry */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }
2425
/*
 * Interrupt enable mask written to A3XX_RBBM_INT_0_MASK: all error
 * sources plus the CP RB/IB1/IB2 completion interrupts.  Note the
 * MISC hang-detect interrupt (bit 24) is not enabled here.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) |      \
	 (1 << A3XX_INT_RBBM_ME_MS_TIMEOUT) |    \
	 (1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT) |   \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_VFD_ERROR) |             \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2443
/*
 * Per-bit IRQ dispatch table; the array index is the interrupt bit
 * position in A3XX_RBBM_INT_0_STATUS.  NULL entries are unhandled and
 * logged by a3xx_irq_handler().
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	       /* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	       /* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	       /* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	       /* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2475
2476static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2477{
2478 struct kgsl_device *device = &adreno_dev->dev;
2479 irqreturn_t ret = IRQ_NONE;
2480 unsigned int status, tmp;
2481 int i;
2482
2483 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2484
2485 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2486 if (tmp & 1) {
2487 if (a3xx_irq_funcs[i].func != NULL) {
2488 a3xx_irq_funcs[i].func(adreno_dev, i);
2489 ret = IRQ_HANDLED;
2490 } else {
2491 KGSL_DRV_CRIT(device,
2492 "Unhandled interrupt bit %x\n", i);
2493 }
2494 }
2495
2496 tmp >>= 1;
2497 }
2498
2499 if (status)
2500 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2501 status);
2502 return ret;
2503}
2504
2505static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2506{
2507 struct kgsl_device *device = &adreno_dev->dev;
2508
2509 if (state)
2510 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2511 else
2512 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2513}
2514
/*
 * a3xx_busy_cycles() - sample and restart GPU power counter 1
 *
 * Freezes power counter 1, reads its low word, then resets and
 * re-enables it so the next call returns only the cycles accumulated
 * since this one.  The counter itself is presumably set up to count
 * GPU busy cycles by the RBBM programming in a3xx_start() — confirm
 * against the A3XX register documentation.
 *
 * The write ordering (freeze, read, reset, re-enable) is required for
 * a coherent sample; do not reorder.
 *
 * Returns the counter value sampled while the counter was frozen.
 */
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int reg, val;

	/* Freeze the counter so the read below is stable */
	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
	reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Read the accumulated value */
	adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);

	/* Reset the counter to zero */
	reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Take the counter out of reset and start it counting again */
	reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
	reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	return val;
}
2539
/*
 * a3xx_start() - power-up initialization of the A3XX core
 *
 * Performs a software reset of the core and then programs the VBIF,
 * RBBM error reporting, performance counter and hang detection
 * registers to their initial values.  The sequence and the delay after
 * reset are hardware-mandated; do not reorder the writes.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* GMEM size on A320 is 512K.
	 * NOTE(review): hard-coded for A320 — confirm before reusing this
	 * path for other A3XX variants. */
	adreno_dev->gmemspace.sizebytes = SZ_512K;

	/* Reset the core, then give the hardware time to come back up */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
		0x00000001);
	msleep(20);

	/*
	 * enable fixed master AXI port of 0x0 for all clients to keep
	 * traffic from going to random places
	 */

	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable the RBBM error reporting bits.  This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0x86FFFFFF);

	/* Turn on the power counters (sampled by a3xx_busy_cycles()) */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
		(1 << 16) | 0xFFF);

}
2582
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002583/* Defined in adreno_a3xx_snapshot.c */
2584void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2585 int *remain, int hang);
2586
/*
 * A3XX implementation of the adreno per-GPU operations table, consumed
 * by the common adreno core code.
 */
struct adreno_gpudev adreno_a3xx_gpudev = {
	/* Register offsets the common code reads/writes directly */
	.reg_rbbm_status = A3XX_RBBM_STATUS,
	.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
	.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,

	/* Draw context management */
	.ctxt_create = a3xx_drawctxt_create,
	.ctxt_save = a3xx_drawctxt_save,
	.ctxt_restore = a3xx_drawctxt_restore,
	/* Ringbuffer, interrupt, counter and bringup hooks */
	.rb_init = a3xx_rb_init,
	.irq_control = a3xx_irq_control,
	.irq_handler = a3xx_irq_handler,
	.busy_cycles = a3xx_busy_cycles,
	.start = a3xx_start,
	/* Implemented in adreno_a3xx_snapshot.c */
	.snapshot = a3xx_snapshot,
};