/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
13
14#include <linux/delay.h>
15
16#include "kgsl.h"
17#include "adreno.h"
18#include "kgsl_sharedmem.h"
19#include "kgsl_cffdump.h"
20#include "a3xx_reg.h"
21
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers are listed in pairs - the first value of each pair is the
 * start offset, the second is the stop offset (inclusive).
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760,
};

/* Number of start/stop pairs in the a3xx_registers table above */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
66
/*
 * Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */
#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 *  8K - ALU Constant Shadow (8K aligned)
 *  4K - H/W Register Shadow (8K aligned)
 *  5K - Command and Vertex Buffers
 *  8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 ****************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE		(8*1024)	/* 8KB */
#define REG_SHADOW_SIZE		(4*1024)	/* 4KB */
#define CMD_BUFFER_SIZE		(5*1024)	/* 5KB */
#define TEX_SIZE_MEM_OBJECTS	896		/* bytes */
#define TEX_SIZE_MIPMAP		1936		/* bytes */
#define TEX_SIZE_SAMPLER_OBJ	256		/* bytes */
/* Texture shadow holds one VS set and one FS set, hence the *2 */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2)	/* ~6KB */
#define SHADER_SHADOW_SIZE	(8*1024)	/* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/*
 * Offsets to the different sections in context shadow memory, in the
 * order pictured in the layout diagram above.
 */
#define REG_OFFSET		ALU_SHADOW_SIZE
#define CMD_OFFSET		(REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET		(CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET		(SHADER_OFFSET+SHADER_SHADOW_SIZE)
#define VS_TEX_OFFSET_MEM_OBJECTS	TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP	(VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ	(VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP	(FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ	(FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE	(16*1024)

/* Offsets of the texture state blocks within the HLSQ shadow RAM */
#define HLSQ_SAMPLER_OFFSET	0x000
#define HLSQ_MEMOBJ_OFFSET	0x400
#define HLSQ_MIPMAP_OFFSET	0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE	(0x10000+SSIZE*2)

/* Bit position of the loop count field in a CP_REG_TO_MEM src dword */
#define REG_TO_MEM_LOOP_COUNT_SHIFT	18
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700134
/*
 * Build a PC draw initiator dword.  index_size carries two packed bits:
 * bit 0 selects the index size and bit 1 the small-index mode.  All
 * arguments are fully parenthesized so compound expressions (e.g.
 * "a | b") expand correctly; the original macro used index_size bare,
 * which mis-expanded such arguments due to operator precedence.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
143
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains the start and end (inclusive) of a range of
 * registers that is saved to / restored from the register shadow.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
181
/*
 * Global registers that need to be saved separately (not covered by the
 * context register shadow above).  Unlike context_register_ranges these
 * are individual register offsets, not start/end pairs.
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};

#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
213
/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where global registers are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
222
223#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
224/*
225 * Function for executing dest = ( (reg & and) ROL rol ) | or
226 */
227static unsigned int *rmw_regtomem(unsigned int *cmd,
228 unsigned int reg, unsigned int and,
229 unsigned int rol, unsigned int or,
230 unsigned int dest)
231{
232 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
233 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
234 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
235 *cmd++ = 0x00000000; /* AND value */
236 *cmd++ = reg; /* OR address */
237
238 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
239 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
240 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
241 *cmd++ = and; /* AND value */
242 *cmd++ = or; /* OR value */
243
244 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
245 *cmd++ = A3XX_CP_SCRATCH_REG2;
246 *cmd++ = dest;
247
248 return cmd;
249}
250#endif
251
252static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
253 struct adreno_context *drawctxt)
254{
255 unsigned int *cmd = tmp_ctx.cmd;
Jordan Crousea7ec4212012-02-04 10:23:52 -0700256 unsigned int *start;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700257 unsigned int i;
258
259 drawctxt->constant_save_commands[0].hostptr = cmd;
260 drawctxt->constant_save_commands[0].gpuaddr =
261 virt2gpu(cmd, &drawctxt->gpustate);
262 cmd++;
263
Jordan Crousea7ec4212012-02-04 10:23:52 -0700264 start = cmd;
265
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700266 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
267 *cmd++ = 0;
268
269#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
270 /*
271 * Context registers are already shadowed; just need to
272 * disable shadowing to prevent corruption.
273 */
274
275 *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
276 *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
277 *cmd++ = 4 << 16; /* regs, start=0 */
278 *cmd++ = 0x0; /* count = 0 */
279
280#else
281 /*
282 * Make sure the HW context has the correct register values before
283 * reading them.
284 */
285
286 /* Write context registers into shadow */
287 for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
288 unsigned int start = context_register_ranges[i * 2];
289 unsigned int end = context_register_ranges[i * 2 + 1];
290 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
291 *cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
292 start;
293 *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
294 & 0xFFFFE000) + (start - 0x2000) * 4;
295 }
296#endif
297
298 /* Need to handle some of the global registers separately */
299 for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
300 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
301 *cmd++ = global_registers[i];
302 *cmd++ = tmp_ctx.reg_values[i];
303 }
304
305 /* Save vertex shader constants */
306 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
307 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
308 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
309 *cmd++ = 0x0000FFFF;
310 *cmd++ = 3; /* EXEC_COUNT */
311 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
312 drawctxt->constant_save_commands[1].hostptr = cmd;
313 drawctxt->constant_save_commands[1].gpuaddr =
314 virt2gpu(cmd, &drawctxt->gpustate);
315 /*
316 From fixup:
317
318 dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
319 src = (HLSQ_SHADOW_BASE + 0x2000) / 4
320
321 From register spec:
322 SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
323 */
324 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
325 /* ALU constant shadow base */
326 *cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;
327
328 /* Save fragment shader constants */
329 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
330 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
331 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
332 *cmd++ = 0x0000FFFF;
333 *cmd++ = 3; /* EXEC_COUNT */
334 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
335 drawctxt->constant_save_commands[2].hostptr = cmd;
336 drawctxt->constant_save_commands[2].gpuaddr =
337 virt2gpu(cmd, &drawctxt->gpustate);
338 /*
339 From fixup:
340
341 dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
342 src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4
343
344 From register spec:
345 SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
346 */
347 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
348
349 /*
350 From fixup:
351
352 base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
353 offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET
354
355 From register spec:
356 SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
357 start offset in on chip RAM,
358 128bit aligned
359
360 dst = base + offset
361 Because of the base alignment we can use
362 dst = base | offset
363 */
364 *cmd++ = 0; /* dst */
365
366 /* Save VS texture memory objects */
367 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
368 *cmd++ =
369 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
370 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
371 *cmd++ =
372 (drawctxt->gpustate.gpuaddr +
373 VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
374
375 /* Save VS texture mipmap pointers */
376 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
377 *cmd++ =
378 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
379 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
380 *cmd++ =
381 (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
382
383 /* Save VS texture sampler objects */
384 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
385 *cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
386 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
387 *cmd++ =
388 (drawctxt->gpustate.gpuaddr +
389 VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
390
391 /* Save FS texture memory objects */
392 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
393 *cmd++ =
394 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
395 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
396 *cmd++ =
397 (drawctxt->gpustate.gpuaddr +
398 FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
399
400 /* Save FS texture mipmap pointers */
401 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
402 *cmd++ =
403 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
404 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
405 *cmd++ =
406 (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
407
408 /* Save FS texture sampler objects */
409 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
410 *cmd++ =
411 ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
412 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
413 *cmd++ =
414 (drawctxt->gpustate.gpuaddr +
415 FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
416
417 /* Create indirect buffer command for above command sequence */
418 create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);
419
420 tmp_ctx.cmd = cmd;
421}
422
423/* Copy GMEM contents to system memory shadow. */
424static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
425 struct adreno_context *drawctxt,
426 struct gmem_shadow_t *shadow)
427{
428 unsigned int *cmds = tmp_ctx.cmd;
429 unsigned int *start = cmds;
430
431 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
432 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
433
434 /* RB_MODE_CONTROL */
435 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
436 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
437 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
438 /* RB_RENDER_CONTROL */
439 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
440 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
441
442 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
443 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
444 /* RB_COPY_CONTROL */
445 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
446 RB_CLEAR_MODE_RESOLVE) |
447 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
448 tmp_ctx.gmem_base >> 14);
449 /* RB_COPY_DEST_BASE */
450 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
451 shadow->gmemshadow.gpuaddr >> 5);
452 /* RB_COPY_DEST_PITCH */
453 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
454 (shadow->pitch * 4) / 32);
455 /* RB_COPY_DEST_INFO */
456 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
457 RB_TILINGMODE_LINEAR) |
458 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
459 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
460 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
461
462 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
463 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
464 /* GRAS_SC_CONTROL */
465 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
466
467 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
468 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
469 /* VFD_CONTROL_0 */
470 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
471 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
472 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
473 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
474 /* VFD_CONTROL_1 */
475 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
476 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
477 _SET(VFD_CTRLREG1_REGID4INST, 252);
478
479 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
480 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
481 /* VFD_FETCH_INSTR_0_0 */
482 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
483 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
484 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
485 /* VFD_FETCH_INSTR_1_0 */
486 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
487 shadow->quad_vertices.gpuaddr);
488
489 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
490 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
491 /* VFD_DECODE_INSTR_0 */
492 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
493 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
494 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
495 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
496 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
497 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
498
499 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
500 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
501 /* HLSQ_CONTROL_0_REG */
502 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
503 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
504 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
505 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
506 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
507 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
508 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
509 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
510 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
511 /* HLSQ_CONTROL_1_REG */
512 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
513 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
514 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
515 /* HLSQ_CONTROL_2_REG */
516 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
517 /* HLSQ_CONTROL_3_REG */
518 *cmds++ = 0x00000000;
519
520 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
521 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
522 /* HLSQ_VS_CONTROL_REG */
523 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
524 /* HLSQ_FS_CONTROL_REG */
525 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
526 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
527 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
528 /* HLSQ_CONST_VSPRESV_RANGE_REG */
529 *cmds++ = 0x00000000;
530 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
531 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
532 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
533
534 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
535 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
536 /* SP_FS_LENGTH_REG */
537 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
538
539 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
540 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
541 /* SP_SP_CTRL_REG */
542 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
543 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
544
545 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
546 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
547 /* SP_VS_CTRL_REG0 */
548 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
549 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
550 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
551 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
552 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
553 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
554 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
555 /* SP_VS_CTRL_REG1 */
556 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
557 /* SP_VS_PARAM_REG */
558 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
559 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
560 /* SP_VS_OUT_REG_0 */
561 *cmds++ = 0x00000000;
562 /* SP_VS_OUT_REG_1 */
563 *cmds++ = 0x00000000;
564 /* SP_VS_OUT_REG_2 */
565 *cmds++ = 0x00000000;
566 /* SP_VS_OUT_REG_3 */
567 *cmds++ = 0x00000000;
568 /* SP_VS_OUT_REG_4 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_5 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_6 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_7 */
575 *cmds++ = 0x00000000;
576
577 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
578 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
579 /* SP_VS_VPC_DST_REG_0 */
580 *cmds++ = 0x00000000;
581 /* SP_VS_VPC_DST_REG_1 */
582 *cmds++ = 0x00000000;
583 /* SP_VS_VPC_DST_REG_2 */
584 *cmds++ = 0x00000000;
585 /* SP_VS_VPC_DST_REG_3 */
586 *cmds++ = 0x00000000;
587 /* SP_VS_OBJ_OFFSET_REG */
588 *cmds++ = 0x00000000;
589 /* SP_VS_OBJ_START_REG */
590 *cmds++ = 0x00000000;
591
592 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
593 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
594 /* SP_VS_LENGTH_REG */
595 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
596 /* SP_FS_CTRL_REG0 */
597 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
598 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
599 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
600 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
601 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
602 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
603 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
604 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
605 /* SP_FS_CTRL_REG1 */
606 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
607 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
608 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
609 /* SP_FS_OBJ_OFFSET_REG */
610 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
611 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
612 /* SP_FS_OBJ_START_REG */
613 *cmds++ = 0x00000000;
614
615 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
616 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
617 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
618 *cmds++ = 0x00000000;
619 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
620 *cmds++ = 0x00000000;
621
622 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
623 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
624 /* SP_FS_OUTPUT_REG */
625 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
626
627 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
628 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
629 /* SP_FS_MRT_REG_0 */
630 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
631 /* SP_FS_MRT_REG_1 */
632 *cmds++ = 0x00000000;
633 /* SP_FS_MRT_REG_2 */
634 *cmds++ = 0x00000000;
635 /* SP_FS_MRT_REG_3 */
636 *cmds++ = 0x00000000;
637
638 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
639 *cmds++ = CP_REG(A3XX_VPC_ATTR);
640 /* VPC_ATTR */
641 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
642 _SET(VPC_VPCATTR_LMSIZE, 1);
643 /* VPC_PACK */
644 *cmds++ = 0x00000000;
645 /* VPC_VARRYING_INTERUPT_MODE_0 */
646 *cmds++ = 0x00000000;
647 /* VPC_VARRYING_INTERUPT_MODE_1 */
648 *cmds++ = 0x00000000;
649 /* VPC_VARRYING_INTERUPT_MODE_2 */
650 *cmds++ = 0x00000000;
651 /* VPC_VARRYING_INTERUPT_MODE_3 */
652 *cmds++ = 0x00000000;
653 /* VPC_VARYING_PS_REPL_MODE_0 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARYING_PS_REPL_MODE_1 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARYING_PS_REPL_MODE_2 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARYING_PS_REPL_MODE_3 */
660 *cmds++ = 0x00000000;
661
662 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
663 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
664 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
665 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
666 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
667 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
668 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
669
670 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
671 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
672 /* end; */
673 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
674 /* nop; */
675 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
676 /* nop; */
677 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
678
679 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
680 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
681 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
682 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
683 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
684 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
685 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
686
687 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
688 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
689 /* end; */
690 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
691 /* nop; */
692 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
693 /* nop; */
694 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
695
696 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
697 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
698 /* RB_MSAA_CONTROL */
699 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
700 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
701
702 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
703 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
704 /* RB_DEPTH_CONTROL */
705 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
706
707 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
708 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
709 /* RB_MRT_CONTROL0 */
710 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
711 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
712 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
713 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
714
715 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
716 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
717 /* RB_MRT_BLEND_CONTROL0 */
718 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
719 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
720 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
721 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
722 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
723 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
724 /* RB_MRT_CONTROL1 */
725 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
726 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
727 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
728
729 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
730 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
731 /* RB_MRT_BLEND_CONTROL1 */
732 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
733 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
734 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
735 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
736 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
737 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
738 /* RB_MRT_CONTROL2 */
739 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
740 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
741 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
742
743 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
744 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
745 /* RB_MRT_BLEND_CONTROL2 */
746 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
747 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
748 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
749 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
750 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
751 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
752 /* RB_MRT_CONTROL3 */
753 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
754 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
755 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
756
757 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
758 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
759 /* RB_MRT_BLEND_CONTROL3 */
760 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
761 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
762 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
763 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
764 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
765 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
766
767 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
768 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
769 /* VFD_INDEX_MIN */
770 *cmds++ = 0x00000000;
771 /* VFD_INDEX_MAX */
772 *cmds++ = 0xFFFFFFFF;
773 /* VFD_INSTANCEID_OFFSET */
774 *cmds++ = 0x00000000;
775 /* VFD_INDEX_OFFSET */
776 *cmds++ = 0x00000000;
777
778 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
779 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
780 /* VFD_VS_THREADING_THRESHOLD */
781 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
782 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
783
784 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
785 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
786 /* TPL1_TP_VS_TEX_OFFSET */
787 *cmds++ = 0;
788
789 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
790 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
791 /* TPL1_TP_FS_TEX_OFFSET */
792 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
793 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
794 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
795
796 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
797 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
798 /* PC_PRIM_VTX_CNTL */
799 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
800 PC_DRAW_TRIANGLES) |
801 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
802 PC_DRAW_TRIANGLES) |
803 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
804
805 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
806 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
807 /* GRAS_SC_WINDOW_SCISSOR_TL */
808 *cmds++ = 0x00000000;
809 /* GRAS_SC_WINDOW_SCISSOR_BR */
810 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
811 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
812
813 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
814 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
815 /* GRAS_SC_SCREEN_SCISSOR_TL */
816 *cmds++ = 0x00000000;
817 /* GRAS_SC_SCREEN_SCISSOR_BR */
818 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
819 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
820
821 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
822 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
823 /* GRAS_CL_VPORT_XOFFSET */
824 *cmds++ = 0x00000000;
825 /* GRAS_CL_VPORT_XSCALE */
826 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
827 /* GRAS_CL_VPORT_YOFFSET */
828 *cmds++ = 0x00000000;
829 /* GRAS_CL_VPORT_YSCALE */
830 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
831
832 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
833 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
834 /* GRAS_CL_VPORT_ZOFFSET */
835 *cmds++ = 0x00000000;
836 /* GRAS_CL_VPORT_ZSCALE */
837 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
838
839 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
840 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
841 /* GRAS_CL_CLIP_CNTL */
842 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
843 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
844 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
845 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
846 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
847
848 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
849 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
850 /* GRAS_CL_GB_CLIP_ADJ */
851 *cmds++ = 0x00000000;
852
853 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
854 *cmds++ = 0x00000000;
855
856 /*
857 * Resolve using two draw calls with a dummy register
858 * write in between. This is a HLM workaround
859 * that should be removed later.
860 */
861 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
862 *cmds++ = 0x00000000; /* Viz query info */
863 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
864 PC_DI_SRC_SEL_IMMEDIATE,
865 PC_DI_INDEX_SIZE_32_BIT,
866 PC_DI_IGNORE_VISIBILITY);
867 *cmds++ = 0x00000003; /* Num indices */
868 *cmds++ = 0x00000000; /* Index 0 */
869 *cmds++ = 0x00000001; /* Index 1 */
870 *cmds++ = 0x00000002; /* Index 2 */
871
872 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
873 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
874 *cmds++ = 0x00000000;
875
876 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
877 *cmds++ = 0x00000000; /* Viz query info */
878 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
879 PC_DI_SRC_SEL_IMMEDIATE,
880 PC_DI_INDEX_SIZE_32_BIT,
881 PC_DI_IGNORE_VISIBILITY);
882 *cmds++ = 0x00000003; /* Num indices */
883 *cmds++ = 0x00000002; /* Index 0 */
884 *cmds++ = 0x00000001; /* Index 1 */
885 *cmds++ = 0x00000003; /* Index 2 */
886
887 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
888 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
889 *cmds++ = 0x00000000;
890
891 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
892 *cmds++ = 0x00000000;
893
894 /* Create indirect buffer command for above command sequence */
895 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
896
897 return cmds;
898}
899
/*
 * build_shader_save_cmds() - build the IB that saves SP shader instruction
 * memory (vertex and fragment) into the per-context shadow buffer.
 *
 * The save is conditional: two boolean slots (cond_execs[0]/[1]) gate the
 * VS and FS copies via CP_COND_EXEC, and the CP_REG_TO_MEM source/size
 * words are emitted here as zero placeholders (shader_save_commands[2]/[3])
 * to be patched at save time by the fixup IB built in
 * build_save_fixup_cmds(), once the live shader lengths are known.
 */
static void build_shader_save_cmds(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;

	/* Reserve space for boolean values used for COND_EXEC packet */
	drawctxt->cond_execs[0].hostptr = cmd;
	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[1].hostptr = cmd;
	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/*
	 * Two more reserved dwords; like the cond_execs slots above these
	 * live *before* 'start' so they are data, not part of the IB that
	 * the GPU executes.  They are patched by the fixup IB.
	 */
	drawctxt->shader_save_commands[0].hostptr = cmd;
	drawctxt->shader_save_commands[0].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->shader_save_commands[1].hostptr = cmd;
	drawctxt->shader_save_commands[1].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* The executable IB proper begins here */
	start = cmd;

	/* Save vertex shader */

	/*
	 * Execute the following 3 dwords only if the boolean at
	 * cond_execs[0] is non-zero (addresses are in dword units).
	 */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->shader_save_commands[2].hostptr = cmd;
	drawctxt->shader_save_commands[2].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 *
	 * dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
	 *
	 * From regspec:
	 * SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	 * If bit31 is 1, it means overflow
	 * or any long shader.
	 *
	 * src = (HLSQ_SHADOW_BASE + 0x1000)/4
	 */
	*cmd++ = 0;	/*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Save fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->shader_save_commands[3].hostptr = cmd;
	drawctxt->shader_save_commands[3].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 *
	 * dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
	 *
	 * From regspec:
	 * SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	 * If bit31 is 1, it means overflow
	 * or any long shader.
	 *
	 * fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
	 * From regspec:
	 *
	 * SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
	 * First instruction of the whole shader will be stored from
	 * the offset in instruction cache, unit = 256bits, a cache line.
	 * It can start from 0 if no VS available.
	 *
	 * src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
	 */
	*cmd++ = 0;	/*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* FS shadow lives in the second half of the shader shadow area */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
992
993/*
994 * Make an IB to modify context save IBs with the correct shader instruction
995 * and constant sizes and offsets.
996 */
997
/*
 * build_save_fixup_cmds() - build the IB that patches the context-save IBs
 * with the correct shader instruction / constant sizes and offsets.
 *
 * The save IBs (shader and constant) are built with zero placeholders
 * because the sizes depend on the shaders bound at switch time.  This IB
 * runs first: it reads the live SP_*_CTRL / OBJ_OFFSET registers and
 * rewrites the placeholder dwords in memory so the subsequent save IBs
 * copy the right amount of data from the right HLSQ shadow offsets.
 *
 * Two variants: with GSL_CONTEXT_SWITCH_CPU_SYNC the raw register values
 * are dumped to memory (for CPU-side fixup); otherwise the read-modify-
 * write is done on the GPU via CP_REG_RMW through scratch registers and
 * the rmw_regtomem() helper.
 */
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Full UCHE invalidate (no address range given) */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
	    UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
	    UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
	    0;			/* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC

	/* CPU-sync variant: dump raw register values for host-side fixup */

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else

	/*
	 * GPU-side fixup variant: extract fields with CP_REG_RMW and
	 * write the composed (count << shift) | src words directly over
	 * the placeholder dwords in the save IBs.
	 */

	/*
	 * Shader save: turn SP_VS_CTRL_REG0.VS_LENGTH [31:24] into a
	 * REG_TO_MEM loop count (length * 8 dwords) OR'd with the VS
	 * shadow source address, and store it in the VS placeholder.
	 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0;	/* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
	    A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000;	/* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4;	/* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;	/* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000;	/* AND value */
	*cmd++ = 0x00000000;	/* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff;	/* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3;	/* OR address */

	/* Store the composed FS word into the FS save placeholder */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save: same scheme using the VS/FS constant lengths */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	/* Patch the FS constant destination address from the object offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1148
1149/****************************************************************************/
1150/* Functions to build context restore IBs */
1151/****************************************************************************/
1152
1153static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1154 struct adreno_context *drawctxt,
1155 struct gmem_shadow_t *shadow)
1156{
1157 unsigned int *cmds = tmp_ctx.cmd;
1158 unsigned int *start = cmds;
1159
1160 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1161 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1162 /* HLSQ_CONTROL_0_REG */
1163 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1164 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1165 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1166 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1167 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1168 /* HLSQ_CONTROL_1_REG */
1169 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1170 /* HLSQ_CONTROL_2_REG */
1171 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1172 /* HLSQ_CONTROL3_REG */
1173 *cmds++ = 0x00000000;
1174
1175 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1176 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1177 /* RB_MRT_BUF_INFO0 */
1178 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1179 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1180 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1181 (shadow->gmem_pitch * 4 * 8) / 256);
1182 /* RB_MRT_BUF_BASE0 */
1183 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1184
1185 /* Texture samplers */
1186 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1187 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1188 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1189 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1190 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1191 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1192 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1193 *cmds++ = 0x00000240;
1194 *cmds++ = 0x00000000;
1195
1196 /* Texture memobjs */
1197 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1198 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1199 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1200 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1201 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1202 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1203 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1204 *cmds++ = 0x4cc06880;
1205 *cmds++ = shadow->height | (shadow->width << 14);
1206 *cmds++ = (shadow->pitch*4*8) << 9;
1207 *cmds++ = 0x00000000;
1208
1209 /* Mipmap bases */
1210 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1211 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1212 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1213 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1214 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1215 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1216 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1217 *cmds++ = shadow->gmemshadow.gpuaddr;
1218 *cmds++ = 0x00000000;
1219 *cmds++ = 0x00000000;
1220 *cmds++ = 0x00000000;
1221 *cmds++ = 0x00000000;
1222 *cmds++ = 0x00000000;
1223 *cmds++ = 0x00000000;
1224 *cmds++ = 0x00000000;
1225 *cmds++ = 0x00000000;
1226 *cmds++ = 0x00000000;
1227 *cmds++ = 0x00000000;
1228 *cmds++ = 0x00000000;
1229 *cmds++ = 0x00000000;
1230 *cmds++ = 0x00000000;
1231
1232 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1233 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1234 /* HLSQ_VS_CONTROL_REG */
1235 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1236 /* HLSQ_FS_CONTROL_REG */
1237 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1238 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1239 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1240 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1241 *cmds++ = 0x00000000;
1242 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1243 *cmds++ = 0x00000000;
1244
1245 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1246 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1247 /* SP_FS_LENGTH_REG */
1248 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1249
1250 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1251 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1252 /* SP_VS_CTRL_REG0 */
1253 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1254 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1255 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1256 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1257 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1258 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1259 /* SP_VS_CTRL_REG1 */
1260 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1261 /* SP_VS_PARAM_REG */
1262 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1263 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1264 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1265 /* SP_VS_OUT_REG0 */
1266 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1267 /* SP_VS_OUT_REG1 */
1268 *cmds++ = 0x00000000;
1269 /* SP_VS_OUT_REG2 */
1270 *cmds++ = 0x00000000;
1271 /* SP_VS_OUT_REG3 */
1272 *cmds++ = 0x00000000;
1273 /* SP_VS_OUT_REG4 */
1274 *cmds++ = 0x00000000;
1275 /* SP_VS_OUT_REG5 */
1276 *cmds++ = 0x00000000;
1277 /* SP_VS_OUT_REG6 */
1278 *cmds++ = 0x00000000;
1279 /* SP_VS_OUT_REG7 */
1280 *cmds++ = 0x00000000;
1281
1282 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1283 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1284 /* SP_VS_VPC_DST_REG0 */
1285 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1286 /* SP_VS_VPC_DST_REG1 */
1287 *cmds++ = 0x00000000;
1288 /* SP_VS_VPC_DST_REG2 */
1289 *cmds++ = 0x00000000;
1290 /* SP_VS_VPC_DST_REG3 */
1291 *cmds++ = 0x00000000;
1292 /* SP_VS_OBJ_OFFSET_REG */
1293 *cmds++ = 0x00000000;
1294 /* SP_VS_OBJ_START_REG */
1295 *cmds++ = 0x00000000;
1296
1297 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1298 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1299 /* SP_VS_LENGTH_REG */
1300 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1301 /* SP_FS_CTRL_REG0 */
1302 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1303 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1304 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1305 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1306 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1307 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1308 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1309 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1310 /* SP_FS_CTRL_REG1 */
1311 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1312 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1313 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1314 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001315 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
1316 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001317 /* SP_FS_OBJ_START_REG */
1318 *cmds++ = 0x00000000;
1319
1320 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1321 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1322 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1323 *cmds++ = 0x00000000;
1324 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1325 *cmds++ = 0x00000000;
1326
1327 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1328 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1329 /* SP_FS_OUT_REG */
1330 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1331
Jordan Crousea7ec4212012-02-04 10:23:52 -07001332 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001333 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1334 /* SP_FS_MRT_REG0 */
1335 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1336 /* SP_FS_MRT_REG1 */
1337 *cmds++ = 0;
1338 /* SP_FS_MRT_REG2 */
1339 *cmds++ = 0;
1340 /* SP_FS_MRT_REG3 */
1341 *cmds++ = 0;
1342
1343 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1344 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1345 /* VPC_ATTR */
1346 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1347 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1348 _SET(VPC_VPCATTR_LMSIZE, 1);
1349 /* VPC_PACK */
1350 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1351 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1352 /* VPC_VARYING_INTERP_MODE_0 */
1353 *cmds++ = 0x00000000;
1354 /* VPC_VARYING_INTERP_MODE1 */
1355 *cmds++ = 0x00000000;
1356 /* VPC_VARYING_INTERP_MODE2 */
1357 *cmds++ = 0x00000000;
1358 /* VPC_VARYING_IINTERP_MODE3 */
1359 *cmds++ = 0x00000000;
1360 /* VPC_VARRYING_PS_REPL_MODE_0 */
1361 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1362 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1363 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1364 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1365 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1366 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1367 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1377 /* VPC_VARRYING_PS_REPL_MODE_1 */
1378 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1379 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1380 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1381 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1382 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1388 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1389 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1392 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1393 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1394 /* VPC_VARRYING_PS_REPL_MODE_2 */
1395 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1396 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1397 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1398 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1399 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1405 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1406 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1409 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1410 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1411 /* VPC_VARRYING_PS_REPL_MODE_3 */
1412 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1413 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1414 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1415 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1416 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1422 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1423 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1426 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1427 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1428
Jordan Crousea7ec4212012-02-04 10:23:52 -07001429 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001430 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1431 /* SP_SP_CTRL_REG */
1432 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1433
1434 /* Load vertex shader */
1435 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1436 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1437 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1438 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1439 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1440 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1441 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1442 /* (sy)end; */
1443 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1444 /* nop; */
1445 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1446 /* nop; */
1447 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1448 /* nop; */
1449 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1450
1451 /* Load fragment shader */
1452 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1453 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1454 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1455 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1456 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1457 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1458 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1459 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1460 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1461 /* (rpt5)nop; */
1462 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1463 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1464 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1465 /* (sy)mov.f32f32 r1.x, r0.x; */
1466 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1467 /* mov.f32f32 r1.y, r0.y; */
1468 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1469 /* mov.f32f32 r1.z, r0.z; */
1470 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1471 /* mov.f32f32 r1.w, r0.w; */
1472 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1473 /* end; */
1474 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1475
1476 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1477 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1478 /* VFD_CONTROL_0 */
1479 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1480 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1481 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1482 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1483 /* VFD_CONTROL_1 */
1484 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1485 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1486 _SET(VFD_CTRLREG1_REGID4INST, 252);
1487
1488 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1489 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1490 /* VFD_FETCH_INSTR_0_0 */
1491 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1492 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1493 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1494 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1495 /* VFD_FETCH_INSTR_1_0 */
1496 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1497 shadow->quad_vertices_restore.gpuaddr);
1498 /* VFD_FETCH_INSTR_0_1 */
1499 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1500 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1501 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1502 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1503 /* VFD_FETCH_INSTR_1_1 */
1504 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1505 shadow->quad_vertices_restore.gpuaddr + 16);
1506
1507 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1508 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1509 /* VFD_DECODE_INSTR_0 */
1510 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1511 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1512 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1513 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1514 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1515 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1516 /* VFD_DECODE_INSTR_1 */
1517 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1518 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1519 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1520 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1521 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1522 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1523
1524 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1525 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1526 /* RB_DEPTH_CONTROL */
1527 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1528
1529 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1530 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1531 /* RB_STENCIL_CONTROL */
1532 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1533 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1534 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1535 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1536 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1537 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1538 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1539 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1540
1541 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1542 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1543 /* RB_MODE_CONTROL */
1544 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1545 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1546
1547 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1548 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1549 /* RB_RENDER_CONTROL */
1550 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1551 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1552
1553 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1554 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1555 /* RB_MSAA_CONTROL */
1556 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1557 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1558
1559 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1560 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1561 /* RB_MRT_CONTROL0 */
1562 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1563 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1564 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1565 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1566
1567 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1568 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1569 /* RB_MRT_BLENDCONTROL0 */
1570 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1571 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1572 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1573 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1574 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1575 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1576 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1577 /* RB_MRT_CONTROL1 */
1578 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1579 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1580 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1581
1582 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1583 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1584 /* RB_MRT_BLENDCONTROL1 */
1585 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1586 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1587 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1588 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1589 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1590 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1591 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1592 /* RB_MRT_CONTROL2 */
1593 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1594 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1595 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1596
1597 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1598 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1599 /* RB_MRT_BLENDCONTROL2 */
1600 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1601 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1602 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1603 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1604 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1605 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1606 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1607 /* RB_MRT_CONTROL3 */
1608 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1609 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1610 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1611
1612 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1613 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1614 /* RB_MRT_BLENDCONTROL3 */
1615 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1616 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1617 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1618 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1619 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1620 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1621 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1622
1623 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1624 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1625 /* VFD_INDEX_MIN */
1626 *cmds++ = 0x00000000;
1627 /* VFD_INDEX_MAX */
1628 *cmds++ = 0xFFFFFFFF;
1629 /* VFD_INDEX_OFFSET */
1630 *cmds++ = 0x00000000;
1631 /* TPL1_TP_VS_TEX_OFFSET */
1632 *cmds++ = 0x00000000;
1633
1634 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1635 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1636 /* VFD_VS_THREADING_THRESHOLD */
1637 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1638 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1639
1640 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1641 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1642 /* TPL1_TP_VS_TEX_OFFSET */
1643 *cmds++ = 0x00000000;
1644
1645 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1646 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1647 /* TPL1_TP_FS_TEX_OFFSET */
1648 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1649 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1650 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1651
1652 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1653 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1654 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001655 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1656 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1657 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001658
1659 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1660 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1661 /* GRAS_SU_MODE_CONTROL */
1662 *cmds++ = 0x00000000;
1663
1664 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1665 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1666 /* GRAS_SC_WINDOW_SCISSOR_TL */
1667 *cmds++ = 0x00000000;
1668 /* GRAS_SC_WINDOW_SCISSOR_BR */
1669 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1670 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1671
1672 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1673 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1674 /* GRAS_SC_SCREEN_SCISSOR_TL */
1675 *cmds++ = 0x00000000;
1676 /* GRAS_SC_SCREEN_SCISSOR_BR */
1677 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1678 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1679
1680 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1681 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1682 /* GRAS_CL_VPORT_XOFFSET */
1683 *cmds++ = 0x00000000;
1684 /* GRAS_CL_VPORT_XSCALE */
1685 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1686 /* GRAS_CL_VPORT_YOFFSET */
1687 *cmds++ = 0x00000000;
1688 /* GRAS_CL_VPORT_YSCALE */
1689 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1690
1691 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1692 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1693 /* GRAS_CL_VPORT_ZOFFSET */
1694 *cmds++ = 0x00000000;
1695 /* GRAS_CL_VPORT_ZSCALE */
1696 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1697
1698 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1699 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1700 /* GRAS_CL_CLIP_CNTL */
1701 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1702
1703 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1704 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1705 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1706 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1707
1708 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1709 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1710 /* PC_PRIM_VTX_CONTROL */
1711 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1712 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1713 PC_DRAW_TRIANGLES) |
1714 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1715 PC_DRAW_TRIANGLES) |
1716 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1717
1718 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1719 *cmds++ = 0x00000000; /* Viz query info */
1720 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1721 PC_DI_SRC_SEL_AUTO_INDEX,
1722 PC_DI_INDEX_SIZE_16_BIT,
1723 PC_DI_IGNORE_VISIBILITY);
1724 *cmds++ = 0x00000002; /* Num indices */
1725
1726 /* Create indirect buffer command for above command sequence */
1727 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1728
1729 return cmds;
1730}
1731
/*
 * build_regrestore_cmds() - build the IB that restores context registers
 * from the per-context register shadow.
 *
 * Emits an HLSQ flush, a full UCHE cache invalidate, then a
 * CP_LOAD_CONSTANT_CONTEXT covering every range in
 * context_register_ranges[], followed by type-0 writes that restore each
 * register in global_registers[] from a value slot in the shadow.
 * The resulting command stream is wrapped in drawctxt->reg_restore and
 * tmp_ctx.cmd is advanced past it.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7; /* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000; /* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0; /* No end addr for full invalidate */

	/*
	 * The CP_LOAD_CONSTANT_CONTEXT header is patched in below once the
	 * payload length (number of register ranges) is known.
	 */
	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Patch the deferred header now that the payload size is known */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * Each global register gets a type-0 write whose data word lives in
	 * the shadow; remember the gpuaddr of that word so the save path can
	 * fill it in (tmp_ctx.reg_values[i]).
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1789
/*
 * build_constantrestore_cmds() - build the IB that restores shader
 * constants and texture state from the context shadow.
 *
 * The CP_LOAD_STATE ord1/ord2 words for the VS/FS constant loads are left
 * as zero placeholders here; their gpu addresses are recorded in
 * drawctxt->constant_load_commands[] so build_restore_fixup_cmds() can
 * patch the real sizes/offsets at save time. The conditional-exec slots
 * (cond_execs[2]/[3]) gate the loads on a non-zero constant length.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/* Predicate words for the two CP_COND_EXECs below; patched later */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable register shadowing around the HLSQ write */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 - patched by the fixup IB */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 - patched by the fixup IB */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 - patched by the fixup IB */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1954
/*
 * build_shader_restore_cmds() - build the IB that reloads VS/FS shader
 * instructions from the context's shader shadow.
 *
 * Each load is guarded by a CP_COND_EXEC on cond_execs[0]/[1] (set when a
 * shader was actually saved). The CP_LOAD_STATE ord1 word is a zero
 * placeholder; its address is recorded in shader_load_commands[] so the
 * fixup IB can patch the real shader length at save time.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 - patched by the fixup IB */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 - patched by the fixup IB */
	/* FS shadow lives in the second half of the shader shadow buffer */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2021
2022static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
2023 struct adreno_context *drawctxt)
2024{
2025 unsigned int *cmd = tmp_ctx.cmd;
2026 unsigned int *start = cmd;
2027
2028 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
2029 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
2030 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
2031 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
2032 = virt2gpu(cmd, &drawctxt->gpustate);
2033 *cmd++ = 0;
2034
2035 /* Create indirect buffer command for above command sequence */
2036 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
2037
2038 tmp_ctx.cmd = cmd;
2039}
2040
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
/*
 * At context-save time this IB reads the live SP_*_CTRL registers and
 * rewrites the placeholder ord1/ord2 words inside the restore IBs built
 * above, so that the subsequent restore reloads exactly as much shader
 * and constant data as the context actually used.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/*
	 * GPU-side read-modify-write: mask out the length field, shift it
	 * into the CP_LOAD_STATE ord1 position and OR in the constant
	 * stateblock/mode bits before storing to the placeholder word.
	 */
	/* Save shader sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2113
/*
 * a3xx_create_gpustate_shadow() - build all save/restore IBs for a context.
 *
 * Each build_* helper consumes and advances tmp_ctx.cmd, so the call
 * order below also fixes the layout of the command space inside
 * drawctxt->gpustate. Always returns 0.
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2130
/* create buffers for saving/restoring registers, constants, & GMEM */
/*
 * a3xx_create_gmem_shadow() - allocate the GMEM shadow buffer and build
 * the gmem2sys (save) and sys2gmem (restore) command streams for it.
 *
 * Returns 0 on success or the error from kgsl_allocate(). On success the
 * context is marked CTXT_FLAGS_GMEM_SHADOW.
 */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	int result;

	calc_gmemsize(&drawctxt->context_gmem_shadow,
		      adreno_dev->gmemspace.sizebytes);
	tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;

	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
		drawctxt->pagetable, drawctxt->context_gmem_shadow.size);

	if (result)
		return result;

	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
			   &tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);

	/* Flush the shadow so the GPU sees the freshly built commands */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;

	return 0;
}
2165
2166static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
2167 struct adreno_context *drawctxt)
2168{
2169 int ret;
2170
2171 /*
2172 * Allocate memory for the GPU state and the context commands.
2173 * Despite the name, this is much more then just storage for
2174 * the gpustate. This contains command space for gmem save
2175 * and texture and vertex buffer storage too
2176 */
2177
2178 ret = kgsl_allocate(&drawctxt->gpustate,
2179 drawctxt->pagetable, CONTEXT_SIZE);
2180
2181 if (ret)
2182 return ret;
2183
2184 kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
2185 tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;
2186
2187 if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
2188 ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
2189 if (ret)
2190 goto done;
2191
2192 drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
2193 }
2194
2195 if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
2196 ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);
2197
2198done:
2199 if (ret)
2200 kgsl_sharedmem_free(&drawctxt->gpustate);
2201
2202 return ret;
2203}
2204
/*
 * a3xx_drawctxt_save() - submit the IBs that save the outgoing context.
 *
 * Order matters: the save-fixup IB patches the save IBs before they run,
 * registers/constants are saved before shaders, and GMEM is saved last
 * because the gmem_save stream itself changes shader state.
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			   struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			       "Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			/* Mark the context so restore reloads the shaders */
			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2248
/*
 * a3xx_drawctxt_restore() - submit the IBs that restore the incoming
 * context.
 *
 * Writes the context identifier into the memstore, switches the MMU
 * pagetable, restores GMEM first (it clobbers shader state), then
 * registers, the restore fixups, constants, shaders, and finally
 * HLSQ_CONTROL_0. With a NULL context only the default pagetable is set.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
			      struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Record which context is current in the shared memstore */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
	cmds[4] = (unsigned int)context;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(device, context->pagetable);

	/*
	 * Restore GMEM.  (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2304
/*
 * a3xx_rb_init() - submit the CP_ME_INIT packet that initializes the
 * microengine after ringbuffer start.
 *
 * 18 dwords total: the CP_ME_INIT header plus 17 payload words. The
 * payload values are the A3XX ME init defaults; bit meanings are defined
 * by the CP microcode (not visible here).
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	/* GPU address of the space just allocated (wptr already advanced) */
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2334
2335static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2336{
2337 struct kgsl_device *device = &adreno_dev->dev;
2338 const char *err = "";
2339
2340 switch (bit) {
2341 case A3XX_INT_RBBM_AHB_ERROR: {
2342 unsigned int reg;
2343
2344 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2345
2346 /*
2347 * Return the word address of the erroring register so that it
2348 * matches the register specification
2349 */
2350
2351 KGSL_DRV_CRIT(device,
2352 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2353 reg & (1 << 28) ? "WRITE" : "READ",
2354 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2355 (reg >> 24) & 0x3);
2356
2357 /* Clear the error */
2358 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2359 return;
2360 }
2361 case A3XX_INT_RBBM_REG_TIMEOUT:
2362 err = "RBBM: AHB register timeout";
2363 break;
2364 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2365 err = "RBBM: ME master split timeout";
2366 break;
2367 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2368 err = "RBBM: PFP master split timeout";
2369 break;
2370 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2371 err = "RBBM: ATB bus oveflow";
2372 break;
2373 case A3XX_INT_VFD_ERROR:
2374 err = "VFD: Out of bounds access";
2375 break;
2376 case A3XX_INT_CP_T0_PACKET_IN_IB:
2377 err = "ringbuffer TO packet in IB interrupt";
2378 break;
2379 case A3XX_INT_CP_OPCODE_ERROR:
2380 err = "ringbuffer opcode error interrupt";
2381 break;
2382 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2383 err = "ringbuffer reserved bit error interrupt";
2384 break;
2385 case A3XX_INT_CP_HW_FAULT:
2386 err = "ringbuffer hardware fault";
2387 break;
2388 case A3XX_INT_CP_REG_PROTECT_FAULT:
2389 err = "ringbuffer protected mode error interrupt";
2390 break;
2391 case A3XX_INT_CP_AHB_ERROR_HALT:
2392 err = "ringbuffer AHB error interrupt";
2393 break;
2394 case A3XX_INT_MISC_HANG_DETECT:
2395 err = "MISC: GPU hang detected";
2396 break;
2397 case A3XX_INT_UCHE_OOB_ACCESS:
2398 err = "UCHE: Out of bounds access";
2399 break;
2400 }
2401
2402 KGSL_DRV_CRIT(device, "%s\n", err);
2403 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2404}
2405
/*
 * a3xx_cp_callback() - handle CP completion interrupts (RB/IB1/IB2).
 *
 * For the ringbuffer interrupt the timestamp-compare enable is cleared
 * first (with a write barrier so the GPU observes it) before anything is
 * woken. All CP interrupts then wake waiters, queue the
 * timestamp-expired work and run the notifier chain.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	if (irq == A3XX_INT_CP_RB_INT) {
		kgsl_sharedmem_writel(&rb->device->memstore,
			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0);
		/* Ensure the memstore write lands before waking waiters */
		wmb();
		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&rb->device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);

	atomic_notifier_call_chain(&rb->device->ts_notifier_list,
				   rb->device->id, NULL);
}
2425
/* Initializer for one a3xx_irq_funcs[] dispatch entry */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupt bits enabled in A3XX_RBBM_INT_0_MASK. Note that some error
 * sources present in the handler (ME/PFP master split timeout, VFD
 * error, MISC hang detect) are deliberately absent from this mask —
 * presumably left unmasked on purpose; confirm against hardware errata
 * before re-enabling them.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) |      \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2442
/*
 * Per-bit interrupt dispatch table, indexed by A3XX_RBBM_INT_0_STATUS
 * bit number. NULL entries are reported as unhandled by
 * a3xx_irq_handler().
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2474
2475static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2476{
2477 struct kgsl_device *device = &adreno_dev->dev;
2478 irqreturn_t ret = IRQ_NONE;
2479 unsigned int status, tmp;
2480 int i;
2481
2482 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2483
2484 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2485 if (tmp & 1) {
2486 if (a3xx_irq_funcs[i].func != NULL) {
2487 a3xx_irq_funcs[i].func(adreno_dev, i);
2488 ret = IRQ_HANDLED;
2489 } else {
2490 KGSL_DRV_CRIT(device,
2491 "Unhandled interrupt bit %x\n", i);
2492 }
2493 }
2494
2495 tmp >>= 1;
2496 }
2497
2498 if (status)
2499 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2500 status);
2501 return ret;
2502}
2503
2504static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2505{
2506 struct kgsl_device *device = &adreno_dev->dev;
2507
2508 if (state)
2509 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2510 else
2511 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2512}
2513
/*
 * a3xx_busy_cycles() - sample and restart the GPU busy power counter.
 *
 * Freezes power counter 1, reads its low word, then resets and re-enables
 * it so the next call measures a fresh interval.  The register write
 * sequence is order-sensitive: the counter must be frozen before the read
 * or the sampled value would still be advancing.
 *
 * Returns the number of busy cycles accumulated since the previous call.
 */
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int reg, val;

	/* Freeze the counter */
	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
	reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Read the value */
	adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);

	/* Reset the counter */
	reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Re-enable the counter: clear the reset bit and set the enable bit
	 * in a single final write */
	reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
	reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	return val;
}
2538
/*
 * a3xx_start() - one-time hardware bring-up for the A3XX core.
 *
 * Performs a software reset of the core, then programs the VBIF AXI sort
 * registers, the busy/power performance counters, AHB/RBBM error
 * reporting, and the hang detector.  The register writes are
 * order-dependent: the reset (plus settle delay) must complete before any
 * configuration is applied.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* GMEM size on A320 is 512K */
	/* NOTE(review): hard-coded — assumes this path only runs on parts
	 * with 512K GMEM; confirm if other A3XX variants are supported */
	adreno_dev->gmemspace.sizebytes = SZ_512K;

	/* Reset the core */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
		0x00000001);
	/* Give the core time to come out of reset before touching it */
	msleep(20);

	/*
	 * enable fixed master AXI port of 0x0 for all clients to keep
	 * traffic from going to random places
	 */

	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_EN, 0x0001003F);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL0, 0x00000000);
	adreno_regwrite(device, A3XX_VBIF_FIXED_SORT_SEL1, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable the RBBM error reporting bits. This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0x86FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 16) | 0xFFF);

}
2581
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002582/* Defined in adreno_a3xx_snapshot.c */
2583void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2584 int *remain, int hang);
2585
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002586struct adreno_gpudev adreno_a3xx_gpudev = {
2587 .reg_rbbm_status = A3XX_RBBM_STATUS,
2588 .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
2589 .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
2590
2591 .ctxt_create = a3xx_drawctxt_create,
2592 .ctxt_save = a3xx_drawctxt_save,
2593 .ctxt_restore = a3xx_drawctxt_restore,
2594 .rb_init = a3xx_rb_init,
2595 .irq_control = a3xx_irq_control,
2596 .irq_handler = a3xx_irq_handler,
2597 .busy_cycles = a3xx_busy_cycles,
2598 .start = a3xx_start,
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002599 .snapshot = a3xx_snapshot,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002600};