blob: aeb48d79975d451a050875f8d55ca174452c2ffc [file] [log] [blame]
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/delay.h>
15
16#include "kgsl.h"
17#include "adreno.h"
18#include "kgsl_sharedmem.h"
19#include "kgsl_cffdump.h"
20#include "a3xx_reg.h"
21
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive)
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303C, 0x303C, 0x305E, 0x305F,
};

/* Number of (start, end) ranges above; consumers walk the table in pairs */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
68
/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */

#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 * 8K - ALU Constant Shadow (8K aligned)
 * 4K - H/W Register Shadow (8K aligned)
 * 5K - Command and Vertex Buffers
 * 8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE		(8*1024)	/* 8KB */
#define REG_SHADOW_SIZE		(4*1024)	/* 4KB */
#define CMD_BUFFER_SIZE		(5*1024)	/* 5KB */
#define TEX_SIZE_MEM_OBJECTS	896		/* bytes */
#define TEX_SIZE_MIPMAP		1936		/* bytes */
#define TEX_SIZE_SAMPLER_OBJ	256		/* bytes */
/* Texture shadow holds the three tables above once for VS and once for FS */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2)	/* ~6KB */
#define SHADER_SHADOW_SIZE	(8*1024)	/* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET		ALU_SHADOW_SIZE
#define CMD_OFFSET		(REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET		(CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET		(SHADER_OFFSET+SHADER_SHADOW_SIZE)
#define VS_TEX_OFFSET_MEM_OBJECTS	TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP	(VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ	(VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP	(FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ	(FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE	(16*1024)

/* Offsets of the texture state tables within the HLSQ shadow */
#define HLSQ_SAMPLER_OFFSET	0x000
#define HLSQ_MEMOBJ_OFFSET	0x400
#define HLSQ_MIPMAP_OFFSET	0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE	(0x10000+SSIZE*2)

/* Shift of the dword-count field in a CP_REG_TO_MEM packet's first payload
 * word (see the loops in build_regconstantsave_cmds()). */
#define REG_TO_MEM_LOOP_COUNT_SHIFT	18
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700136
/*
 * Build a PC draw initiator dword.
 *
 * index_size is a 2-bit value: bit 0 selects INDEX_SIZE and bit 1 selects
 * SMALL_INDEX.  The PRE_DRAW_INITIATOR_ENABLE bit is always set.
 *
 * Fix: index_size was expanded without parentheses ("index_size & 1",
 * "index_size >> 1"), which mis-parses for expression arguments whose
 * operators bind looser than '&'/'>>'.  All arguments are now fully
 * parenthesized; behavior for plain-constant callers is unchanged.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
		vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
145
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains start and end of a range of registers.
 * Consumed two entries at a time (inclusive start/end) by
 * build_regconstantsave_cmds() when writing the register shadow.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
183
/*
 * Global registers that need to be saved separately.
 * Unlike the ranges above, these are saved one at a time into
 * tmp_ctx.reg_values[] (see build_regconstantsave_cmds()), so the
 * index into this array must match the index into reg_values.
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};

#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
215
/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where registers are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
224
225#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
226/*
227 * Function for executing dest = ( (reg & and) ROL rol ) | or
228 */
229static unsigned int *rmw_regtomem(unsigned int *cmd,
230 unsigned int reg, unsigned int and,
231 unsigned int rol, unsigned int or,
232 unsigned int dest)
233{
234 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
235 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
236 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
237 *cmd++ = 0x00000000; /* AND value */
238 *cmd++ = reg; /* OR address */
239
240 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
241 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
242 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
243 *cmd++ = and; /* AND value */
244 *cmd++ = or; /* OR value */
245
246 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
247 *cmd++ = A3XX_CP_SCRATCH_REG2;
248 *cmd++ = dest;
249
250 return cmd;
251}
252#endif
253
/*
 * build_regconstantsave_cmds() - build the command stream that saves the
 * current HW context registers and shader constants into this context's
 * shadow memory (drawctxt->gpustate).
 *
 * Commands are emitted into the scratch C&V buffer at tmp_ctx.cmd and then
 * wrapped into an IB1 (drawctxt->regconstant_save).  Several command dwords
 * cannot be known at build time (constant lengths, constant object start
 * offset); their host/GPU addresses are recorded in
 * drawctxt->constant_save_commands[] so a later fixup pass can patch them.
 * adreno_dev is unused in this function.
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;
	unsigned int i;

	/*
	 * Reserve one dword ahead of the IB start; its address is recorded
	 * for the fixup pass - TODO confirm what the fixup writes here
	 * (not visible in this part of the file).
	 */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	start = cmd;

	/* Let the pipe drain before reading registers back */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;	/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		/* NOTE(review): this 'start' shadows the outer IB start
		 * pointer; harmless here, but would trip -Wshadow. */
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		/* Context registers start at dword offset 0x2000 */
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants, gated on cond_execs[2] */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;	/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Payload dword is patched at runtime; remember its location */
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants, gated on cond_execs[3] */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;	/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Both payload dwords below are patched at runtime */
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	   From fixup:

	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   dst = base + offset
	   Because of the base alignment we can use
	   dst = base | offset
	 */
	*cmd++ = 0;	/* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS tables live SSIZE further in) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
424
425/* Copy GMEM contents to system memory shadow. */
426static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
427 struct adreno_context *drawctxt,
428 struct gmem_shadow_t *shadow)
429{
430 unsigned int *cmds = tmp_ctx.cmd;
431 unsigned int *start = cmds;
432
433 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
434 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
435
436 /* RB_MODE_CONTROL */
437 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
438 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
439 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
440 /* RB_RENDER_CONTROL */
441 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
442 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
443
444 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
445 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
446 /* RB_COPY_CONTROL */
447 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
448 RB_CLEAR_MODE_RESOLVE) |
449 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
450 tmp_ctx.gmem_base >> 14);
451 /* RB_COPY_DEST_BASE */
452 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
453 shadow->gmemshadow.gpuaddr >> 5);
454 /* RB_COPY_DEST_PITCH */
455 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
456 (shadow->pitch * 4) / 32);
457 /* RB_COPY_DEST_INFO */
458 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
459 RB_TILINGMODE_LINEAR) |
460 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
461 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
462 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
463
464 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
465 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
466 /* GRAS_SC_CONTROL */
467 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
468
469 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
470 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
471 /* VFD_CONTROL_0 */
472 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
473 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
474 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
475 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
476 /* VFD_CONTROL_1 */
477 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
478 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
479 _SET(VFD_CTRLREG1_REGID4INST, 252);
480
481 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
482 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
483 /* VFD_FETCH_INSTR_0_0 */
484 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
485 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
486 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
487 /* VFD_FETCH_INSTR_1_0 */
488 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
489 shadow->quad_vertices.gpuaddr);
490
491 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
492 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
493 /* VFD_DECODE_INSTR_0 */
494 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
495 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
496 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
497 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
498 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
499 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
500
501 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
502 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
503 /* HLSQ_CONTROL_0_REG */
504 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
505 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
506 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
507 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
508 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
509 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
510 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
511 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
512 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
513 /* HLSQ_CONTROL_1_REG */
514 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
515 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
516 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
517 /* HLSQ_CONTROL_2_REG */
518 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
519 /* HLSQ_CONTROL_3_REG */
520 *cmds++ = 0x00000000;
521
522 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
523 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
524 /* HLSQ_VS_CONTROL_REG */
525 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
526 /* HLSQ_FS_CONTROL_REG */
527 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
528 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
529 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
530 /* HLSQ_CONST_VSPRESV_RANGE_REG */
531 *cmds++ = 0x00000000;
532 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
533 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
534 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
535
536 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
537 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
538 /* SP_FS_LENGTH_REG */
539 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
540
541 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
542 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
543 /* SP_SP_CTRL_REG */
544 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
545 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
546
547 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
548 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
549 /* SP_VS_CTRL_REG0 */
550 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
551 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
552 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
553 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
554 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
555 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
556 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
557 /* SP_VS_CTRL_REG1 */
558 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
559 /* SP_VS_PARAM_REG */
560 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
561 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
562 /* SP_VS_OUT_REG_0 */
563 *cmds++ = 0x00000000;
564 /* SP_VS_OUT_REG_1 */
565 *cmds++ = 0x00000000;
566 /* SP_VS_OUT_REG_2 */
567 *cmds++ = 0x00000000;
568 /* SP_VS_OUT_REG_3 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_4 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_5 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_6 */
575 *cmds++ = 0x00000000;
576 /* SP_VS_OUT_REG_7 */
577 *cmds++ = 0x00000000;
578
579 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
580 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
581 /* SP_VS_VPC_DST_REG_0 */
582 *cmds++ = 0x00000000;
583 /* SP_VS_VPC_DST_REG_1 */
584 *cmds++ = 0x00000000;
585 /* SP_VS_VPC_DST_REG_2 */
586 *cmds++ = 0x00000000;
587 /* SP_VS_VPC_DST_REG_3 */
588 *cmds++ = 0x00000000;
589 /* SP_VS_OBJ_OFFSET_REG */
590 *cmds++ = 0x00000000;
591 /* SP_VS_OBJ_START_REG */
592 *cmds++ = 0x00000000;
593
594 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
595 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
596 /* SP_VS_LENGTH_REG */
597 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
598 /* SP_FS_CTRL_REG0 */
599 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
600 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
601 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
602 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
603 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
604 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
605 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
606 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
607 /* SP_FS_CTRL_REG1 */
608 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
609 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
610 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
611 /* SP_FS_OBJ_OFFSET_REG */
612 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
613 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
614 /* SP_FS_OBJ_START_REG */
615 *cmds++ = 0x00000000;
616
617 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
618 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
619 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
620 *cmds++ = 0x00000000;
621 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
622 *cmds++ = 0x00000000;
623
624 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
625 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
626 /* SP_FS_OUTPUT_REG */
627 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
628
629 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
630 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
631 /* SP_FS_MRT_REG_0 */
632 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
633 /* SP_FS_MRT_REG_1 */
634 *cmds++ = 0x00000000;
635 /* SP_FS_MRT_REG_2 */
636 *cmds++ = 0x00000000;
637 /* SP_FS_MRT_REG_3 */
638 *cmds++ = 0x00000000;
639
640 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
641 *cmds++ = CP_REG(A3XX_VPC_ATTR);
642 /* VPC_ATTR */
643 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
644 _SET(VPC_VPCATTR_LMSIZE, 1);
645 /* VPC_PACK */
646 *cmds++ = 0x00000000;
647 /* VPC_VARRYING_INTERUPT_MODE_0 */
648 *cmds++ = 0x00000000;
649 /* VPC_VARRYING_INTERUPT_MODE_1 */
650 *cmds++ = 0x00000000;
651 /* VPC_VARRYING_INTERUPT_MODE_2 */
652 *cmds++ = 0x00000000;
653 /* VPC_VARRYING_INTERUPT_MODE_3 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARYING_PS_REPL_MODE_0 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARYING_PS_REPL_MODE_1 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARYING_PS_REPL_MODE_2 */
660 *cmds++ = 0x00000000;
661 /* VPC_VARYING_PS_REPL_MODE_3 */
662 *cmds++ = 0x00000000;
663
664 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
665 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
666 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
667 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
668 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
669 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
670 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
671
672 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
673 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
674 /* end; */
675 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
676 /* nop; */
677 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
678 /* nop; */
679 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
680
681 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
682 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
683 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
684 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
685 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
686 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
687 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
688
689 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
690 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
691 /* end; */
692 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
693 /* nop; */
694 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
695 /* nop; */
696 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
697
698 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
699 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
700 /* RB_MSAA_CONTROL */
701 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
702 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
703
704 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
705 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
706 /* RB_DEPTH_CONTROL */
707 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
708
709 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
710 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
711 /* RB_MRT_CONTROL0 */
712 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
713 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
714 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
715 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
716
717 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
718 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
719 /* RB_MRT_BLEND_CONTROL0 */
720 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
721 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
722 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
723 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
724 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
725 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
726 /* RB_MRT_CONTROL1 */
727 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
728 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
729 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
730
731 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
732 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
733 /* RB_MRT_BLEND_CONTROL1 */
734 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
735 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
736 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
737 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
738 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
739 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
740 /* RB_MRT_CONTROL2 */
741 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
742 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
743 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
744
745 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
746 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
747 /* RB_MRT_BLEND_CONTROL2 */
748 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
749 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
750 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
751 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
752 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
753 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
754 /* RB_MRT_CONTROL3 */
755 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
756 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
757 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
758
759 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
760 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
761 /* RB_MRT_BLEND_CONTROL3 */
762 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
763 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
764 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
765 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
766 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
767 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
768
769 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
770 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
771 /* VFD_INDEX_MIN */
772 *cmds++ = 0x00000000;
773 /* VFD_INDEX_MAX */
774 *cmds++ = 0xFFFFFFFF;
775 /* VFD_INSTANCEID_OFFSET */
776 *cmds++ = 0x00000000;
777 /* VFD_INDEX_OFFSET */
778 *cmds++ = 0x00000000;
779
780 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
781 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
782 /* VFD_VS_THREADING_THRESHOLD */
783 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
784 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
785
786 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
787 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
788 /* TPL1_TP_VS_TEX_OFFSET */
789 *cmds++ = 0;
790
791 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
792 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
793 /* TPL1_TP_FS_TEX_OFFSET */
794 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
795 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
796 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
797
798 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
799 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
800 /* PC_PRIM_VTX_CNTL */
801 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
802 PC_DRAW_TRIANGLES) |
803 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
804 PC_DRAW_TRIANGLES) |
805 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
806
807 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
808 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
809 /* GRAS_SC_WINDOW_SCISSOR_TL */
810 *cmds++ = 0x00000000;
811 /* GRAS_SC_WINDOW_SCISSOR_BR */
812 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
813 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
814
815 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
816 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
817 /* GRAS_SC_SCREEN_SCISSOR_TL */
818 *cmds++ = 0x00000000;
819 /* GRAS_SC_SCREEN_SCISSOR_BR */
820 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
821 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
822
823 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
824 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
825 /* GRAS_CL_VPORT_XOFFSET */
826 *cmds++ = 0x00000000;
827 /* GRAS_CL_VPORT_XSCALE */
828 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
829 /* GRAS_CL_VPORT_YOFFSET */
830 *cmds++ = 0x00000000;
831 /* GRAS_CL_VPORT_YSCALE */
832 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
833
834 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
835 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
836 /* GRAS_CL_VPORT_ZOFFSET */
837 *cmds++ = 0x00000000;
838 /* GRAS_CL_VPORT_ZSCALE */
839 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
840
841 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
842 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
843 /* GRAS_CL_CLIP_CNTL */
844 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
845 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
846 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
847 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
848 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
849
850 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
851 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
852 /* GRAS_CL_GB_CLIP_ADJ */
853 *cmds++ = 0x00000000;
854
855 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
856 *cmds++ = 0x00000000;
857
858 /*
859 * Resolve using two draw calls with a dummy register
860 * write in between. This is a HLM workaround
861 * that should be removed later.
862 */
863 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
864 *cmds++ = 0x00000000; /* Viz query info */
865 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
866 PC_DI_SRC_SEL_IMMEDIATE,
867 PC_DI_INDEX_SIZE_32_BIT,
868 PC_DI_IGNORE_VISIBILITY);
869 *cmds++ = 0x00000003; /* Num indices */
870 *cmds++ = 0x00000000; /* Index 0 */
871 *cmds++ = 0x00000001; /* Index 1 */
872 *cmds++ = 0x00000002; /* Index 2 */
873
874 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
875 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
876 *cmds++ = 0x00000000;
877
878 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
879 *cmds++ = 0x00000000; /* Viz query info */
880 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
881 PC_DI_SRC_SEL_IMMEDIATE,
882 PC_DI_INDEX_SIZE_32_BIT,
883 PC_DI_IGNORE_VISIBILITY);
884 *cmds++ = 0x00000003; /* Num indices */
885 *cmds++ = 0x00000002; /* Index 0 */
886 *cmds++ = 0x00000001; /* Index 1 */
887 *cmds++ = 0x00000003; /* Index 2 */
888
889 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
890 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
891 *cmds++ = 0x00000000;
892
893 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
894 *cmds++ = 0x00000000;
895
896 /* Create indirect buffer command for above command sequence */
897 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
898
899 return cmds;
900}
901
/*
 * Build the IB that saves the active vertex and fragment shader
 * instruction stores from the HLSQ shadow memory into the context's
 * gpustate shadow buffer.
 *
 * The CP_REG_TO_MEM source address / dword count and the COND_EXEC
 * predicates depend on SP_VS_CTRL_REG0 / SP_FS_CTRL_REG0, which are only
 * known at the moment the context is actually saved. Placeholder dwords
 * are therefore reserved here and patched at save time by the fixup IB
 * (see build_save_fixup_cmds).
 */
static void build_shader_save_cmds(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;

	/* Reserve space for boolean values used for COND_EXEC packet */
	drawctxt->cond_execs[0].hostptr = cmd;
	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[1].hostptr = cmd;
	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/*
	 * Reserve two scratch dwords. NOTE(review): these appear to serve
	 * as CP_REG_TO_MEM destinations for SP register values written by
	 * the fixup IB (build_save_fixup_cmds writes register contents to
	 * shader_save_commands[1].gpuaddr in the CPU-sync path) — confirm
	 * against the consumers before relying on this.
	 */
	drawctxt->shader_save_commands[0].hostptr = cmd;
	drawctxt->shader_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->shader_save_commands[1].hostptr = cmd;
	drawctxt->shader_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	start = cmd;

	/* Save vertex shader */

	/*
	 * Conditionally execute the next 3 dwords (the CP_REG_TO_MEM
	 * below) only when the predicate at cond_execs[0] is non-zero.
	 */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* This dword (loop count | source) is patched by the fixup IB */
	drawctxt->shader_save_commands[2].hostptr = cmd;
	drawctxt->shader_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 *
	 * dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
	 *
	 * From regspec:
	 * SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	 * If bit31 is 1, it means overflow
	 * or any long shader.
	 *
	 * src = (HLSQ_SHADOW_BASE + 0x1000)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: VS half of the shader shadow (dword aligned) */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Save fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* This dword (loop count | source) is patched by the fixup IB */
	drawctxt->shader_save_commands[3].hostptr = cmd;
	drawctxt->shader_save_commands[3].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 *
	 * dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
	 *
	 * From regspec:
	 * SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	 * If bit31 is 1, it means overflow
	 * or any long shader.
	 *
	 * fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
	 * From regspec:
	 *
	 * SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
	 * First instruction of the whole shader will be stored from
	 * the offset in instruction cache, unit = 256bits, a cache line.
	 * It can start from 0 if no VS available.
	 *
	 * src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: FS half of the shader shadow (dword aligned) */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
994
995/*
996 * Make an IB to modify context save IBs with the correct shader instruction
997 * and constant sizes and offsets.
998 */
999
1000static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
1001 struct adreno_context *drawctxt)
1002{
1003 unsigned int *cmd = tmp_ctx.cmd;
1004 unsigned int *start = cmd;
1005
1006 /* Flush HLSQ lazy updates */
1007 *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
1008 *cmd++ = 0x7; /* HLSQ_FLUSH */
1009 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1010 *cmd++ = 0;
1011
1012 *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
1013 *cmd++ = 0x00000000; /* No start addr for full invalidate */
1014 *cmd++ = (unsigned int)
1015 UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
1016 UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
1017 0; /* No end addr for full invalidate */
1018
1019 /* Make sure registers are flushed */
1020 *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
1021 *cmd++ = 0;
1022
1023#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
1024
1025 /* Save shader sizes */
1026 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1027 *cmd++ = A3XX_SP_VS_CTRL_REG0;
1028 *cmd++ = drawctxt->shader_save_commands[2].gpuaddr;
1029
1030 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1031 *cmd++ = A3XX_SP_FS_CTRL_REG0;
1032 *cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1033
1034 /* Save shader offsets */
1035 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1036 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1037 *cmd++ = drawctxt->shader_save_commands[1].gpuaddr;
1038
1039 /* Save constant sizes */
1040 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1041 *cmd++ = A3XX_SP_VS_CTRL_REG1;
1042 *cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
1043 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1044 *cmd++ = A3XX_SP_FS_CTRL_REG1;
1045 *cmd++ = drawctxt->constant_save_commands[2].gpuaddr;
1046
1047 /* Save FS constant offset */
1048 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1049 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1050 *cmd++ = drawctxt->constant_save_commands[0].gpuaddr;
1051
1052
1053 /* Save VS instruction store mode */
1054 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1055 *cmd++ = A3XX_SP_VS_CTRL_REG0;
1056 *cmd++ = drawctxt->cond_execs[0].gpuaddr;
1057
1058 /* Save FS instruction store mode */
1059 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1060 *cmd++ = A3XX_SP_FS_CTRL_REG0;
1061 *cmd++ = drawctxt->cond_execs[1].gpuaddr;
1062#else
1063
1064 /* Shader save */
1065 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
1066 11+REG_TO_MEM_LOOP_COUNT_SHIFT,
1067 (HLSQ_SHADOW_BASE + 0x1000) / 4,
1068 drawctxt->shader_save_commands[2].gpuaddr);
1069
1070 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
1071 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1072 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1073 *cmd++ = 0x00000000; /* AND value */
1074 *cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
1075 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
1076 | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
1077 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1078 *cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
1079 A3XX_CP_SCRATCH_REG2;
1080 *cmd++ = 0x7f000000; /* AND value */
1081 *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */
1082
1083 /*
1084 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
1085 * SP_FS_OBJ_OFFSET_REG
1086 */
1087
1088 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1089 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
1090 *cmd++ = 0x00000000; /* AND value */
1091 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
1092 /*
1093 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
1094 * 0x00000000
1095 */
1096 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1097 *cmd++ = A3XX_CP_SCRATCH_REG3;
1098 *cmd++ = 0xfe000000; /* AND value */
1099 *cmd++ = 0x00000000; /* OR value */
1100 /*
1101 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
1102 */
1103 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1104 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1105 *cmd++ = 0xffffffff; /* AND value */
1106 *cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */
1107
1108 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1109 *cmd++ = A3XX_CP_SCRATCH_REG2;
1110 *cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1111
1112 /* Constant save */
1113 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
Jordan Croused0070882012-02-21 08:54:52 -07001114 2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
1115 (HLSQ_SHADOW_BASE + 0x2000) / 4,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001116 drawctxt->constant_save_commands[1].gpuaddr);
1117
1118 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
Jordan Croused0070882012-02-21 08:54:52 -07001119 2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
1120 (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001121 drawctxt->constant_save_commands[2].gpuaddr);
1122
1123 cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
1124 18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
1125 drawctxt->constant_save_commands[2].gpuaddr
1126 + sizeof(unsigned int));
1127
1128 /* Modify constant save conditionals */
1129 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
1130 0, 0, drawctxt->cond_execs[2].gpuaddr);
1131
1132 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
1133 0, 0, drawctxt->cond_execs[3].gpuaddr);
1134
1135 /* Save VS instruction store mode */
1136
1137 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
1138 31, 0, drawctxt->cond_execs[0].gpuaddr);
1139
1140 /* Save FS instruction store mode */
1141 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
1142 31, 0, drawctxt->cond_execs[1].gpuaddr);
1143
1144#endif
1145
1146 create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);
1147
1148 tmp_ctx.cmd = cmd;
1149}
1150
1151/****************************************************************************/
1152/* Functions to build context restore IBs */
1153/****************************************************************************/
1154
1155static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1156 struct adreno_context *drawctxt,
1157 struct gmem_shadow_t *shadow)
1158{
1159 unsigned int *cmds = tmp_ctx.cmd;
1160 unsigned int *start = cmds;
1161
1162 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1163 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1164 /* HLSQ_CONTROL_0_REG */
1165 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1166 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1167 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1168 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1169 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1170 /* HLSQ_CONTROL_1_REG */
1171 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1172 /* HLSQ_CONTROL_2_REG */
1173 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1174 /* HLSQ_CONTROL3_REG */
1175 *cmds++ = 0x00000000;
1176
1177 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1178 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1179 /* RB_MRT_BUF_INFO0 */
1180 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1181 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1182 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1183 (shadow->gmem_pitch * 4 * 8) / 256);
1184 /* RB_MRT_BUF_BASE0 */
1185 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1186
1187 /* Texture samplers */
1188 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1189 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1190 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1191 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1192 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1193 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1194 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1195 *cmds++ = 0x00000240;
1196 *cmds++ = 0x00000000;
1197
1198 /* Texture memobjs */
1199 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1200 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1201 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1202 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1203 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1204 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1205 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1206 *cmds++ = 0x4cc06880;
1207 *cmds++ = shadow->height | (shadow->width << 14);
1208 *cmds++ = (shadow->pitch*4*8) << 9;
1209 *cmds++ = 0x00000000;
1210
1211 /* Mipmap bases */
1212 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1213 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1214 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1215 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1216 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1217 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1218 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1219 *cmds++ = shadow->gmemshadow.gpuaddr;
1220 *cmds++ = 0x00000000;
1221 *cmds++ = 0x00000000;
1222 *cmds++ = 0x00000000;
1223 *cmds++ = 0x00000000;
1224 *cmds++ = 0x00000000;
1225 *cmds++ = 0x00000000;
1226 *cmds++ = 0x00000000;
1227 *cmds++ = 0x00000000;
1228 *cmds++ = 0x00000000;
1229 *cmds++ = 0x00000000;
1230 *cmds++ = 0x00000000;
1231 *cmds++ = 0x00000000;
1232 *cmds++ = 0x00000000;
1233
1234 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1235 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1236 /* HLSQ_VS_CONTROL_REG */
1237 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1238 /* HLSQ_FS_CONTROL_REG */
1239 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1240 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1241 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1242 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1243 *cmds++ = 0x00000000;
1244 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1245 *cmds++ = 0x00000000;
1246
1247 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1248 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1249 /* SP_FS_LENGTH_REG */
1250 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1251
1252 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1253 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1254 /* SP_VS_CTRL_REG0 */
1255 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1256 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1257 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1258 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1259 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1260 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1261 /* SP_VS_CTRL_REG1 */
1262 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1263 /* SP_VS_PARAM_REG */
1264 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1265 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1266 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1267 /* SP_VS_OUT_REG0 */
1268 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1269 /* SP_VS_OUT_REG1 */
1270 *cmds++ = 0x00000000;
1271 /* SP_VS_OUT_REG2 */
1272 *cmds++ = 0x00000000;
1273 /* SP_VS_OUT_REG3 */
1274 *cmds++ = 0x00000000;
1275 /* SP_VS_OUT_REG4 */
1276 *cmds++ = 0x00000000;
1277 /* SP_VS_OUT_REG5 */
1278 *cmds++ = 0x00000000;
1279 /* SP_VS_OUT_REG6 */
1280 *cmds++ = 0x00000000;
1281 /* SP_VS_OUT_REG7 */
1282 *cmds++ = 0x00000000;
1283
1284 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1285 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1286 /* SP_VS_VPC_DST_REG0 */
1287 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1288 /* SP_VS_VPC_DST_REG1 */
1289 *cmds++ = 0x00000000;
1290 /* SP_VS_VPC_DST_REG2 */
1291 *cmds++ = 0x00000000;
1292 /* SP_VS_VPC_DST_REG3 */
1293 *cmds++ = 0x00000000;
1294 /* SP_VS_OBJ_OFFSET_REG */
1295 *cmds++ = 0x00000000;
1296 /* SP_VS_OBJ_START_REG */
1297 *cmds++ = 0x00000000;
1298
1299 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1300 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1301 /* SP_VS_LENGTH_REG */
1302 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1303 /* SP_FS_CTRL_REG0 */
1304 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1305 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1306 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1307 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1308 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1309 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1310 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1311 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1312 /* SP_FS_CTRL_REG1 */
1313 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1314 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1315 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1316 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001317 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
1318 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001319 /* SP_FS_OBJ_START_REG */
1320 *cmds++ = 0x00000000;
1321
1322 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1323 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1324 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1325 *cmds++ = 0x00000000;
1326 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1327 *cmds++ = 0x00000000;
1328
1329 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1330 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1331 /* SP_FS_OUT_REG */
1332 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1333
Jordan Crousea7ec4212012-02-04 10:23:52 -07001334 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001335 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1336 /* SP_FS_MRT_REG0 */
1337 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1338 /* SP_FS_MRT_REG1 */
1339 *cmds++ = 0;
1340 /* SP_FS_MRT_REG2 */
1341 *cmds++ = 0;
1342 /* SP_FS_MRT_REG3 */
1343 *cmds++ = 0;
1344
1345 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1346 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1347 /* VPC_ATTR */
1348 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1349 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1350 _SET(VPC_VPCATTR_LMSIZE, 1);
1351 /* VPC_PACK */
1352 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1353 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1354 /* VPC_VARYING_INTERP_MODE_0 */
1355 *cmds++ = 0x00000000;
1356 /* VPC_VARYING_INTERP_MODE1 */
1357 *cmds++ = 0x00000000;
1358 /* VPC_VARYING_INTERP_MODE2 */
1359 *cmds++ = 0x00000000;
1360 /* VPC_VARYING_IINTERP_MODE3 */
1361 *cmds++ = 0x00000000;
1362 /* VPC_VARRYING_PS_REPL_MODE_0 */
1363 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1364 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1365 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1366 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1367 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1377 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1378 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1379 /* VPC_VARRYING_PS_REPL_MODE_1 */
1380 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1381 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1382 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1388 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1389 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1392 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1393 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1394 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1395 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1396 /* VPC_VARRYING_PS_REPL_MODE_2 */
1397 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1398 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1399 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1405 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1406 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1409 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1410 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1411 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1412 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1413 /* VPC_VARRYING_PS_REPL_MODE_3 */
1414 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1415 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1416 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1422 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1423 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1426 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1427 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1428 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1429 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1430
Jordan Crousea7ec4212012-02-04 10:23:52 -07001431 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001432 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1433 /* SP_SP_CTRL_REG */
1434 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1435
1436 /* Load vertex shader */
1437 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1438 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1439 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1440 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1441 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1442 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1443 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1444 /* (sy)end; */
1445 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1446 /* nop; */
1447 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1448 /* nop; */
1449 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1450 /* nop; */
1451 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1452
1453 /* Load fragment shader */
1454 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1455 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1456 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1457 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1458 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1459 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1460 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1461 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1462 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1463 /* (rpt5)nop; */
1464 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1465 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1466 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1467 /* (sy)mov.f32f32 r1.x, r0.x; */
1468 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1469 /* mov.f32f32 r1.y, r0.y; */
1470 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1471 /* mov.f32f32 r1.z, r0.z; */
1472 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1473 /* mov.f32f32 r1.w, r0.w; */
1474 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1475 /* end; */
1476 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1477
1478 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1479 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1480 /* VFD_CONTROL_0 */
1481 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1482 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1483 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1484 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1485 /* VFD_CONTROL_1 */
1486 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1487 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1488 _SET(VFD_CTRLREG1_REGID4INST, 252);
1489
1490 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1491 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1492 /* VFD_FETCH_INSTR_0_0 */
1493 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1494 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1495 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1496 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1497 /* VFD_FETCH_INSTR_1_0 */
1498 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1499 shadow->quad_vertices_restore.gpuaddr);
1500 /* VFD_FETCH_INSTR_0_1 */
1501 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1502 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1503 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1504 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1505 /* VFD_FETCH_INSTR_1_1 */
1506 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1507 shadow->quad_vertices_restore.gpuaddr + 16);
1508
1509 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1510 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1511 /* VFD_DECODE_INSTR_0 */
1512 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1513 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1514 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1515 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1516 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1517 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1518 /* VFD_DECODE_INSTR_1 */
1519 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1520 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1521 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1522 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1523 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1524 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1525
1526 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1527 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1528 /* RB_DEPTH_CONTROL */
1529 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1530
1531 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1532 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1533 /* RB_STENCIL_CONTROL */
1534 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1535 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1536 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1537 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1538 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1539 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1540 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1541 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1542
1543 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1544 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1545 /* RB_MODE_CONTROL */
1546 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1547 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1548
1549 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1550 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1551 /* RB_RENDER_CONTROL */
1552 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1553 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1554
1555 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1556 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1557 /* RB_MSAA_CONTROL */
1558 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1559 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1560
1561 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1562 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1563 /* RB_MRT_CONTROL0 */
1564 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1565 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1566 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1567 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1568
1569 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1570 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1571 /* RB_MRT_BLENDCONTROL0 */
1572 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1573 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1574 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1575 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1576 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1577 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1578 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1579 /* RB_MRT_CONTROL1 */
1580 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1581 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1582 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1583
1584 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1585 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1586 /* RB_MRT_BLENDCONTROL1 */
1587 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1588 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1589 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1590 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1591 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1592 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1593 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1594 /* RB_MRT_CONTROL2 */
1595 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1596 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1597 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1598
1599 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1600 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1601 /* RB_MRT_BLENDCONTROL2 */
1602 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1603 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1604 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1605 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1606 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1607 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1608 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1609 /* RB_MRT_CONTROL3 */
1610 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1611 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1612 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1613
1614 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1615 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1616 /* RB_MRT_BLENDCONTROL3 */
1617 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1618 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1619 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1620 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1621 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1622 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1623 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1624
1625 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1626 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1627 /* VFD_INDEX_MIN */
1628 *cmds++ = 0x00000000;
1629 /* VFD_INDEX_MAX */
1630 *cmds++ = 0xFFFFFFFF;
1631 /* VFD_INDEX_OFFSET */
1632 *cmds++ = 0x00000000;
1633 /* TPL1_TP_VS_TEX_OFFSET */
1634 *cmds++ = 0x00000000;
1635
1636 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1637 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1638 /* VFD_VS_THREADING_THRESHOLD */
1639 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1640 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1641
1642 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1643 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1644 /* TPL1_TP_VS_TEX_OFFSET */
1645 *cmds++ = 0x00000000;
1646
1647 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1648 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1649 /* TPL1_TP_FS_TEX_OFFSET */
1650 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1651 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1652 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1653
1654 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1655 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1656 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001657 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1658 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1659 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001660
1661 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1662 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1663 /* GRAS_SU_MODE_CONTROL */
1664 *cmds++ = 0x00000000;
1665
1666 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1667 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1668 /* GRAS_SC_WINDOW_SCISSOR_TL */
1669 *cmds++ = 0x00000000;
1670 /* GRAS_SC_WINDOW_SCISSOR_BR */
1671 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1672 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1673
1674 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1675 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1676 /* GRAS_SC_SCREEN_SCISSOR_TL */
1677 *cmds++ = 0x00000000;
1678 /* GRAS_SC_SCREEN_SCISSOR_BR */
1679 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1680 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1681
1682 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1683 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1684 /* GRAS_CL_VPORT_XOFFSET */
1685 *cmds++ = 0x00000000;
1686 /* GRAS_CL_VPORT_XSCALE */
1687 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1688 /* GRAS_CL_VPORT_YOFFSET */
1689 *cmds++ = 0x00000000;
1690 /* GRAS_CL_VPORT_YSCALE */
1691 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1692
1693 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1694 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1695 /* GRAS_CL_VPORT_ZOFFSET */
1696 *cmds++ = 0x00000000;
1697 /* GRAS_CL_VPORT_ZSCALE */
1698 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1699
1700 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1701 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1702 /* GRAS_CL_CLIP_CNTL */
1703 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1704
1705 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1706 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1707 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1708 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1709
1710 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1711 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1712 /* PC_PRIM_VTX_CONTROL */
1713 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1714 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1715 PC_DRAW_TRIANGLES) |
1716 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1717 PC_DRAW_TRIANGLES) |
1718 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1719
1720 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1721 *cmds++ = 0x00000000; /* Viz query info */
1722 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1723 PC_DI_SRC_SEL_AUTO_INDEX,
1724 PC_DI_INDEX_SIZE_16_BIT,
1725 PC_DI_IGNORE_VISIBILITY);
1726 *cmds++ = 0x00000002; /* Num indices */
1727
1728 /* Create indirect buffer command for above command sequence */
1729 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1730
1731 return cmds;
1732}
1733
/*
 * build_regrestore_cmds - Build the IB that restores shadowed context
 * registers for a context switch.
 * @adreno_dev: Adreno device pointer
 * @drawctxt: context whose restore IB is being built
 *
 * Assembles a command stream at tmp_ctx.cmd: flush HLSQ, invalidate the
 * entire UCHE, reload the per-context register ranges from the register
 * shadow with CP_LOAD_CONSTANT_CONTEXT, then reload each "global"
 * register with a type-0 packet. The payload slot of each type-0 packet
 * is recorded in tmp_ctx.reg_values[] so the save path can patch the
 * current value in later. The finished stream is wrapped into
 * drawctxt->reg_restore by create_ib1().
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	lcc_start = cmd;

	/*
	 * Placeholder for the CP_LOAD_CONSTANT_CONTEXT header; the packet
	 * size is not known until all register ranges have been emitted,
	 * so the header is patched in below via lcc_start[0].
	 */
	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Emit one range descriptor per (start, end) pair */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Back-patch the deferred header now that the size is known */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * Global registers are restored with individual type-0 writes.
	 * Remember the GPU address of each value slot so it can be
	 * filled in with the saved register value.
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1791
/*
 * build_constantrestore_cmds - Build the IB that restores shader
 * constants and texture state for a context.
 * @adreno_dev: Adreno device pointer
 * @drawctxt: context whose constant-restore IB is being built
 *
 * The vertex/fragment constant loads are conditional (CP_COND_EXEC on
 * drawctxt->cond_execs[2]/[3]) and partially self-modifying: the ord1
 * and ord2 dwords of the CP_LOAD_STATE packets are left zero here and
 * patched at runtime by the fixup IBs (see build_restore_fixup_cmds),
 * which is why their GPU addresses are recorded in
 * drawctxt->constant_load_commands[]. Texture memory objects, mipmap
 * tables and sampler objects for both shader stages are then reloaded
 * unconditionally from their shadow offsets. The finished stream is
 * wrapped into drawctxt->constant_restore.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/*
	 * Reserve two dwords used as conditional-execution flags for the
	 * VS/FS constant loads; the save-time fixups write the condition
	 * values here.
	 */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable register shadowing for the HLSQ update */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
						      &drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 - patched at runtime by fixup IB */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 - patched at runtime by fixup IB */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 - patched at runtime by fixup IB */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1956
/*
 * build_shader_restore_cmds - Build the IB that reloads the saved
 * vertex and fragment shader programs.
 * @adreno_dev: Adreno device pointer
 * @drawctxt: context whose shader-restore IB is being built
 *
 * Each shader load is gated by a CP_COND_EXEC on cond_execs[0]/[1] so
 * it only runs when a shader was actually saved. The ord1 dword of
 * each CP_LOAD_STATE is left zero and patched at runtime (its address
 * is recorded in drawctxt->shader_load_commands[]) because the shader
 * length is only known from SP_*_CTRL_REG0 at save time. The fragment
 * shader shadow lives in the second half of the SHADER_SHADOW_SIZE
 * region. The stream is wrapped into drawctxt->shader_restore.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 - patched at runtime by fixup IB */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 - patched at runtime by fixup IB */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2023
/*
 * build_hlsqcontrol_restore_cmds - Build the IB that restores
 * A3XX_HLSQ_CONTROL_0_REG.
 * @adreno_dev: Adreno device pointer
 * @drawctxt: context whose HLSQ-control restore IB is being built
 *
 * The register value dword is left zero here; its GPU address is
 * recorded in drawctxt->hlsqcontrol_restore_commands[0] so the
 * restore-fixup IB can write the saved value into it at runtime.
 */
static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
					   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
	drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
	    = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;	/* value slot - patched at runtime by fixup IB */

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);

	tmp_ctx.cmd = cmd;
}
2042
/*
 * build_restore_fixup_cmds - Build the IB that patches the shader and
 * constant sizes/offsets inside the restore IBs.
 * @adreno_dev: Adreno device pointer
 * @drawctxt: context whose restore-fixup IB is being built
 *
 * The restore IBs built above contain zeroed CP_LOAD_STATE ord1/ord2
 * dwords because the shader and constant lengths are only known at
 * save time. This IB reads the relevant SP_* control registers,
 * masks/shifts/ORs them into the expected packet encoding with
 * rmw_regtomem(), and writes the result into the recorded slots in
 * the restore IBs (self-modifying command buffers). The
 * GSL_CONTEXT_SWITCH_CPU_SYNC variant stores the raw register values
 * instead, for CPU-side fixup.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes: VS/FS_LENGTH lives in bits [31:24], shift it
	   into the numunits field (bit 22) and OR in stateblock/mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals: a zero constant length
	   makes the corresponding CP_COND_EXEC skip the load */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2115
/*
 * a3xx_create_gpustate_shadow - Build all save/restore IBs for a
 * context's GPU state shadow.
 * @adreno_dev: Adreno device pointer
 * @drawctxt: context being initialized
 *
 * Marks the context as having a state shadow and runs every IB
 * builder. The call order matters: each builder appends to the shared
 * command space via tmp_ctx.cmd, and the fixup builders reference the
 * command slots recorded by the restore/save builders.
 *
 * Returns 0 (the builders themselves cannot fail).
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2132
/*
 * a3xx_create_gmem_shadow - Allocate the GMEM shadow buffer and build
 * the GMEM save/restore IBs for a context.
 * @adreno_dev: Adreno device pointer
 * @drawctxt: context being initialized
 *
 * Sizes and allocates the shadow memory for on-chip GMEM, builds the
 * quad vertex buffer plus the gmem2sys (save) and sys2gmem (restore)
 * command streams, flushes the CPU cache for the shadow, and sets
 * CTXT_FLAGS_GMEM_SHADOW.
 *
 * Returns 0 on success or the error from kgsl_allocate().
 */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	int result;

	calc_gmemsize(&drawctxt->context_gmem_shadow,
		      adreno_dev->gmemspace.sizebytes);
	tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;

	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
			       drawctxt->pagetable,
			       drawctxt->context_gmem_shadow.size);

	if (result)
		return result;

	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
			   &tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);

	/* Flush the shadow so the GPU sees the freshly-built commands */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;

	return 0;
}
2167
/*
 * a3xx_drawctxt_create - Allocate and initialize the per-context
 * save/restore state for an A3XX draw context.
 * @adreno_dev: Adreno device pointer
 * @drawctxt: the new context
 *
 * Returns 0 on success or a negative error code; on failure the
 * gpustate allocation is freed before returning.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more than just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
			    drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	/* Command building starts after the register/constant shadows */
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	/* Preamble contexts restore state themselves - no shadow needed */
	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2206
/*
 * a3xx_drawctxt_save - Save the hardware state of the outgoing context
 * by submitting its save IBs to the ringbuffer.
 * @adreno_dev: Adreno device pointer
 * @context: outgoing context (may be NULL, in which case nothing is
 *           saved)
 *
 * Ordering is significant: the save fixups must run before the
 * register/constant save, the shader save before the GMEM save (the
 * GMEM save changes shader state).
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			       struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			       "Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			/* Mark that a restore is required on switch-in */
			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2250
/*
 * a3xx_drawctxt_restore - Restore the hardware state of the incoming
 * context by submitting its restore IBs to the ringbuffer.
 * @adreno_dev: Adreno device pointer
 * @context: incoming context; NULL switches to the default pagetable
 *           only
 *
 * Records the incoming context pointer in the memstore, switches the
 * MMU pagetable, then replays GMEM, registers, constants, shaders and
 * HLSQ control in that order (GMEM restore clobbers shader state, so
 * it must come first).
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
				  struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Publish the new current context pointer via the memstore */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
	    KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
	cmds[4] = (unsigned int)context;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(device, context->pagetable);

	/*
	 * Restore GMEM. (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
						    context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
					    context->hlsqcontrol_restore, 3);
	}
}
2306
/*
 * a3xx_rb_init - Submit the CP_ME_INIT packet that initializes the
 * micro engine after ringbuffer start.
 * @adreno_dev: Adreno device pointer
 * @rb: the ringbuffer to initialize
 *
 * Writes an 18-dword sequence (1 packet header + 17 payload dwords)
 * directly into the ringbuffer and submits it. The payload values are
 * the standard A3XX ME init parameters; 0x000003f7 is the mask of
 * which of the following fields are valid.
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	/* GPU address of the space just allocated (wptr already advanced) */
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2336
2337static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2338{
2339 struct kgsl_device *device = &adreno_dev->dev;
2340 const char *err = "";
2341
2342 switch (bit) {
2343 case A3XX_INT_RBBM_AHB_ERROR: {
2344 unsigned int reg;
2345
2346 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2347
2348 /*
2349 * Return the word address of the erroring register so that it
2350 * matches the register specification
2351 */
2352
2353 KGSL_DRV_CRIT(device,
2354 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2355 reg & (1 << 28) ? "WRITE" : "READ",
2356 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2357 (reg >> 24) & 0x3);
2358
2359 /* Clear the error */
2360 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2361 return;
2362 }
2363 case A3XX_INT_RBBM_REG_TIMEOUT:
2364 err = "RBBM: AHB register timeout";
2365 break;
2366 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2367 err = "RBBM: ME master split timeout";
2368 break;
2369 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2370 err = "RBBM: PFP master split timeout";
2371 break;
2372 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2373 err = "RBBM: ATB bus oveflow";
2374 break;
2375 case A3XX_INT_VFD_ERROR:
2376 err = "VFD: Out of bounds access";
2377 break;
2378 case A3XX_INT_CP_T0_PACKET_IN_IB:
2379 err = "ringbuffer TO packet in IB interrupt";
2380 break;
2381 case A3XX_INT_CP_OPCODE_ERROR:
2382 err = "ringbuffer opcode error interrupt";
2383 break;
2384 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2385 err = "ringbuffer reserved bit error interrupt";
2386 break;
2387 case A3XX_INT_CP_HW_FAULT:
2388 err = "ringbuffer hardware fault";
2389 break;
2390 case A3XX_INT_CP_REG_PROTECT_FAULT:
2391 err = "ringbuffer protected mode error interrupt";
2392 break;
2393 case A3XX_INT_CP_AHB_ERROR_HALT:
2394 err = "ringbuffer AHB error interrupt";
2395 break;
2396 case A3XX_INT_MISC_HANG_DETECT:
2397 err = "MISC: GPU hang detected";
2398 break;
2399 case A3XX_INT_UCHE_OOB_ACCESS:
2400 err = "UCHE: Out of bounds access";
2401 break;
2402 }
2403
2404 KGSL_DRV_CRIT(device, "%s\n", err);
2405 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2406}
2407
/*
 * a3xx_cp_callback - Handle CP (command processor) interrupts:
 * CP_IB1_INT, CP_IB2_INT and CP_RB_INT.
 * @adreno_dev: Adreno device pointer
 * @irq: the interrupt bit that fired
 *
 * Wakes any threads waiting on the device, queues the
 * timestamp-expired work and notifies the timestamp notifier chain.
 * For ringbuffer interrupts the ts_cmp_enable flag in the memstore is
 * cleared first (wmb() orders the shared-memory write before the
 * wakeups).
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	if (irq == A3XX_INT_CP_RB_INT) {
		kgsl_sharedmem_writel(&rb->device->memstore,
			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0);
		wmb();
		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&rb->device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);

	atomic_notifier_call_chain(&rb->device->ts_notifier_list,
				   rb->device->id, NULL);
}
2427
/* Initialize one entry of the a3xx_irq_funcs table */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupt sources enabled in A3XX_RBBM_INT_0_MASK. Only a subset of
 * the bits handled in a3xx_irq_funcs is unmasked here; bits left out
 * (e.g. GPU_IDLE, ME/PFP master split timeouts, the *_DONE_TS
 * timestamps, MISC_HANG_DETECT) are not delivered.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) |      \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2444
/*
 * Interrupt dispatch table, indexed by RBBM_INT_0 status bit number.
 * A NULL entry means the bit has no handler (a3xx_irq_handler logs it
 * as unhandled if it ever fires).
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2476
2477static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2478{
2479 struct kgsl_device *device = &adreno_dev->dev;
2480 irqreturn_t ret = IRQ_NONE;
2481 unsigned int status, tmp;
2482 int i;
2483
2484 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2485
2486 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2487 if (tmp & 1) {
2488 if (a3xx_irq_funcs[i].func != NULL) {
2489 a3xx_irq_funcs[i].func(adreno_dev, i);
2490 ret = IRQ_HANDLED;
2491 } else {
2492 KGSL_DRV_CRIT(device,
2493 "Unhandled interrupt bit %x\n", i);
2494 }
2495 }
2496
2497 tmp >>= 1;
2498 }
2499
2500 if (status)
2501 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2502 status);
2503 return ret;
2504}
2505
2506static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2507{
2508 struct kgsl_device *device = &adreno_dev->dev;
2509
2510 if (state)
2511 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2512 else
2513 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2514}
2515
2516static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
2517{
2518 struct kgsl_device *device = &adreno_dev->dev;
2519 unsigned int reg, val;
2520
2521 /* Freeze the counter */
2522 adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
2523 reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2524 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2525
2526 /* Read the value */
2527 adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
2528
2529 /* Reset the counter */
2530 reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
2531 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2532
2533 /* Re-enable the counter */
2534 reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
2535 reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2536 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2537
2538 return val;
2539}
2540
/*
 * a3xx_start() - one-time hardware bring-up for the A3XX core.
 * Resets the GPU, then programs the VBIF (bus interface), busy-cycle
 * accounting, error reporting and hang detection registers.
 * NOTE(review): this is an order-dependent MMIO sequence; do not reorder
 * the writes without hardware documentation.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* GMEM size on A320 is 512K */
	adreno_dev->gmemspace.sizebytes = SZ_512K;

	/* Reset the core */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
		0x00000001);
	/* Give the core time to come out of reset before touching it */
	msleep(20);

	/* Set up 16 deep read/write request queues */

	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);

	/* Enable WR-REQ */
	adreno_regwrite(device, A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x000000FF);

	/* Set up round robin arbitration between both AXI ports */
	adreno_regwrite(device, A3XX_VBIF_ARB_CTL, 0x00000030);

	/* Set up AOOO (allow out-of-order AXI transactions) */
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C);
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C);

	/* Enable 1K sort */
	adreno_regwrite(device, A3XX_VBIF_ABIT_SORT, 0x000000FF);
	adreno_regwrite(device, A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4);

	/* Make all blocks contribute to the GPU BUSY perf counter
	 * sampled by a3xx_busy_cycles() */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable the RBBM error reporting bits. This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 16) | 0xFFF);

}
2598
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002599/* Defined in adreno_a3xx_snapshot.c */
2600void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2601 int *remain, int hang);
2602
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002603struct adreno_gpudev adreno_a3xx_gpudev = {
2604 .reg_rbbm_status = A3XX_RBBM_STATUS,
2605 .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
2606 .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
2607
2608 .ctxt_create = a3xx_drawctxt_create,
2609 .ctxt_save = a3xx_drawctxt_save,
2610 .ctxt_restore = a3xx_drawctxt_restore,
2611 .rb_init = a3xx_rb_init,
2612 .irq_control = a3xx_irq_control,
2613 .irq_handler = a3xx_irq_handler,
2614 .busy_cycles = a3xx_busy_cycles,
2615 .start = a3xx_start,
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002616 .snapshot = a3xx_snapshot,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002617};