blob: 5187eb161b39893bb03d033951e2c7639c683da3 [file] [log] [blame]
/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
13
14#include <linux/delay.h>
Sudhakara Rao Tentu5746bde2012-03-15 12:16:32 +053015#include <mach/socinfo.h>
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070016
17#include "kgsl.h"
18#include "adreno.h"
19#include "kgsl_sharedmem.h"
20#include "kgsl_cffdump.h"
21#include "a3xx_reg.h"
Carter Cooperb769c912012-04-13 08:16:35 -060022#include "adreno_a3xx_trace.h"
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070023
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive).
 *
 * Values are dword register offsets into the GPU register space;
 * consumers walk the table two entries at a time.
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303C, 0x303C, 0x305E, 0x305F,
};

/* Number of (start, stop) ranges in a3xx_registers */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
70
/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */

#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 * 8K - ALU Constant Shadow (8K aligned)
 * 4K - H/W Register Shadow (8K aligned)
 * 5K - Command and Vertex Buffers
 * 8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE		(8*1024)	/* 8KB */
#define REG_SHADOW_SIZE		(4*1024)	/* 4KB */
#define CMD_BUFFER_SIZE		(5*1024)	/* 5KB */
#define TEX_SIZE_MEM_OBJECTS	896		/* bytes */
#define TEX_SIZE_MIPMAP		1936		/* bytes */
#define TEX_SIZE_SAMPLER_OBJ	256		/* bytes */
/* One memobj/mipmap/sampler set each for VS and FS, hence the *2 (~6KB) */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2)
#define SHADER_SHADOW_SIZE	(8*1024)	/* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET		ALU_SHADOW_SIZE
#define CMD_OFFSET		(REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET		(CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET		(SHADER_OFFSET+SHADER_SHADOW_SIZE)
#define VS_TEX_OFFSET_MEM_OBJECTS	TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP	(VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ	(VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP	(FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ	(FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE	(16*1024)

#define HLSQ_SAMPLER_OFFSET	0x000
#define HLSQ_MEMOBJ_OFFSET	0x400
#define HLSQ_MIPMAP_OFFSET	0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE	(0x10000+SSIZE*2)

#define REG_TO_MEM_LOOP_COUNT_SHIFT	18

/*
 * Build a PC draw initiator dword.  Every parameter reference is
 * parenthesized so arbitrary expressions expand correctly; note that
 * index_size is still evaluated twice, so avoid side-effecting
 * arguments.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
147
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains start and end of a range of registers.
 *
 * Used when CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES is set to copy the
 * live HW context registers into the register shadow via CP_REG_TO_MEM
 * (see build_regconstantsave_cmds).
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
185
/*
 * Global registers that need to be saved separately.
 *
 * These are not context registers, so they are captured one at a time
 * with CP_REG_TO_MEM into per-register slots (tmp_ctx.reg_values);
 * the table order must match the slot order.
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};

/* Number of entries in global_registers (one save slot each) */
#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
217
/*
 * A scratchpad used to build commands during context create.
 * File-scope single instance: context creation is assumed to be
 * serialized by the caller -- TODO confirm against drawctxt create path.
 */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where global register values are
	 * saved; one slot per entry in global_registers */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
226
227#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
228/*
229 * Function for executing dest = ( (reg & and) ROL rol ) | or
230 */
231static unsigned int *rmw_regtomem(unsigned int *cmd,
232 unsigned int reg, unsigned int and,
233 unsigned int rol, unsigned int or,
234 unsigned int dest)
235{
236 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
237 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
238 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
239 *cmd++ = 0x00000000; /* AND value */
240 *cmd++ = reg; /* OR address */
241
242 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
243 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
244 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
245 *cmd++ = and; /* AND value */
246 *cmd++ = or; /* OR value */
247
248 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
249 *cmd++ = A3XX_CP_SCRATCH_REG2;
250 *cmd++ = dest;
251
252 return cmd;
253}
254#endif
255
/*
 * build_regconstantsave_cmds() - build the command stream that saves
 * the HW register state and shader constants of a context into its
 * shadow memory (drawctxt->gpustate), and wrap it in an IB1
 * (drawctxt->regconstant_save).
 *
 * Several command dwords cannot be known at build time (constant
 * lengths, FS constant destination); their host/GPU addresses are
 * recorded in drawctxt->constant_save_commands[] so a later fixup pass
 * can patch them -- see the "From fixup" notes below.
 *
 * Consumes command space from tmp_ctx.cmd and advances it.
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;
	unsigned int i;

	/* Reserve one dword for fixup; record where it lives */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	/* The IB starts after the reserved dword */
	start = cmd;

	/* Let the pipe drain before reading registers back */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;		/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		/* NOTE(review): this 'start' shadows the function-scope
		 * 'start' pointer; harmless here but easy to misread */
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		/* Shadow slot: 8K-aligned base + dword offset from 0x2000 */
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants (skipped unless cond_execs[2] set) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;	/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants (skipped unless cond_execs[3] set) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;	/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	   From fixup:

	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   dst = base + offset
	   Because of the base alignment we can use
	   dst = base | offset
	 */
	*cmd++ = 0;	/* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS half lives SSIZE further in) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
	    ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
	    ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
	    (drawctxt->gpustate.gpuaddr +
	     FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
426
427/* Copy GMEM contents to system memory shadow. */
428static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
429 struct adreno_context *drawctxt,
430 struct gmem_shadow_t *shadow)
431{
432 unsigned int *cmds = tmp_ctx.cmd;
433 unsigned int *start = cmds;
434
435 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
436 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
437
438 /* RB_MODE_CONTROL */
439 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
440 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
441 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
442 /* RB_RENDER_CONTROL */
443 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
444 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
445
446 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
447 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
448 /* RB_COPY_CONTROL */
449 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
450 RB_CLEAR_MODE_RESOLVE) |
451 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
452 tmp_ctx.gmem_base >> 14);
453 /* RB_COPY_DEST_BASE */
454 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
455 shadow->gmemshadow.gpuaddr >> 5);
456 /* RB_COPY_DEST_PITCH */
457 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
458 (shadow->pitch * 4) / 32);
459 /* RB_COPY_DEST_INFO */
460 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
461 RB_TILINGMODE_LINEAR) |
462 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
463 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
464 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
465
466 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
467 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
468 /* GRAS_SC_CONTROL */
469 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
470
471 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
472 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
473 /* VFD_CONTROL_0 */
474 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
475 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
476 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
477 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
478 /* VFD_CONTROL_1 */
479 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
480 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
481 _SET(VFD_CTRLREG1_REGID4INST, 252);
482
483 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
484 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
485 /* VFD_FETCH_INSTR_0_0 */
486 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
487 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
488 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
489 /* VFD_FETCH_INSTR_1_0 */
490 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
491 shadow->quad_vertices.gpuaddr);
492
493 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
494 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
495 /* VFD_DECODE_INSTR_0 */
496 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
497 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
498 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
499 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
500 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
501 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
502
503 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
504 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
505 /* HLSQ_CONTROL_0_REG */
506 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
507 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
508 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
509 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
510 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
511 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
512 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
513 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
514 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
515 /* HLSQ_CONTROL_1_REG */
516 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
517 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
518 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
519 /* HLSQ_CONTROL_2_REG */
520 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
521 /* HLSQ_CONTROL_3_REG */
522 *cmds++ = 0x00000000;
523
524 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
525 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
526 /* HLSQ_VS_CONTROL_REG */
527 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
528 /* HLSQ_FS_CONTROL_REG */
529 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
530 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
531 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
532 /* HLSQ_CONST_VSPRESV_RANGE_REG */
533 *cmds++ = 0x00000000;
534 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
535 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
536 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
537
538 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
539 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
540 /* SP_FS_LENGTH_REG */
541 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
542
543 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
544 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
545 /* SP_SP_CTRL_REG */
546 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
547 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
548
549 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
550 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
551 /* SP_VS_CTRL_REG0 */
552 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
553 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
554 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
555 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
556 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
557 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
558 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
559 /* SP_VS_CTRL_REG1 */
560 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
561 /* SP_VS_PARAM_REG */
562 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
563 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
564 /* SP_VS_OUT_REG_0 */
565 *cmds++ = 0x00000000;
566 /* SP_VS_OUT_REG_1 */
567 *cmds++ = 0x00000000;
568 /* SP_VS_OUT_REG_2 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_3 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_4 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_5 */
575 *cmds++ = 0x00000000;
576 /* SP_VS_OUT_REG_6 */
577 *cmds++ = 0x00000000;
578 /* SP_VS_OUT_REG_7 */
579 *cmds++ = 0x00000000;
580
581 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
582 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
583 /* SP_VS_VPC_DST_REG_0 */
584 *cmds++ = 0x00000000;
585 /* SP_VS_VPC_DST_REG_1 */
586 *cmds++ = 0x00000000;
587 /* SP_VS_VPC_DST_REG_2 */
588 *cmds++ = 0x00000000;
589 /* SP_VS_VPC_DST_REG_3 */
590 *cmds++ = 0x00000000;
591 /* SP_VS_OBJ_OFFSET_REG */
592 *cmds++ = 0x00000000;
593 /* SP_VS_OBJ_START_REG */
594 *cmds++ = 0x00000000;
595
596 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
597 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
598 /* SP_VS_LENGTH_REG */
599 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
600 /* SP_FS_CTRL_REG0 */
601 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
602 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
603 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
604 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
605 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
606 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
607 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
608 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
609 /* SP_FS_CTRL_REG1 */
610 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
611 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
612 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
613 /* SP_FS_OBJ_OFFSET_REG */
614 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
615 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
616 /* SP_FS_OBJ_START_REG */
617 *cmds++ = 0x00000000;
618
619 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
620 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
621 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
622 *cmds++ = 0x00000000;
623 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
624 *cmds++ = 0x00000000;
625
626 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
627 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
628 /* SP_FS_OUTPUT_REG */
629 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
630
631 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
632 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
633 /* SP_FS_MRT_REG_0 */
634 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
635 /* SP_FS_MRT_REG_1 */
636 *cmds++ = 0x00000000;
637 /* SP_FS_MRT_REG_2 */
638 *cmds++ = 0x00000000;
639 /* SP_FS_MRT_REG_3 */
640 *cmds++ = 0x00000000;
641
642 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
643 *cmds++ = CP_REG(A3XX_VPC_ATTR);
644 /* VPC_ATTR */
645 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
646 _SET(VPC_VPCATTR_LMSIZE, 1);
647 /* VPC_PACK */
648 *cmds++ = 0x00000000;
649 /* VPC_VARRYING_INTERUPT_MODE_0 */
650 *cmds++ = 0x00000000;
651 /* VPC_VARRYING_INTERUPT_MODE_1 */
652 *cmds++ = 0x00000000;
653 /* VPC_VARRYING_INTERUPT_MODE_2 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARRYING_INTERUPT_MODE_3 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARYING_PS_REPL_MODE_0 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARYING_PS_REPL_MODE_1 */
660 *cmds++ = 0x00000000;
661 /* VPC_VARYING_PS_REPL_MODE_2 */
662 *cmds++ = 0x00000000;
663 /* VPC_VARYING_PS_REPL_MODE_3 */
664 *cmds++ = 0x00000000;
665
666 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
667 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
668 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
669 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
670 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
671 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
672 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
673
674 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
675 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
676 /* end; */
677 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
678 /* nop; */
679 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
680 /* nop; */
681 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
682
683 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
684 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
685 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
686 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
687 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
688 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
689 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
690
691 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
692 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
693 /* end; */
694 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
695 /* nop; */
696 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
697 /* nop; */
698 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
699
700 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
701 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
702 /* RB_MSAA_CONTROL */
703 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
704 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
705
706 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
707 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
708 /* RB_DEPTH_CONTROL */
709 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
710
711 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
712 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
713 /* RB_MRT_CONTROL0 */
714 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
715 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
716 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
717 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
718
719 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
720 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
721 /* RB_MRT_BLEND_CONTROL0 */
722 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
723 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
724 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
725 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
726 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
727 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
728 /* RB_MRT_CONTROL1 */
729 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
730 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
731 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
732
733 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
734 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
735 /* RB_MRT_BLEND_CONTROL1 */
736 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
737 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
738 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
739 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
740 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
741 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
742 /* RB_MRT_CONTROL2 */
743 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
744 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
745 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
746
747 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
748 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
749 /* RB_MRT_BLEND_CONTROL2 */
750 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
751 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
752 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
753 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
754 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
755 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
756 /* RB_MRT_CONTROL3 */
757 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
758 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
759 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
760
761 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
762 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
763 /* RB_MRT_BLEND_CONTROL3 */
764 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
765 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
766 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
767 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
768 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
769 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
770
771 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
772 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
773 /* VFD_INDEX_MIN */
774 *cmds++ = 0x00000000;
775 /* VFD_INDEX_MAX */
776 *cmds++ = 0xFFFFFFFF;
777 /* VFD_INSTANCEID_OFFSET */
778 *cmds++ = 0x00000000;
779 /* VFD_INDEX_OFFSET */
780 *cmds++ = 0x00000000;
781
782 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
783 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
784 /* VFD_VS_THREADING_THRESHOLD */
785 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
786 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
787
788 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
789 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
790 /* TPL1_TP_VS_TEX_OFFSET */
791 *cmds++ = 0;
792
793 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
794 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
795 /* TPL1_TP_FS_TEX_OFFSET */
796 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
797 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
798 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
799
800 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
801 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
802 /* PC_PRIM_VTX_CNTL */
803 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
804 PC_DRAW_TRIANGLES) |
805 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
806 PC_DRAW_TRIANGLES) |
807 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
808
809 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
810 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
811 /* GRAS_SC_WINDOW_SCISSOR_TL */
812 *cmds++ = 0x00000000;
813 /* GRAS_SC_WINDOW_SCISSOR_BR */
814 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
815 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
816
817 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
818 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
819 /* GRAS_SC_SCREEN_SCISSOR_TL */
820 *cmds++ = 0x00000000;
821 /* GRAS_SC_SCREEN_SCISSOR_BR */
822 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
823 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
824
825 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
826 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
827 /* GRAS_CL_VPORT_XOFFSET */
828 *cmds++ = 0x00000000;
829 /* GRAS_CL_VPORT_XSCALE */
830 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
831 /* GRAS_CL_VPORT_YOFFSET */
832 *cmds++ = 0x00000000;
833 /* GRAS_CL_VPORT_YSCALE */
834 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
835
836 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
837 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
838 /* GRAS_CL_VPORT_ZOFFSET */
839 *cmds++ = 0x00000000;
840 /* GRAS_CL_VPORT_ZSCALE */
841 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
842
843 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
844 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
845 /* GRAS_CL_CLIP_CNTL */
846 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
847 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
848 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
849 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
850 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
851
852 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
853 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
854 /* GRAS_CL_GB_CLIP_ADJ */
855 *cmds++ = 0x00000000;
856
857 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
858 *cmds++ = 0x00000000;
859
860 /*
861 * Resolve using two draw calls with a dummy register
862 * write in between. This is a HLM workaround
863 * that should be removed later.
864 */
865 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
866 *cmds++ = 0x00000000; /* Viz query info */
867 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
868 PC_DI_SRC_SEL_IMMEDIATE,
869 PC_DI_INDEX_SIZE_32_BIT,
870 PC_DI_IGNORE_VISIBILITY);
871 *cmds++ = 0x00000003; /* Num indices */
872 *cmds++ = 0x00000000; /* Index 0 */
873 *cmds++ = 0x00000001; /* Index 1 */
874 *cmds++ = 0x00000002; /* Index 2 */
875
876 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
877 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
878 *cmds++ = 0x00000000;
879
880 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
881 *cmds++ = 0x00000000; /* Viz query info */
882 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
883 PC_DI_SRC_SEL_IMMEDIATE,
884 PC_DI_INDEX_SIZE_32_BIT,
885 PC_DI_IGNORE_VISIBILITY);
886 *cmds++ = 0x00000003; /* Num indices */
887 *cmds++ = 0x00000002; /* Index 0 */
888 *cmds++ = 0x00000001; /* Index 1 */
889 *cmds++ = 0x00000003; /* Index 2 */
890
891 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
892 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
893 *cmds++ = 0x00000000;
894
895 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
896 *cmds++ = 0x00000000;
897
898 /* Create indirect buffer command for above command sequence */
899 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
900
901 return cmds;
902}
903
904static void build_shader_save_cmds(struct adreno_device *adreno_dev,
905 struct adreno_context *drawctxt)
906{
907 unsigned int *cmd = tmp_ctx.cmd;
908 unsigned int *start;
909
910 /* Reserve space for boolean values used for COND_EXEC packet */
911 drawctxt->cond_execs[0].hostptr = cmd;
912 drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
913 *cmd++ = 0;
914 drawctxt->cond_execs[1].hostptr = cmd;
915 drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
916 *cmd++ = 0;
917
918 drawctxt->shader_save_commands[0].hostptr = cmd;
919 drawctxt->shader_save_commands[0].gpuaddr =
920 virt2gpu(cmd, &drawctxt->gpustate);
921 *cmd++ = 0;
922 drawctxt->shader_save_commands[1].hostptr = cmd;
923 drawctxt->shader_save_commands[1].gpuaddr =
924 virt2gpu(cmd, &drawctxt->gpustate);
925 *cmd++ = 0;
926
927 start = cmd;
928
929 /* Save vertex shader */
930
931 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
932 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
933 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
934 *cmd++ = 0x0000FFFF;
935 *cmd++ = 3; /* EXEC_COUNT */
936
937 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
938 drawctxt->shader_save_commands[2].hostptr = cmd;
939 drawctxt->shader_save_commands[2].gpuaddr =
940 virt2gpu(cmd, &drawctxt->gpustate);
941 /*
942 From fixup:
943
944 dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
945
946 From regspec:
947 SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
948 If bit31 is 1, it means overflow
949 or any long shader.
950
951 src = (HLSQ_SHADOW_BASE + 0x1000)/4
952 */
953 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
954 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;
955
956 /* Save fragment shader */
957 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
958 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
959 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
960 *cmd++ = 0x0000FFFF;
961 *cmd++ = 3; /* EXEC_COUNT */
962
963 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
964 drawctxt->shader_save_commands[3].hostptr = cmd;
965 drawctxt->shader_save_commands[3].gpuaddr =
966 virt2gpu(cmd, &drawctxt->gpustate);
967 /*
968 From fixup:
969
970 dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
971
972 From regspec:
973 SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
974 If bit31 is 1, it means overflow
975 or any long shader.
976
977 fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
978 From regspec:
979
980 SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
981 First instruction of the whole shader will be stored from
982 the offset in instruction cache, unit = 256bits, a cache line.
983 It can start from 0 if no VS available.
984
985 src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
986 */
987 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
988 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
989 + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;
990
991 /* Create indirect buffer command for above command sequence */
992 create_ib1(drawctxt, drawctxt->shader_save, start, cmd);
993
994 tmp_ctx.cmd = cmd;
995}
996
997/*
998 * Make an IB to modify context save IBs with the correct shader instruction
999 * and constant sizes and offsets.
1000 */
1001
/*
 * build_save_fixup_cmds - build the IB that patches the context-save IBs
 * with the shader instruction / constant sizes and offsets that are only
 * known at save time (read from the live SP_VS/SP_FS control registers).
 *
 * Two strategies exist: with GSL_CONTEXT_SWITCH_CPU_SYNC the raw register
 * values are dumped to memory for the CPU to fix up; otherwise CP_REG_RMW
 * arithmetic (via CP scratch registers) computes the patched
 * (loop-count | source-address) dwords entirely on the GPU and writes them
 * into the placeholder slots reserved by the save IB builders
 * (shader_save_commands[], constant_save_commands[], cond_execs[]).
 * The resulting IB is referenced by drawctxt->save_fixup.
 */
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
	struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7; /* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Fully invalidate the UCHE so stale shader data cannot be read back */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000; /* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0; /* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC

	/*
	 * CPU-sync variant: dump the raw control registers to memory; the
	 * host computes the patched values instead of the GPU.
	 */

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else

	/*
	 * GPU-side fixup: rmw_regtomem() masks a register field, shifts it,
	 * ORs in a constant, and stores the result to the given gpuaddr.
	 */

	/*
	 * Shader save: patch the VS CP_REG_TO_MEM placeholder with
	 * (VS_LENGTH-derived dword count << loop-count shift) | shadow src.
	 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/*
	 * The FS placeholder also needs the FS object offset folded in, so
	 * it is composed by hand in the CP scratch registers.
	 */
	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
		A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000; /* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000; /* AND value */
	*cmd++ = 0x00000000; /* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff; /* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */

	/* Store the composed dword into the FS save placeholder */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save: patch VS/FS constant-copy placeholders likewise */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	/* Patch the FS constant destination address from the FS object offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1152
1153/****************************************************************************/
1154/* Functions to build context restore IBs */
1155/****************************************************************************/
1156
1157static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1158 struct adreno_context *drawctxt,
1159 struct gmem_shadow_t *shadow)
1160{
1161 unsigned int *cmds = tmp_ctx.cmd;
1162 unsigned int *start = cmds;
1163
1164 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1165 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1166 /* HLSQ_CONTROL_0_REG */
1167 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1168 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1169 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1170 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1171 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1172 /* HLSQ_CONTROL_1_REG */
1173 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1174 /* HLSQ_CONTROL_2_REG */
1175 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1176 /* HLSQ_CONTROL3_REG */
1177 *cmds++ = 0x00000000;
1178
1179 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1180 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1181 /* RB_MRT_BUF_INFO0 */
1182 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1183 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1184 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1185 (shadow->gmem_pitch * 4 * 8) / 256);
1186 /* RB_MRT_BUF_BASE0 */
1187 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1188
1189 /* Texture samplers */
1190 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1191 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1192 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1193 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1194 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1195 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1196 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1197 *cmds++ = 0x00000240;
1198 *cmds++ = 0x00000000;
1199
1200 /* Texture memobjs */
1201 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1202 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1203 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1204 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1205 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1206 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1207 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1208 *cmds++ = 0x4cc06880;
1209 *cmds++ = shadow->height | (shadow->width << 14);
1210 *cmds++ = (shadow->pitch*4*8) << 9;
1211 *cmds++ = 0x00000000;
1212
1213 /* Mipmap bases */
1214 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1215 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1216 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1217 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1218 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1219 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1220 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1221 *cmds++ = shadow->gmemshadow.gpuaddr;
1222 *cmds++ = 0x00000000;
1223 *cmds++ = 0x00000000;
1224 *cmds++ = 0x00000000;
1225 *cmds++ = 0x00000000;
1226 *cmds++ = 0x00000000;
1227 *cmds++ = 0x00000000;
1228 *cmds++ = 0x00000000;
1229 *cmds++ = 0x00000000;
1230 *cmds++ = 0x00000000;
1231 *cmds++ = 0x00000000;
1232 *cmds++ = 0x00000000;
1233 *cmds++ = 0x00000000;
1234 *cmds++ = 0x00000000;
1235
1236 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1237 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1238 /* HLSQ_VS_CONTROL_REG */
1239 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1240 /* HLSQ_FS_CONTROL_REG */
1241 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1242 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1243 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1244 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1245 *cmds++ = 0x00000000;
1246 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1247 *cmds++ = 0x00000000;
1248
1249 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1250 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1251 /* SP_FS_LENGTH_REG */
1252 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1253
1254 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1255 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1256 /* SP_VS_CTRL_REG0 */
1257 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1258 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1259 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1260 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1261 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1262 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1263 /* SP_VS_CTRL_REG1 */
1264 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1265 /* SP_VS_PARAM_REG */
1266 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1267 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1268 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1269 /* SP_VS_OUT_REG0 */
1270 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1271 /* SP_VS_OUT_REG1 */
1272 *cmds++ = 0x00000000;
1273 /* SP_VS_OUT_REG2 */
1274 *cmds++ = 0x00000000;
1275 /* SP_VS_OUT_REG3 */
1276 *cmds++ = 0x00000000;
1277 /* SP_VS_OUT_REG4 */
1278 *cmds++ = 0x00000000;
1279 /* SP_VS_OUT_REG5 */
1280 *cmds++ = 0x00000000;
1281 /* SP_VS_OUT_REG6 */
1282 *cmds++ = 0x00000000;
1283 /* SP_VS_OUT_REG7 */
1284 *cmds++ = 0x00000000;
1285
1286 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1287 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1288 /* SP_VS_VPC_DST_REG0 */
1289 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1290 /* SP_VS_VPC_DST_REG1 */
1291 *cmds++ = 0x00000000;
1292 /* SP_VS_VPC_DST_REG2 */
1293 *cmds++ = 0x00000000;
1294 /* SP_VS_VPC_DST_REG3 */
1295 *cmds++ = 0x00000000;
1296 /* SP_VS_OBJ_OFFSET_REG */
1297 *cmds++ = 0x00000000;
1298 /* SP_VS_OBJ_START_REG */
1299 *cmds++ = 0x00000000;
1300
1301 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1302 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1303 /* SP_VS_LENGTH_REG */
1304 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1305 /* SP_FS_CTRL_REG0 */
1306 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1307 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1308 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1309 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1310 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1311 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1312 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1313 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1314 /* SP_FS_CTRL_REG1 */
1315 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1316 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1317 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1318 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001319 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
1320 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001321 /* SP_FS_OBJ_START_REG */
1322 *cmds++ = 0x00000000;
1323
1324 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1325 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1326 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1327 *cmds++ = 0x00000000;
1328 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1329 *cmds++ = 0x00000000;
1330
1331 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1332 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1333 /* SP_FS_OUT_REG */
1334 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1335
Jordan Crousea7ec4212012-02-04 10:23:52 -07001336 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001337 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1338 /* SP_FS_MRT_REG0 */
1339 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1340 /* SP_FS_MRT_REG1 */
1341 *cmds++ = 0;
1342 /* SP_FS_MRT_REG2 */
1343 *cmds++ = 0;
1344 /* SP_FS_MRT_REG3 */
1345 *cmds++ = 0;
1346
1347 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1348 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1349 /* VPC_ATTR */
1350 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1351 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1352 _SET(VPC_VPCATTR_LMSIZE, 1);
1353 /* VPC_PACK */
1354 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1355 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1356 /* VPC_VARYING_INTERP_MODE_0 */
1357 *cmds++ = 0x00000000;
1358 /* VPC_VARYING_INTERP_MODE1 */
1359 *cmds++ = 0x00000000;
1360 /* VPC_VARYING_INTERP_MODE2 */
1361 *cmds++ = 0x00000000;
1362 /* VPC_VARYING_IINTERP_MODE3 */
1363 *cmds++ = 0x00000000;
1364 /* VPC_VARRYING_PS_REPL_MODE_0 */
1365 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1366 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1367 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1377 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1378 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1379 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1380 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1381 /* VPC_VARRYING_PS_REPL_MODE_1 */
1382 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1388 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1389 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1392 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1393 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1394 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1395 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1396 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1397 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1398 /* VPC_VARRYING_PS_REPL_MODE_2 */
1399 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1405 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1406 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1409 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1410 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1411 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1412 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1413 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1414 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1415 /* VPC_VARRYING_PS_REPL_MODE_3 */
1416 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1422 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1423 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1426 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1427 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1428 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1429 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1430 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1431 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1432
Jordan Crousea7ec4212012-02-04 10:23:52 -07001433 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001434 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1435 /* SP_SP_CTRL_REG */
1436 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1437
1438 /* Load vertex shader */
1439 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1440 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1441 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1442 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1443 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1444 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1445 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1446 /* (sy)end; */
1447 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1448 /* nop; */
1449 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1450 /* nop; */
1451 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1452 /* nop; */
1453 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1454
1455 /* Load fragment shader */
1456 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1457 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1458 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1459 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1460 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1461 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1462 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1463 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1464 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1465 /* (rpt5)nop; */
1466 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1467 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1468 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1469 /* (sy)mov.f32f32 r1.x, r0.x; */
1470 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1471 /* mov.f32f32 r1.y, r0.y; */
1472 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1473 /* mov.f32f32 r1.z, r0.z; */
1474 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1475 /* mov.f32f32 r1.w, r0.w; */
1476 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1477 /* end; */
1478 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1479
1480 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1481 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1482 /* VFD_CONTROL_0 */
1483 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1484 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1485 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1486 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1487 /* VFD_CONTROL_1 */
1488 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1489 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1490 _SET(VFD_CTRLREG1_REGID4INST, 252);
1491
1492 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1493 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1494 /* VFD_FETCH_INSTR_0_0 */
1495 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1496 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1497 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1498 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1499 /* VFD_FETCH_INSTR_1_0 */
1500 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1501 shadow->quad_vertices_restore.gpuaddr);
1502 /* VFD_FETCH_INSTR_0_1 */
1503 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1504 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1505 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1506 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1507 /* VFD_FETCH_INSTR_1_1 */
1508 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1509 shadow->quad_vertices_restore.gpuaddr + 16);
1510
1511 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1512 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1513 /* VFD_DECODE_INSTR_0 */
1514 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1515 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1516 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1517 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1518 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1519 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1520 /* VFD_DECODE_INSTR_1 */
1521 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1522 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1523 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1524 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1525 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1526 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1527
1528 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1529 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1530 /* RB_DEPTH_CONTROL */
1531 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1532
1533 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1534 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1535 /* RB_STENCIL_CONTROL */
1536 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1537 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1538 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1539 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1540 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1541 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1542 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1543 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1544
1545 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1546 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1547 /* RB_MODE_CONTROL */
1548 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1549 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1550
1551 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1552 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1553 /* RB_RENDER_CONTROL */
1554 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1555 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1556
1557 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1558 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1559 /* RB_MSAA_CONTROL */
1560 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1561 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1562
1563 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1564 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1565 /* RB_MRT_CONTROL0 */
1566 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1567 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1568 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1569 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1570
1571 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1572 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1573 /* RB_MRT_BLENDCONTROL0 */
1574 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1575 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1576 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1577 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1578 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1579 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1580 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1581 /* RB_MRT_CONTROL1 */
1582 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1583 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1584 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1585
1586 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1587 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1588 /* RB_MRT_BLENDCONTROL1 */
1589 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1590 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1591 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1592 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1593 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1594 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1595 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1596 /* RB_MRT_CONTROL2 */
1597 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1598 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1599 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1600
1601 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1602 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1603 /* RB_MRT_BLENDCONTROL2 */
1604 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1605 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1606 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1607 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1608 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1609 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1610 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1611 /* RB_MRT_CONTROL3 */
1612 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1613 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1614 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1615
1616 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1617 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1618 /* RB_MRT_BLENDCONTROL3 */
1619 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1620 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1621 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1622 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1623 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1624 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1625 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1626
1627 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1628 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1629 /* VFD_INDEX_MIN */
1630 *cmds++ = 0x00000000;
1631 /* VFD_INDEX_MAX */
1632 *cmds++ = 0xFFFFFFFF;
1633 /* VFD_INDEX_OFFSET */
1634 *cmds++ = 0x00000000;
1635 /* TPL1_TP_VS_TEX_OFFSET */
1636 *cmds++ = 0x00000000;
1637
1638 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1639 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1640 /* VFD_VS_THREADING_THRESHOLD */
1641 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1642 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1643
1644 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1645 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1646 /* TPL1_TP_VS_TEX_OFFSET */
1647 *cmds++ = 0x00000000;
1648
1649 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1650 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1651 /* TPL1_TP_FS_TEX_OFFSET */
1652 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1653 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1654 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1655
1656 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1657 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1658 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001659 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1660 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1661 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001662
1663 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1664 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1665 /* GRAS_SU_MODE_CONTROL */
1666 *cmds++ = 0x00000000;
1667
1668 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1669 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1670 /* GRAS_SC_WINDOW_SCISSOR_TL */
1671 *cmds++ = 0x00000000;
1672 /* GRAS_SC_WINDOW_SCISSOR_BR */
1673 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1674 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1675
1676 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1677 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1678 /* GRAS_SC_SCREEN_SCISSOR_TL */
1679 *cmds++ = 0x00000000;
1680 /* GRAS_SC_SCREEN_SCISSOR_BR */
1681 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1682 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1683
1684 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1685 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1686 /* GRAS_CL_VPORT_XOFFSET */
1687 *cmds++ = 0x00000000;
1688 /* GRAS_CL_VPORT_XSCALE */
1689 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1690 /* GRAS_CL_VPORT_YOFFSET */
1691 *cmds++ = 0x00000000;
1692 /* GRAS_CL_VPORT_YSCALE */
1693 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1694
1695 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1696 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1697 /* GRAS_CL_VPORT_ZOFFSET */
1698 *cmds++ = 0x00000000;
1699 /* GRAS_CL_VPORT_ZSCALE */
1700 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1701
1702 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1703 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1704 /* GRAS_CL_CLIP_CNTL */
1705 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1706
1707 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1708 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1709 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1710 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1711
1712 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1713 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1714 /* PC_PRIM_VTX_CONTROL */
1715 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1716 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1717 PC_DRAW_TRIANGLES) |
1718 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1719 PC_DRAW_TRIANGLES) |
1720 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1721
1722 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1723 *cmds++ = 0x00000000; /* Viz query info */
1724 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1725 PC_DI_SRC_SEL_AUTO_INDEX,
1726 PC_DI_INDEX_SIZE_16_BIT,
1727 PC_DI_IGNORE_VISIBILITY);
1728 *cmds++ = 0x00000002; /* Num indices */
1729
1730 /* Create indirect buffer command for above command sequence */
1731 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1732
1733 return cmds;
1734}
1735
/*
 * build_regrestore_cmds() - build the IB that restores shadowed context
 * registers when a context is switched back in.
 *
 * Emits an HLSQ flush and a full UCHE invalidate, then a single
 * CP_LOAD_CONSTANT_CONTEXT packet covering every range listed in
 * context_register_ranges, followed by one type-0 write per entry of
 * global_registers.  The packet header and its shadow-enable bits are
 * patched in after the ranges are emitted, once the length is known.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
			   struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Invalidate the whole UCHE before reloading register state */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	/* Remember where the CP_LOAD_CONSTANT_CONTEXT header goes; it is
	 * filled in below once the total packet length is known. */
	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Length is now known: write the deferred packet header */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/* Global registers get individual type-0 writes; the GPU address of
	 * each data slot is recorded in tmp_ctx.reg_values so the value can
	 * be filled in elsewhere (presumably by the save path - the writers
	 * are outside this view). */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1793
/*
 * build_constantrestore_cmds() - build the IB that restores ALU constants
 * and texture state (memobjects, mipmap tables, samplers) for both the
 * vertex and fragment pipes on context restore.
 *
 * The CP_LOAD_STATE ord1/ord2 words for the constant loads are left as
 * placeholders; their GPU addresses are recorded in
 * drawctxt->constant_load_commands[] so the restore-fixup IB can patch
 * in sizes/offsets read from the hardware at switch time.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/* Slots for the CP_COND_EXEC predicates guarding the VS/FS constant
	 * loads; filled in by the fixup IB from SP_*S_CTRL_REG1 */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable register shadowing for the HLSQ write below */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants (skipped when the patched predicate
	 * says the VS constant length is zero) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	 * From fixup:
	 *
	 * mode = 4 (indirect)
	 * stateblock = 4 (Vertex constants)
	 * numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)
	 *
	 * From register spec:
	 * SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 *
	 * ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 *
	 * mode = 4 (indirect)
	 * stateblock = 6 (Fragment constants)
	 * numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)
	 *
	 * From register spec:
	 * SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 *
	 * ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 * base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	 * offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET
	 *
	 * From register spec:
	 * SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	 * start offset in on chip RAM,
	 * 128bit aligned
	 *
	 * ord2 = base + offset | 1
	 * Because of the base alignment we can use
	 * ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1958
/*
 * build_shader_restore_cmds() - build the IB that reloads vertex and
 * fragment shader instructions from the per-context shader shadow.
 *
 * Each load is guarded by a CP_COND_EXEC on cond_execs[0]/[1].  The
 * CP_LOAD_STATE ord1 words are placeholders; their GPU addresses are
 * saved in drawctxt->shader_load_commands[] so the fixup IB can patch
 * in the actual shader lengths from SP_VS/FS_CTRL_REG0.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 *
	 * mode = 4 (indirect)
	 * stateblock = 4 (Vertex shader)
	 * numunits = SP_VS_CTRL_REG0.VS_LENGTH
	 *
	 * From regspec:
	 * SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	 * If bit31 is 1, it means overflow
	 * or any long shader.
	 *
	 * ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader (shadowed in the second half of the shader area) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 *
	 * mode = 4 (indirect)
	 * stateblock = 6 (Fragment shader)
	 * numunits = SP_FS_CTRL_REG0.FS_LENGTH
	 *
	 * From regspec:
	 * SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	 * If bit31 is 1, it means overflow
	 * or any long shader.
	 *
	 * ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2025
/*
 * build_hlsqcontrol_restore_cmds() - build a tiny IB that writes
 * A3XX_HLSQ_CONTROL_0_REG.  The value written is a placeholder; its GPU
 * address is recorded in hlsqcontrol_restore_commands[0] so the fixup
 * IB can patch in the saved register value before this IB executes.
 */
static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
					   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
	drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
		= virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);

	tmp_ctx.cmd = cmd;
}
2044
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
/*
 * build_restore_fixup_cmds() - build the self-modifying "fixup" IB that
 * runs before the restore IBs.  It reads live SP_* control registers and
 * writes derived CP_LOAD_STATE ord words and CP_COND_EXEC predicates into
 * the restore command buffers (addresses captured when those IBs were
 * built).  The CPU-sync variant copies registers verbatim; the default
 * path uses rmw_regtomem() to mask/shift/or them in place on the GPU.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes: extract VS/FS_LENGTH and build the
	 * CP_LOAD_STATE ord1 word (stateblock/mode bits ORed in) */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals: non-zero constant length
	 * makes the matching CP_COND_EXEC predicate true */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2117
/*
 * a3xx_create_gpustate_shadow() - build every save/restore/fixup IB for a
 * context's register, constant and shader state.
 *
 * The build_* helpers all carve their command space out of the shared
 * tmp_ctx.cmd cursor, so they must run in sequence; do not reorder.
 * Always returns 0 (the helpers cannot fail; allocation happened in the
 * caller).
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2134
/* create buffers for saving/restoring registers, constants, & GMEM */
/*
 * a3xx_create_gmem_shadow() - allocate the GMEM shadow buffer and build
 * the gmem2sys (save) and sys2gmem (restore) IBs for a context.
 *
 * Returns 0 on success or the kgsl_allocate() error code on failure.
 */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	int result;

	calc_gmemsize(&drawctxt->context_gmem_shadow, adreno_dev->gmem_size);
	tmp_ctx.gmem_base = adreno_dev->gmem_base;

	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
		drawctxt->pagetable, drawctxt->context_gmem_shadow.size);

	if (result)
		return result;

	/* Vertex buffer for the full-screen quad used by save/restore */
	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
		&tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);

	/* Flush the CPU-written commands out to memory for the GPU */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;

	return 0;
}
2168
/*
 * a3xx_drawctxt_create() - allocate and initialize the per-context state
 * shadow for an A3XX draw context.
 *
 * Allocates one CONTEXT_SIZE buffer that holds the gpustate shadow plus
 * all generated command IBs, then builds the gpustate shadow (unless the
 * context uses preambles) and the GMEM shadow (unless CTXT_FLAGS_NOGMEMALLOC
 * is set).  On any failure the gpustate buffer is freed before returning
 * the error.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
	struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more then just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
		drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	/* Command building starts after the register/constant shadow area */
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2207
/*
 * a3xx_drawctxt_save() - submit the IBs that save the outgoing context's
 * state (registers/constants, shaders, and optionally GMEM) before a
 * context switch.  A NULL context is a no-op.
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			   struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			       "Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			/* Mark that the shader must be reloaded on restore */
			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2251
/*
 * a3xx_drawctxt_restore() - submit the IBs that restore an incoming
 * context's state.  With a NULL context, only the default pagetable is
 * set.  Otherwise the current context id is recorded in the memstore,
 * the context pagetable is installed, and GMEM / registers / constants /
 * shaders / HLSQ control are restored as their flags require.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
			      struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Write the incoming context's id into the global memstore slot */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[4] = context->id;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(&device->mmu, context->pagetable);

	/*
	 * Restore GMEM.  (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
					    context->context_gmem_shadow.
					    gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2307
/*
 * a3xx_rb_init() - send the CP_ME_INIT packet that initializes the
 * microengine after the ringbuffer is started.  The 17 payload dwords
 * are fixed initialization values for the ME (meanings are not spelled
 * out here - presumably per the CP microcode spec; confirm against the
 * CP documentation before changing any of them).
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	/* GPU address of the 18 dwords just reserved at the write pointer */
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2337
2338static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2339{
2340 struct kgsl_device *device = &adreno_dev->dev;
2341 const char *err = "";
2342
2343 switch (bit) {
2344 case A3XX_INT_RBBM_AHB_ERROR: {
2345 unsigned int reg;
2346
2347 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2348
2349 /*
2350 * Return the word address of the erroring register so that it
2351 * matches the register specification
2352 */
2353
2354 KGSL_DRV_CRIT(device,
2355 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2356 reg & (1 << 28) ? "WRITE" : "READ",
2357 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2358 (reg >> 24) & 0x3);
2359
2360 /* Clear the error */
2361 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2362 return;
2363 }
2364 case A3XX_INT_RBBM_REG_TIMEOUT:
2365 err = "RBBM: AHB register timeout";
2366 break;
2367 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2368 err = "RBBM: ME master split timeout";
2369 break;
2370 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2371 err = "RBBM: PFP master split timeout";
2372 break;
2373 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2374 err = "RBBM: ATB bus oveflow";
2375 break;
2376 case A3XX_INT_VFD_ERROR:
2377 err = "VFD: Out of bounds access";
2378 break;
2379 case A3XX_INT_CP_T0_PACKET_IN_IB:
2380 err = "ringbuffer TO packet in IB interrupt";
2381 break;
2382 case A3XX_INT_CP_OPCODE_ERROR:
2383 err = "ringbuffer opcode error interrupt";
2384 break;
2385 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2386 err = "ringbuffer reserved bit error interrupt";
2387 break;
2388 case A3XX_INT_CP_HW_FAULT:
2389 err = "ringbuffer hardware fault";
2390 break;
2391 case A3XX_INT_CP_REG_PROTECT_FAULT:
2392 err = "ringbuffer protected mode error interrupt";
2393 break;
2394 case A3XX_INT_CP_AHB_ERROR_HALT:
2395 err = "ringbuffer AHB error interrupt";
2396 break;
2397 case A3XX_INT_MISC_HANG_DETECT:
2398 err = "MISC: GPU hang detected";
2399 break;
2400 case A3XX_INT_UCHE_OOB_ACCESS:
2401 err = "UCHE: Out of bounds access";
2402 break;
2403 }
2404
2405 KGSL_DRV_CRIT(device, "%s\n", err);
2406 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2407}
2408
/*
 * a3xx_cp_callback() - handle CP (command processor) completion
 * interrupts: RB, IB1 and IB2.
 *
 * For a ringbuffer interrupt, the timestamp-compare enable for the
 * currently running context is cleared in the memstore.  In all cases
 * waiters are woken, the expired-timestamp worker is queued, and the
 * timestamp notifier chain is called.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	if (irq == A3XX_INT_CP_RB_INT) {
		unsigned int context_id;
		kgsl_sharedmem_readl(&adreno_dev->dev.memstore,
				&context_id,
				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
					current_context));
		/* Guard against a garbage id read from the memstore */
		if (context_id < KGSL_MEMSTORE_MAX) {
			kgsl_sharedmem_writel(&rb->device->memstore,
					KGSL_MEMSTORE_OFFSET(context_id,
						ts_cmp_enable), 0);
			/* Make the write visible before waking waiters */
			wmb();
		}
		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&rb->device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);

	atomic_notifier_call_chain(&rb->device->ts_notifier_list,
				   rb->device->id, NULL);
}
2436
/* Designated-initializer helper for the per-bit callback table below */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupt bits unmasked on A3XX.  Note that ME/PFP master-split
 * timeouts, VFD_ERROR, MISC_HANG_DETECT, CP_SW, CP_DMA and the *_DONE_TS
 * bits are not included in the mask.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) |      \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))

/* Callback table indexed by RBBM_INT_0 bit number; NULL entries are
 * either masked off or intentionally ignored by the handler. */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2485
2486static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2487{
2488 struct kgsl_device *device = &adreno_dev->dev;
2489 irqreturn_t ret = IRQ_NONE;
2490 unsigned int status, tmp;
2491 int i;
2492
2493 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2494
2495 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2496 if (tmp & 1) {
2497 if (a3xx_irq_funcs[i].func != NULL) {
2498 a3xx_irq_funcs[i].func(adreno_dev, i);
2499 ret = IRQ_HANDLED;
2500 } else {
2501 KGSL_DRV_CRIT(device,
2502 "Unhandled interrupt bit %x\n", i);
2503 }
2504 }
2505
2506 tmp >>= 1;
2507 }
2508
Carter Cooperb769c912012-04-13 08:16:35 -06002509 trace_kgsl_a3xx_irq_status(device, status);
2510
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002511 if (status)
2512 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2513 status);
2514 return ret;
2515}
2516
2517static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2518{
2519 struct kgsl_device *device = &adreno_dev->dev;
2520
2521 if (state)
2522 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2523 else
2524 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2525}
2526
2527static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
2528{
2529 struct kgsl_device *device = &adreno_dev->dev;
2530 unsigned int reg, val;
2531
2532 /* Freeze the counter */
2533 adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
2534 reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2535 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2536
2537 /* Read the value */
2538 adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
2539
2540 /* Reset the counter */
2541 reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
2542 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2543
2544 /* Re-enable the counter */
2545 reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
2546 reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2547 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2548
2549 return val;
2550}
2551
/*
 * a3xx_start() - bring the A3XX core out of reset and program its
 * baseline hardware state: VBIF bus parameters, performance/power
 * counters, error reporting and hang detection.
 *
 * NOTE(review): this is an order-dependent hardware init sequence;
 * do not reorder the register writes without consulting the A3XX
 * programming documentation.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Reset the core */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
		0x00000001);
	/* presumably gives the soft reset time to settle — TODO confirm
	 * the required delay against the hardware documentation */
	msleep(20);

	/* Set up 16 deep read/write request queues */

	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);

	/* Enable WR-REQ */
	adreno_regwrite(device, A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x000000FF);

	/* Set up round robin arbitration between both AXI ports */
	adreno_regwrite(device, A3XX_VBIF_ARB_CTL, 0x00000030);

	/* Set up AOOO */
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C);
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C);

	/* ABIT sort is only configured on APQ8064 targets */
	if (cpu_is_apq8064()) {
		/* Enable 1K sort */
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT, 0x000000FF);
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
	}
	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Tune the hystersis counters for SP and CP idle detection */
	adreno_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
	adreno_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits. This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	/* NOTE(review): (1 << 16) looks like an enable bit and 0xFFF the
	 * timeout/threshold field — confirm against the register spec */
	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 16) | 0xFFF);

}
2611
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002612/* Defined in adreno_a3xx_snapshot.c */
2613void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2614 int *remain, int hang);
2615
/*
 * Per-target operations table for A3XX GPUs.  The adreno core
 * dispatches through these hooks for context save/restore,
 * ringbuffer init, interrupt handling, busy-cycle accounting,
 * hardware start and snapshot capture.
 */
struct adreno_gpudev adreno_a3xx_gpudev = {
	.reg_rbbm_status = A3XX_RBBM_STATUS,
	.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
	.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,

	.ctxt_create = a3xx_drawctxt_create,
	.ctxt_save = a3xx_drawctxt_save,
	.ctxt_restore = a3xx_drawctxt_restore,
	.rb_init = a3xx_rb_init,
	.irq_control = a3xx_irq_control,
	.irq_handler = a3xx_irq_handler,
	.busy_cycles = a3xx_busy_cycles,
	.start = a3xx_start,
	.snapshot = a3xx_snapshot,
};
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002630};