blob: 32603bdaabc12283d3bf6266bfdac349ad9837f2 [file] [log] [blame]
/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
13
14#include <linux/delay.h>
Sudhakara Rao Tentu5746bde2012-03-15 12:16:32 +053015#include <mach/socinfo.h>
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070016
17#include "kgsl.h"
18#include "adreno.h"
19#include "kgsl_sharedmem.h"
20#include "kgsl_cffdump.h"
21#include "a3xx_reg.h"
Carter Cooperb769c912012-04-13 08:16:35 -060022#include "adreno_a3xx_trace.h"
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070023
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive).  Offsets are dword register
 * offsets into the GPU register space.
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303C, 0x303C, 0x305E, 0x305F,
};

/* Number of start/end register pairs in a3xx_registers */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
70
/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */

#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 * 8K - ALU Constant Shadow (8K aligned)
 * 4K - H/W Register Shadow (8K aligned)
 * 5K - Command and Vertex Buffers
 * 8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE (8*1024)	/* 8KB */
#define REG_SHADOW_SIZE (4*1024)	/* 4KB */
#define CMD_BUFFER_SIZE (5*1024)	/* 5KB */
#define TEX_SIZE_MEM_OBJECTS 896	/* bytes */
#define TEX_SIZE_MIPMAP 1936		/* bytes */
#define TEX_SIZE_SAMPLER_OBJ 256	/* bytes */
/* VS and FS each get one set of memobj/mipmap/sampler shadows, hence *2 */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2)	/* ~6KB */
#define SHADER_SHADOW_SIZE (8*1024)	/* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET ALU_SHADOW_SIZE
#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE)
/* Vertex shader texture state shadows, laid out back to back ... */
#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
/* ... followed by the fragment shader texture state shadows */
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE (16*1024)

/* Offsets of the sampler/memobj/mipmap tables within HLSQ shadow RAM */
#define HLSQ_SAMPLER_OFFSET 0x000
#define HLSQ_MEMOBJ_OFFSET 0x400
#define HLSQ_MIPMAP_OFFSET 0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700136
/* Shift for the register count field of a CP_REG_TO_MEM loop command */
#define REG_TO_MEM_LOOP_COUNT_SHIFT 18

/*
 * Build the dword for a PC draw initiator from its component fields.
 * Bit 0 of index_size selects the index size and bit 1 selects the
 * small-index mode; the pre-draw-initiator-enable bit is always set.
 * All parameters are fully parenthesized so expression arguments
 * (e.g. "a | b") expand with the intended precedence.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
147
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains start and end of a range of registers; these
 * ranges are copied into the register shadow on context save.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
185
/*
 * Global registers that need to be saved separately (they are not part
 * of the 0x2000-based context register ranges above; each is saved
 * individually with its own CP_REG_TO_MEM command).
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};
215
#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)

/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where registers are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
226
227#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
228/*
229 * Function for executing dest = ( (reg & and) ROL rol ) | or
230 */
231static unsigned int *rmw_regtomem(unsigned int *cmd,
232 unsigned int reg, unsigned int and,
233 unsigned int rol, unsigned int or,
234 unsigned int dest)
235{
236 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
237 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
238 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
239 *cmd++ = 0x00000000; /* AND value */
240 *cmd++ = reg; /* OR address */
241
242 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
243 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
244 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
245 *cmd++ = and; /* AND value */
246 *cmd++ = or; /* OR value */
247
248 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
249 *cmd++ = A3XX_CP_SCRATCH_REG2;
250 *cmd++ = dest;
251
252 return cmd;
253}
254#endif
255
/*
 * Build the command stream that saves the HW context registers, shader
 * constants and texture state into this context's shadow memory.
 *
 * The first dword of the sequence is reserved and patched later (its
 * host/GPU addresses are recorded in constant_save_commands[0]); two
 * more patch points for the VS/FS constant save commands are recorded
 * in constant_save_commands[1] and [2] and filled in by a fixup step
 * once the actual constant lengths are known.  The finished sequence is
 * wrapped into an IB1 via create_ib1() and tmp_ctx.cmd is advanced past
 * the emitted commands.
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
	struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;
	unsigned int i;

	/* Record the patch point for the (skipped) leading dword */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	/* The IB proper starts after the reserved dword */
	start = cmd;

	/* Let the pipeline drain before reading any state back */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;		/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		/* NOTE(review): inner "start" shadows the outer start
		 * pointer; here it is the first register of the range */
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		/* Registers live at dword offset 0x2000; place each range
		 * at the matching offset inside the 8K-aligned shadow */
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants (only if cond_execs[2] says so) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Patch point: count/src dword filled in at fixup time */
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 From fixup:

	 dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	 src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	 From register spec:
	 SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants (only if cond_execs[3] says so) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Patch point: both dwords of this command filled in at fixup */
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 From fixup:

	 dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	 src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	 From register spec:
	 SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	 From fixup:

	 base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	 offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	 From register spec:
	 SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	 start offset in on chip RAM,
	 128bit aligned

	 dst = base + offset
	 Because of the base alignment we can use
	 dst = base | offset
	 */
	*cmd++ = 0;	/* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS tables live SSIZE above VS) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
426
427/* Copy GMEM contents to system memory shadow. */
428static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
429 struct adreno_context *drawctxt,
430 struct gmem_shadow_t *shadow)
431{
432 unsigned int *cmds = tmp_ctx.cmd;
433 unsigned int *start = cmds;
434
435 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
436 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
437
438 /* RB_MODE_CONTROL */
439 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
440 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
441 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
442 /* RB_RENDER_CONTROL */
443 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
444 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
445
446 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
447 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
448 /* RB_COPY_CONTROL */
449 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
450 RB_CLEAR_MODE_RESOLVE) |
451 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
452 tmp_ctx.gmem_base >> 14);
453 /* RB_COPY_DEST_BASE */
454 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
455 shadow->gmemshadow.gpuaddr >> 5);
456 /* RB_COPY_DEST_PITCH */
457 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
458 (shadow->pitch * 4) / 32);
459 /* RB_COPY_DEST_INFO */
460 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
461 RB_TILINGMODE_LINEAR) |
462 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
463 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
464 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
465
466 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
467 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
468 /* GRAS_SC_CONTROL */
469 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
470
471 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
472 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
473 /* VFD_CONTROL_0 */
474 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
475 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
476 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
477 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
478 /* VFD_CONTROL_1 */
479 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
480 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
481 _SET(VFD_CTRLREG1_REGID4INST, 252);
482
483 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
484 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
485 /* VFD_FETCH_INSTR_0_0 */
486 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
487 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
488 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
489 /* VFD_FETCH_INSTR_1_0 */
490 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
491 shadow->quad_vertices.gpuaddr);
492
493 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
494 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
495 /* VFD_DECODE_INSTR_0 */
496 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
497 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
498 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
499 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
500 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
501 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
502
503 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
504 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
505 /* HLSQ_CONTROL_0_REG */
506 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
507 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
508 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
509 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
510 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
511 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
512 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
513 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
514 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
515 /* HLSQ_CONTROL_1_REG */
516 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
517 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
518 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
519 /* HLSQ_CONTROL_2_REG */
520 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
521 /* HLSQ_CONTROL_3_REG */
522 *cmds++ = 0x00000000;
523
524 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
525 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
526 /* HLSQ_VS_CONTROL_REG */
527 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
528 /* HLSQ_FS_CONTROL_REG */
529 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
530 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
531 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
532 /* HLSQ_CONST_VSPRESV_RANGE_REG */
533 *cmds++ = 0x00000000;
534 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
535 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
536 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
537
538 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
539 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
540 /* SP_FS_LENGTH_REG */
541 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
542
543 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
544 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
545 /* SP_SP_CTRL_REG */
546 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
547 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
548
549 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
550 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
551 /* SP_VS_CTRL_REG0 */
552 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
553 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
554 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
555 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
556 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
557 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
558 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
559 /* SP_VS_CTRL_REG1 */
560 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
561 /* SP_VS_PARAM_REG */
562 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
563 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
564 /* SP_VS_OUT_REG_0 */
565 *cmds++ = 0x00000000;
566 /* SP_VS_OUT_REG_1 */
567 *cmds++ = 0x00000000;
568 /* SP_VS_OUT_REG_2 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_3 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_4 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_5 */
575 *cmds++ = 0x00000000;
576 /* SP_VS_OUT_REG_6 */
577 *cmds++ = 0x00000000;
578 /* SP_VS_OUT_REG_7 */
579 *cmds++ = 0x00000000;
580
581 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
582 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
583 /* SP_VS_VPC_DST_REG_0 */
584 *cmds++ = 0x00000000;
585 /* SP_VS_VPC_DST_REG_1 */
586 *cmds++ = 0x00000000;
587 /* SP_VS_VPC_DST_REG_2 */
588 *cmds++ = 0x00000000;
589 /* SP_VS_VPC_DST_REG_3 */
590 *cmds++ = 0x00000000;
591 /* SP_VS_OBJ_OFFSET_REG */
592 *cmds++ = 0x00000000;
593 /* SP_VS_OBJ_START_REG */
594 *cmds++ = 0x00000000;
595
596 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
597 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
598 /* SP_VS_LENGTH_REG */
599 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
600 /* SP_FS_CTRL_REG0 */
601 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
602 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
603 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
604 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
605 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
606 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
607 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
608 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
609 /* SP_FS_CTRL_REG1 */
610 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
611 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
612 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
613 /* SP_FS_OBJ_OFFSET_REG */
614 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
615 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
616 /* SP_FS_OBJ_START_REG */
617 *cmds++ = 0x00000000;
618
619 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
620 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
621 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
622 *cmds++ = 0x00000000;
623 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
624 *cmds++ = 0x00000000;
625
626 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
627 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
628 /* SP_FS_OUTPUT_REG */
629 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
630
631 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
632 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
633 /* SP_FS_MRT_REG_0 */
634 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
635 /* SP_FS_MRT_REG_1 */
636 *cmds++ = 0x00000000;
637 /* SP_FS_MRT_REG_2 */
638 *cmds++ = 0x00000000;
639 /* SP_FS_MRT_REG_3 */
640 *cmds++ = 0x00000000;
641
642 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
643 *cmds++ = CP_REG(A3XX_VPC_ATTR);
644 /* VPC_ATTR */
645 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
646 _SET(VPC_VPCATTR_LMSIZE, 1);
647 /* VPC_PACK */
648 *cmds++ = 0x00000000;
649 /* VPC_VARRYING_INTERUPT_MODE_0 */
650 *cmds++ = 0x00000000;
651 /* VPC_VARRYING_INTERUPT_MODE_1 */
652 *cmds++ = 0x00000000;
653 /* VPC_VARRYING_INTERUPT_MODE_2 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARRYING_INTERUPT_MODE_3 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARYING_PS_REPL_MODE_0 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARYING_PS_REPL_MODE_1 */
660 *cmds++ = 0x00000000;
661 /* VPC_VARYING_PS_REPL_MODE_2 */
662 *cmds++ = 0x00000000;
663 /* VPC_VARYING_PS_REPL_MODE_3 */
664 *cmds++ = 0x00000000;
665
666 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
667 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
668 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
669 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
670 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
671 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
672 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
673
674 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
675 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
676 /* end; */
677 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
678 /* nop; */
679 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
680 /* nop; */
681 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
682
683 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
684 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
685 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
686 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
687 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
688 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
689 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
690
691 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
692 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
693 /* end; */
694 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
695 /* nop; */
696 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
697 /* nop; */
698 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
699
700 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
701 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
702 /* RB_MSAA_CONTROL */
703 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
704 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
705
706 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
707 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
708 /* RB_DEPTH_CONTROL */
709 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
710
711 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
712 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
713 /* RB_MRT_CONTROL0 */
714 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
715 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
716 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
717 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
718
719 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
720 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
721 /* RB_MRT_BLEND_CONTROL0 */
722 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
723 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
724 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
725 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
726 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
727 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
728 /* RB_MRT_CONTROL1 */
729 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
730 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
731 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
732
733 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
734 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
735 /* RB_MRT_BLEND_CONTROL1 */
736 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
737 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
738 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
739 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
740 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
741 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
742 /* RB_MRT_CONTROL2 */
743 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
744 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
745 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
746
747 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
748 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
749 /* RB_MRT_BLEND_CONTROL2 */
750 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
751 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
752 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
753 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
754 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
755 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
756 /* RB_MRT_CONTROL3 */
757 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
758 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
759 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
760
761 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
762 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
763 /* RB_MRT_BLEND_CONTROL3 */
764 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
765 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
766 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
767 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
768 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
769 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
770
771 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
772 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
773 /* VFD_INDEX_MIN */
774 *cmds++ = 0x00000000;
775 /* VFD_INDEX_MAX */
776 *cmds++ = 0xFFFFFFFF;
777 /* VFD_INSTANCEID_OFFSET */
778 *cmds++ = 0x00000000;
779 /* VFD_INDEX_OFFSET */
780 *cmds++ = 0x00000000;
781
782 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
783 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
784 /* VFD_VS_THREADING_THRESHOLD */
785 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
786 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
787
788 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
789 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
790 /* TPL1_TP_VS_TEX_OFFSET */
791 *cmds++ = 0;
792
793 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
794 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
795 /* TPL1_TP_FS_TEX_OFFSET */
796 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
797 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
798 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
799
800 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
801 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
802 /* PC_PRIM_VTX_CNTL */
803 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
804 PC_DRAW_TRIANGLES) |
805 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
806 PC_DRAW_TRIANGLES) |
807 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
808
809 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
810 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
811 /* GRAS_SC_WINDOW_SCISSOR_TL */
812 *cmds++ = 0x00000000;
813 /* GRAS_SC_WINDOW_SCISSOR_BR */
814 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
815 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
816
817 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
818 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
819 /* GRAS_SC_SCREEN_SCISSOR_TL */
820 *cmds++ = 0x00000000;
821 /* GRAS_SC_SCREEN_SCISSOR_BR */
822 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
823 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
824
825 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
826 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
827 /* GRAS_CL_VPORT_XOFFSET */
828 *cmds++ = 0x00000000;
829 /* GRAS_CL_VPORT_XSCALE */
830 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
831 /* GRAS_CL_VPORT_YOFFSET */
832 *cmds++ = 0x00000000;
833 /* GRAS_CL_VPORT_YSCALE */
834 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
835
836 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
837 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
838 /* GRAS_CL_VPORT_ZOFFSET */
839 *cmds++ = 0x00000000;
840 /* GRAS_CL_VPORT_ZSCALE */
841 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
842
843 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
844 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
845 /* GRAS_CL_CLIP_CNTL */
846 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
847 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
848 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
849 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
850 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
851
852 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
853 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
854 /* GRAS_CL_GB_CLIP_ADJ */
855 *cmds++ = 0x00000000;
856
857 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
858 *cmds++ = 0x00000000;
859
860 /*
861 * Resolve using two draw calls with a dummy register
862 * write in between. This is a HLM workaround
863 * that should be removed later.
864 */
865 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
866 *cmds++ = 0x00000000; /* Viz query info */
867 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
868 PC_DI_SRC_SEL_IMMEDIATE,
869 PC_DI_INDEX_SIZE_32_BIT,
870 PC_DI_IGNORE_VISIBILITY);
871 *cmds++ = 0x00000003; /* Num indices */
872 *cmds++ = 0x00000000; /* Index 0 */
873 *cmds++ = 0x00000001; /* Index 1 */
874 *cmds++ = 0x00000002; /* Index 2 */
875
876 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
877 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
878 *cmds++ = 0x00000000;
879
880 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
881 *cmds++ = 0x00000000; /* Viz query info */
882 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
883 PC_DI_SRC_SEL_IMMEDIATE,
884 PC_DI_INDEX_SIZE_32_BIT,
885 PC_DI_IGNORE_VISIBILITY);
886 *cmds++ = 0x00000003; /* Num indices */
887 *cmds++ = 0x00000002; /* Index 0 */
888 *cmds++ = 0x00000001; /* Index 1 */
889 *cmds++ = 0x00000003; /* Index 2 */
890
891 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
892 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
893 *cmds++ = 0x00000000;
894
895 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
896 *cmds++ = 0x00000000;
897
898 /* Create indirect buffer command for above command sequence */
899 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
900
901 return cmds;
902}
903
/*
 * Build the IB that saves the active vertex and fragment shader
 * instructions from the HLSQ shadow into the context's shader shadow
 * memory (gpustate + SHADER_OFFSET).
 *
 * Several command words are written here as placeholders (zero) and
 * recorded in drawctxt->shader_save_commands[] / cond_execs[]; they are
 * patched at context-save time by the IB built in
 * build_save_fixup_cmds(), because the real shader lengths are only
 * known from the SP_VS/FS_CTRL_REG0 registers at that point.
 */
static void build_shader_save_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;

	/*
	 * Reserve space for boolean values used for COND_EXEC packet.
	 * These live in the shadow memory BEFORE 'start' so they are not
	 * part of the IB itself; the fixup IB writes the VS/FS
	 * instruction-store-mode bits here to enable/skip each save.
	 */
	drawctxt->cond_execs[0].hostptr = cmd;
	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[1].hostptr = cmd;
	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* Scratch slots [0]/[1]; patched externally (see build_save_fixup_cmds) */
	drawctxt->shader_save_commands[0].hostptr = cmd;
	drawctxt->shader_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->shader_save_commands[1].hostptr = cmd;
	drawctxt->shader_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* The IB proper starts here; everything above is data, not commands */
	start = cmd;

	/* Save vertex shader */

	/* Only execute the save if cond_execs[0] is non-zero (set by fixup IB) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* This word is patched with the real (count | src) at save time */
	drawctxt->shader_save_commands[2].hostptr = cmd;
	drawctxt->shader_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   src = (HLSQ_SHADOW_BASE + 0x1000)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: VS half of the shader shadow, dword aligned */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Save fragment shader */
	/* Only execute the save if cond_execs[1] is non-zero (set by fixup IB) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* This word is patched with the real (count | src) at save time */
	drawctxt->shader_save_commands[3].hostptr = cmd;
	drawctxt->shader_save_commands[3].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
	   From regspec:

	   SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
	   First instruction of the whole shader will be stored from
	   the offset in instruction cache, unit = 256bits, a cache line.
	   It can start from 0 if no VS available.

	   src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: FS half of the shader shadow, dword aligned */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
996
997/*
998 * Make an IB to modify context save IBs with the correct shader instruction
999 * and constant sizes and offsets.
1000 */
1001
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/*
	 * This IB runs before the shader/constant save IBs.  The save IBs
	 * were built with placeholder words (shader_save_commands[],
	 * constant_save_commands[], cond_execs[]); here we read the live
	 * SP_VS/FS_CTRL and SP_FS_OBJ_OFFSET registers and patch those
	 * placeholders in memory with the actual sizes/offsets.
	 */

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7; /* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Invalidate the entire UCHE so stale cached data is not saved */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000; /* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0; /* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/*
	 * CPU-sync variant: dump the raw registers to memory and let the
	 * CPU compute and patch the sizes/offsets between IBs.
	 */

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else
	/*
	 * GPU-only variant: use CP_REG_RMW on scratch registers to
	 * compute (loop_count << REG_TO_MEM_LOOP_COUNT_SHIFT) | src_addr
	 * words in-pipe, then CP_REG_TO_MEM them over the placeholders.
	 * rmw_regtomem(cmd, reg, and_mask, shift, or_val, dest) is the
	 * helper that emits mask/shift/or + store for one placeholder.
	 */

	/*
	 * Shader save: VS dword count = VS_LENGTH([31:24]) * 8, hence
	 * mask 0x7f000000 shifted down by 24 - 3 = 21 (expressed as
	 * 11 + REG_TO_MEM_LOOP_COUNT_SHIFT in rotate-left terms).
	 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/*
	 * The FS case needs the fs_offset term too, so it is done with
	 * explicit RMWs on two scratch registers instead of the helper.
	 */
	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2; /* bit30: OR field is a reg */
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
		A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000; /* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000; /* AND value */
	*cmd++ = 0x00000000; /* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff; /* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */

	/* Store the composed FS (count | src) word over the placeholder */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save: VS/FS const length lives in CTRL_REG1 [9:0] */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	/* FS constant destination offset (word after the count|src word) */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	/* Extract CTRL_REG0 bit1 (buffer mode) into the COND_EXEC flag */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1152
1153/****************************************************************************/
1154/* Functions to build context restore IBs */
1155/****************************************************************************/
1156
1157static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1158 struct adreno_context *drawctxt,
1159 struct gmem_shadow_t *shadow)
1160{
1161 unsigned int *cmds = tmp_ctx.cmd;
1162 unsigned int *start = cmds;
1163
1164 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1165 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1166 /* HLSQ_CONTROL_0_REG */
1167 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1168 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1169 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1170 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1171 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1172 /* HLSQ_CONTROL_1_REG */
1173 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1174 /* HLSQ_CONTROL_2_REG */
1175 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1176 /* HLSQ_CONTROL3_REG */
1177 *cmds++ = 0x00000000;
1178
1179 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1180 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1181 /* RB_MRT_BUF_INFO0 */
1182 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1183 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1184 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1185 (shadow->gmem_pitch * 4 * 8) / 256);
1186 /* RB_MRT_BUF_BASE0 */
1187 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1188
1189 /* Texture samplers */
1190 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1191 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1192 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1193 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1194 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1195 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1196 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1197 *cmds++ = 0x00000240;
1198 *cmds++ = 0x00000000;
1199
1200 /* Texture memobjs */
1201 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1202 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1203 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1204 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1205 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1206 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1207 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1208 *cmds++ = 0x4cc06880;
1209 *cmds++ = shadow->height | (shadow->width << 14);
1210 *cmds++ = (shadow->pitch*4*8) << 9;
1211 *cmds++ = 0x00000000;
1212
1213 /* Mipmap bases */
1214 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1215 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1216 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1217 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1218 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1219 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1220 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1221 *cmds++ = shadow->gmemshadow.gpuaddr;
1222 *cmds++ = 0x00000000;
1223 *cmds++ = 0x00000000;
1224 *cmds++ = 0x00000000;
1225 *cmds++ = 0x00000000;
1226 *cmds++ = 0x00000000;
1227 *cmds++ = 0x00000000;
1228 *cmds++ = 0x00000000;
1229 *cmds++ = 0x00000000;
1230 *cmds++ = 0x00000000;
1231 *cmds++ = 0x00000000;
1232 *cmds++ = 0x00000000;
1233 *cmds++ = 0x00000000;
1234 *cmds++ = 0x00000000;
1235
1236 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1237 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1238 /* HLSQ_VS_CONTROL_REG */
1239 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1240 /* HLSQ_FS_CONTROL_REG */
1241 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1242 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1243 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1244 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1245 *cmds++ = 0x00000000;
1246 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1247 *cmds++ = 0x00000000;
1248
1249 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1250 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1251 /* SP_FS_LENGTH_REG */
1252 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1253
1254 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1255 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1256 /* SP_VS_CTRL_REG0 */
1257 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1258 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1259 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1260 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1261 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1262 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1263 /* SP_VS_CTRL_REG1 */
1264 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1265 /* SP_VS_PARAM_REG */
1266 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1267 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1268 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1269 /* SP_VS_OUT_REG0 */
1270 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1271 /* SP_VS_OUT_REG1 */
1272 *cmds++ = 0x00000000;
1273 /* SP_VS_OUT_REG2 */
1274 *cmds++ = 0x00000000;
1275 /* SP_VS_OUT_REG3 */
1276 *cmds++ = 0x00000000;
1277 /* SP_VS_OUT_REG4 */
1278 *cmds++ = 0x00000000;
1279 /* SP_VS_OUT_REG5 */
1280 *cmds++ = 0x00000000;
1281 /* SP_VS_OUT_REG6 */
1282 *cmds++ = 0x00000000;
1283 /* SP_VS_OUT_REG7 */
1284 *cmds++ = 0x00000000;
1285
1286 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1287 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1288 /* SP_VS_VPC_DST_REG0 */
1289 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1290 /* SP_VS_VPC_DST_REG1 */
1291 *cmds++ = 0x00000000;
1292 /* SP_VS_VPC_DST_REG2 */
1293 *cmds++ = 0x00000000;
1294 /* SP_VS_VPC_DST_REG3 */
1295 *cmds++ = 0x00000000;
1296 /* SP_VS_OBJ_OFFSET_REG */
1297 *cmds++ = 0x00000000;
1298 /* SP_VS_OBJ_START_REG */
1299 *cmds++ = 0x00000000;
1300
1301 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1302 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1303 /* SP_VS_LENGTH_REG */
1304 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1305 /* SP_FS_CTRL_REG0 */
1306 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1307 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1308 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1309 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1310 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1311 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1312 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1313 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1314 /* SP_FS_CTRL_REG1 */
1315 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1316 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1317 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1318 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001319 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
1320 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001321 /* SP_FS_OBJ_START_REG */
1322 *cmds++ = 0x00000000;
1323
1324 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1325 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1326 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1327 *cmds++ = 0x00000000;
1328 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1329 *cmds++ = 0x00000000;
1330
1331 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1332 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1333 /* SP_FS_OUT_REG */
1334 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1335
Jordan Crousea7ec4212012-02-04 10:23:52 -07001336 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001337 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1338 /* SP_FS_MRT_REG0 */
1339 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1340 /* SP_FS_MRT_REG1 */
1341 *cmds++ = 0;
1342 /* SP_FS_MRT_REG2 */
1343 *cmds++ = 0;
1344 /* SP_FS_MRT_REG3 */
1345 *cmds++ = 0;
1346
1347 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1348 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1349 /* VPC_ATTR */
1350 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1351 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1352 _SET(VPC_VPCATTR_LMSIZE, 1);
1353 /* VPC_PACK */
1354 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1355 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1356 /* VPC_VARYING_INTERP_MODE_0 */
1357 *cmds++ = 0x00000000;
1358 /* VPC_VARYING_INTERP_MODE1 */
1359 *cmds++ = 0x00000000;
1360 /* VPC_VARYING_INTERP_MODE2 */
1361 *cmds++ = 0x00000000;
1362 /* VPC_VARYING_IINTERP_MODE3 */
1363 *cmds++ = 0x00000000;
1364 /* VPC_VARRYING_PS_REPL_MODE_0 */
1365 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1366 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1367 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1377 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1378 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1379 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1380 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1381 /* VPC_VARRYING_PS_REPL_MODE_1 */
1382 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1388 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1389 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1392 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1393 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1394 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1395 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1396 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1397 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1398 /* VPC_VARRYING_PS_REPL_MODE_2 */
1399 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1405 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1406 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1409 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1410 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1411 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1412 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1413 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1414 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1415 /* VPC_VARRYING_PS_REPL_MODE_3 */
1416 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1422 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1423 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1426 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1427 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1428 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1429 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1430 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1431 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1432
Jordan Crousea7ec4212012-02-04 10:23:52 -07001433 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001434 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1435 /* SP_SP_CTRL_REG */
1436 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1437
1438 /* Load vertex shader */
1439 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1440 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1441 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1442 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1443 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1444 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1445 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1446 /* (sy)end; */
1447 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1448 /* nop; */
1449 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1450 /* nop; */
1451 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1452 /* nop; */
1453 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1454
1455 /* Load fragment shader */
1456 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1457 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1458 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1459 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1460 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1461 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1462 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1463 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1464 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1465 /* (rpt5)nop; */
1466 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1467 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1468 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1469 /* (sy)mov.f32f32 r1.x, r0.x; */
1470 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1471 /* mov.f32f32 r1.y, r0.y; */
1472 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1473 /* mov.f32f32 r1.z, r0.z; */
1474 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1475 /* mov.f32f32 r1.w, r0.w; */
1476 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1477 /* end; */
1478 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1479
1480 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1481 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1482 /* VFD_CONTROL_0 */
1483 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1484 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1485 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1486 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1487 /* VFD_CONTROL_1 */
1488 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1489 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1490 _SET(VFD_CTRLREG1_REGID4INST, 252);
1491
1492 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1493 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1494 /* VFD_FETCH_INSTR_0_0 */
1495 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1496 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1497 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1498 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1499 /* VFD_FETCH_INSTR_1_0 */
1500 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1501 shadow->quad_vertices_restore.gpuaddr);
1502 /* VFD_FETCH_INSTR_0_1 */
1503 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1504 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1505 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1506 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1507 /* VFD_FETCH_INSTR_1_1 */
1508 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1509 shadow->quad_vertices_restore.gpuaddr + 16);
1510
1511 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1512 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1513 /* VFD_DECODE_INSTR_0 */
1514 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1515 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1516 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1517 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1518 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1519 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1520 /* VFD_DECODE_INSTR_1 */
1521 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1522 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1523 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1524 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1525 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1526 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1527
1528 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1529 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1530 /* RB_DEPTH_CONTROL */
1531 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1532
1533 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1534 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1535 /* RB_STENCIL_CONTROL */
1536 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1537 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1538 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1539 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1540 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1541 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1542 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1543 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1544
1545 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1546 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1547 /* RB_MODE_CONTROL */
1548 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1549 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1550
1551 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1552 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1553 /* RB_RENDER_CONTROL */
1554 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1555 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1556
1557 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1558 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1559 /* RB_MSAA_CONTROL */
1560 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1561 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1562
1563 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1564 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1565 /* RB_MRT_CONTROL0 */
1566 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1567 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1568 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1569 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1570
1571 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1572 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1573 /* RB_MRT_BLENDCONTROL0 */
1574 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1575 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1576 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1577 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1578 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1579 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1580 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1581 /* RB_MRT_CONTROL1 */
1582 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1583 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1584 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1585
1586 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1587 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1588 /* RB_MRT_BLENDCONTROL1 */
1589 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1590 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1591 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1592 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1593 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1594 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1595 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1596 /* RB_MRT_CONTROL2 */
1597 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1598 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1599 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1600
1601 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1602 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1603 /* RB_MRT_BLENDCONTROL2 */
1604 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1605 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1606 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1607 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1608 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1609 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1610 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1611 /* RB_MRT_CONTROL3 */
1612 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1613 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1614 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1615
1616 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1617 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1618 /* RB_MRT_BLENDCONTROL3 */
1619 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1620 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1621 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1622 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1623 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1624 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1625 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1626
1627 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1628 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1629 /* VFD_INDEX_MIN */
1630 *cmds++ = 0x00000000;
1631 /* VFD_INDEX_MAX */
1632 *cmds++ = 0xFFFFFFFF;
1633 /* VFD_INDEX_OFFSET */
1634 *cmds++ = 0x00000000;
1635 /* TPL1_TP_VS_TEX_OFFSET */
1636 *cmds++ = 0x00000000;
1637
1638 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1639 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1640 /* VFD_VS_THREADING_THRESHOLD */
1641 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1642 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1643
1644 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1645 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1646 /* TPL1_TP_VS_TEX_OFFSET */
1647 *cmds++ = 0x00000000;
1648
1649 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1650 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1651 /* TPL1_TP_FS_TEX_OFFSET */
1652 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1653 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1654 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1655
1656 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1657 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1658 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001659 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1660 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1661 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001662
1663 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1664 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1665 /* GRAS_SU_MODE_CONTROL */
1666 *cmds++ = 0x00000000;
1667
1668 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1669 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1670 /* GRAS_SC_WINDOW_SCISSOR_TL */
1671 *cmds++ = 0x00000000;
1672 /* GRAS_SC_WINDOW_SCISSOR_BR */
1673 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1674 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1675
1676 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1677 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1678 /* GRAS_SC_SCREEN_SCISSOR_TL */
1679 *cmds++ = 0x00000000;
1680 /* GRAS_SC_SCREEN_SCISSOR_BR */
1681 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1682 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1683
1684 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1685 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1686 /* GRAS_CL_VPORT_XOFFSET */
1687 *cmds++ = 0x00000000;
1688 /* GRAS_CL_VPORT_XSCALE */
1689 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1690 /* GRAS_CL_VPORT_YOFFSET */
1691 *cmds++ = 0x00000000;
1692 /* GRAS_CL_VPORT_YSCALE */
1693 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1694
1695 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1696 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1697 /* GRAS_CL_VPORT_ZOFFSET */
1698 *cmds++ = 0x00000000;
1699 /* GRAS_CL_VPORT_ZSCALE */
1700 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1701
1702 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1703 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1704 /* GRAS_CL_CLIP_CNTL */
1705 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1706
1707 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1708 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1709 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1710 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1711
1712 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1713 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1714 /* PC_PRIM_VTX_CONTROL */
1715 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1716 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1717 PC_DRAW_TRIANGLES) |
1718 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1719 PC_DRAW_TRIANGLES) |
1720 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1721
1722 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1723 *cmds++ = 0x00000000; /* Viz query info */
1724 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1725 PC_DI_SRC_SEL_AUTO_INDEX,
1726 PC_DI_INDEX_SIZE_16_BIT,
1727 PC_DI_IGNORE_VISIBILITY);
1728 *cmds++ = 0x00000002; /* Num indices */
1729
1730 /* Create indirect buffer command for above command sequence */
1731 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1732
1733 return cmds;
1734}
1735
/*
 * build_regrestore_cmds() - Build the IB1 that reloads context registers
 * from the register shadow when a context is restored.
 *
 * Emits an HLSQ flush event plus a wait-for-idle, a full UCHE cache
 * invalidate, then a CP_LOAD_CONSTANT_CONTEXT covering every range in
 * context_register_ranges[].  The packet header is emitted last (into
 * the slot reserved at lcc_start) because its length is only known
 * after the ranges are written.  Finally one type0 write per entry of
 * global_registers[] is emitted; the GPU address of each payload slot
 * is recorded in tmp_ctx.reg_values[] so the save path can patch the
 * live register values in later.
 *
 * Consumes command space from tmp_ctx.cmd and advances it.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Invalidate the entire UCHE before reloading register state */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	/* 8K-aligned base of the register shadow */
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Ranges are stored as (start, stop) pairs */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Now that the length is known, fill in the deferred header */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * One type0 write per global register; remember where each payload
	 * word lives so the save sequence can store the value to restore.
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1793
/*
 * build_constantrestore_cmds() - Build the IB1 that restores shader
 * constants and texture state on context restore.
 *
 * The vertex/fragment constant loads are wrapped in CP_COND_EXEC
 * packets keyed on cond_execs[2]/[3] (whose payload words are written
 * here as 0 and patched by the restore fixup IB at runtime, along with
 * the CP_LOAD_STATE ord1/ord2 words recorded in
 * constant_load_commands[]).  The texture memobj/mipmap/sampler blocks
 * for VS and FS are then reloaded with fully-formed CP_LOAD_STATE
 * packets from fixed offsets in the gpustate shadow.
 *
 * Consumes command space from tmp_ctx.cmd and advances it.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/* Conditional-execution flags, patched at runtime by the fixup IB */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable register shadowing for the HLSQ write below */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1958
/*
 * build_shader_restore_cmds() - Build the IB1 that reloads vertex and
 * fragment shader instructions from the shader shadow.
 *
 * Each shader load is gated by a CP_COND_EXEC on cond_execs[0]/[1] so
 * the load only runs when a shader was actually saved.  The ord1 word
 * of each CP_LOAD_STATE is emitted as 0 and its GPU address recorded in
 * shader_load_commands[] so the save-time fixup IB can patch in the
 * real shader length (taken from SP_*_CTRL_REG0, per the comments
 * below).  The FS image lives in the second half of the shader shadow.
 *
 * Consumes command space from tmp_ctx.cmd and advances it.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	/* FS shadow is the second half of the shader shadow buffer */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2025
/*
 * build_hlsqcontrol_restore_cmds() - Build the IB1 that restores
 * A3XX_HLSQ_CONTROL_0_REG on context restore.
 *
 * The register payload is emitted as 0; its GPU address is recorded in
 * hlsqcontrol_restore_commands[0] so the restore fixup IB can patch in
 * the saved (masked) value at runtime.
 *
 * Consumes command space from tmp_ctx.cmd and advances it.
 */
static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
					   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	/* Payload slot, patched at runtime by the restore fixup IB */
	drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
	drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
		= virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);

	tmp_ctx.cmd = cmd;
}
2044
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
/*
 * build_restore_fixup_cmds() - Build the IB1 that, at save time, reads
 * the live SP control registers and rewrites the self-modifying slots
 * in the restore IBs (shader lengths, constant lengths, constant
 * offsets, conditional-exec flags, and the HLSQ control value).
 *
 * The GSL_CONTEXT_SWITCH_CPU_SYNC variant uses plain CP_REG_TO_MEM
 * copies; the default path uses rmw_regtomem() to mask/shift the
 * register value and OR in the fixed mode/stateblock bits before
 * storing it into the target ord word.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes (VS_LENGTH/FS_LENGTH fields -> ord1 slots) */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes (CONSTLENGTH fields -> ord1 slots) */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals (non-zero length => run) */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset (-> ord2 slot) */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2117
/*
 * a3xx_create_gpustate_shadow() - Build every save/restore IB for a
 * context that uses full GPU-state shadowing (non-preamble contexts).
 *
 * Each build_* helper consumes command space starting at tmp_ctx.cmd
 * and advances it, so the call order below also determines the layout
 * of the shadow buffer.  Do not reorder without checking the builders.
 *
 * Return: always 0.
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2134
/* create buffers for saving/restoring registers, constants, & GMEM */
/*
 * a3xx_create_gmem_shadow() - Allocate the GMEM shadow buffer and build
 * the GMEM save (gmem2sys) and restore (sys2gmem) command streams.
 *
 * Return: 0 on success, or the error from kgsl_allocate().
 */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	int result;

	/* Size the shadow to hold the device's entire GMEM aperture */
	calc_gmemsize(&drawctxt->context_gmem_shadow,
		adreno_dev->gmemspace.sizebytes);
	tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;

	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
		drawctxt->pagetable, drawctxt->context_gmem_shadow.size);

	if (result)
		return result;

	/* Quad vertex buffer used by the save/restore blit draws */
	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
		&tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);

	/* Flush the CPU-written commands so the GPU sees them */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;

	return 0;
}
2169
/*
 * a3xx_drawctxt_create() - Allocate and initialize the per-context
 * shadow memory and command buffers for an A3XX draw context.
 *
 * For non-preamble contexts the full state shadow IBs are built; unless
 * CTXT_FLAGS_NOGMEMALLOC is set, a GMEM shadow is created as well.  On
 * any failure the gpustate allocation is released before returning.
 *
 * Return: 0 on success or a negative error code.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more then just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
		drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	/* Zero the shadow; command builders write from CMD_OFFSET onward */
	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2208
/*
 * a3xx_drawctxt_save() - Submit the IBs that save the outgoing
 * context's state before a context switch.
 *
 * Order matters: the save fixup IB must run before the register/
 * constant save so the self-modifying slots hold current values, and
 * shaders must be saved before GMEM (the GMEM save changes shader
 * state).  Flags are updated so the matching restore path knows what
 * was actually saved.
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			       struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			"Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2252
/*
 * a3xx_drawctxt_restore() - Submit the IBs that restore the incoming
 * context's state on a context switch.
 *
 * First records the new context id in the global memstore (so the CP
 * stream identifies the active context), switches the MMU pagetable,
 * then restores GMEM (if it was saved), registers, constants, shaders,
 * and finally the HLSQ control register.  A NULL context just selects
 * the default pagetable.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
				  struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Write the new context id into the global memstore slot */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[4] = context->id;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(device, context->pagetable);

	/*
	 * Restore GMEM.  (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2308
/*
 * a3xx_rb_init() - Write the CP_ME_INIT packet into the ringbuffer and
 * submit it to initialize the micro engine.
 *
 * NOTE(review): the 17 ordinal values below are the canonical A3XX
 * ME_INIT sequence; their individual meanings come from the CP
 * microcode spec and are not documented here — confirm against the
 * A3XX CP packet reference before changing any of them.
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	/* 18 dwords: type3 header + 17 ME_INIT ordinals */
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2338
/*
 * a3xx_err_callback() - Handle a GPU error interrupt.
 * @adreno_dev: the device that raised the interrupt
 * @bit: interrupt bit position (A3XX_INT_* value) being serviced
 *
 * AHB bus errors get detailed decode-and-clear handling and return
 * early; every other known bit logs a descriptive message and then
 * turns the IRQ power flag off.  An unknown bit logs an empty string
 * (err stays "") — callers only route known bits here via
 * a3xx_irq_funcs[].
 */
static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
{
	struct kgsl_device *device = &adreno_dev->dev;
	const char *err = "";

	switch (bit) {
	case A3XX_INT_RBBM_AHB_ERROR: {
		unsigned int reg;

		adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);

		/*
		 * Return the word address of the erroring register so that it
		 * matches the register specification
		 */

		KGSL_DRV_CRIT(device,
			"RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
			reg & (1 << 28) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
			(reg >> 24) & 0x3);

		/* Clear the error */
		adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
		return;
	}
	case A3XX_INT_RBBM_REG_TIMEOUT:
		err = "RBBM: AHB register timeout";
		break;
	case A3XX_INT_RBBM_ME_MS_TIMEOUT:
		err = "RBBM: ME master split timeout";
		break;
	case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
		err = "RBBM: PFP master split timeout";
		break;
	case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
		err = "RBBM: ATB bus oveflow";
		break;
	case A3XX_INT_VFD_ERROR:
		err = "VFD: Out of bounds access";
		break;
	case A3XX_INT_CP_T0_PACKET_IN_IB:
		err = "ringbuffer TO packet in IB interrupt";
		break;
	case A3XX_INT_CP_OPCODE_ERROR:
		err = "ringbuffer opcode error interrupt";
		break;
	case A3XX_INT_CP_RESERVED_BIT_ERROR:
		err = "ringbuffer reserved bit error interrupt";
		break;
	case A3XX_INT_CP_HW_FAULT:
		err = "ringbuffer hardware fault";
		break;
	case A3XX_INT_CP_REG_PROTECT_FAULT:
		err = "ringbuffer protected mode error interrupt";
		break;
	case A3XX_INT_CP_AHB_ERROR_HALT:
		err = "ringbuffer AHB error interrupt";
		break;
	case A3XX_INT_MISC_HANG_DETECT:
		err = "MISC: GPU hang detected";
		break;
	case A3XX_INT_UCHE_OOB_ACCESS:
		err = "UCHE: Out of bounds access";
		break;
	}

	KGSL_DRV_CRIT(device, "%s\n", err);
	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
}
2409
/*
 * a3xx_cp_callback() - Handle CP completion interrupts (RB/IB1/IB2).
 *
 * For a ringbuffer interrupt the timestamp-compare enable of the
 * currently active context (read from the global memstore slot) is
 * cleared.  In all cases waiters are woken, the timestamp-expired
 * worker is scheduled, and the device notifier chain is called.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	if (irq == A3XX_INT_CP_RB_INT) {
		unsigned int context_id;
		kgsl_sharedmem_readl(&adreno_dev->dev.memstore,
				&context_id,
				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
					current_context));
		if (context_id < KGSL_MEMSTORE_MAX) {
			/* Disable further ts compares for this context */
			kgsl_sharedmem_writel(&rb->device->memstore,
					KGSL_MEMSTORE_OFFSET(context_id,
						ts_cmp_enable), 0);
			/* Make sure the write lands before waking waiters */
			wmb();
		}
		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&rb->device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);

	atomic_notifier_call_chain(&rb->device->ts_notifier_list,
				   rb->device->id, NULL);
}
2437
/* Initializer for one entry of the a3xx_irq_funcs dispatch table */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupt bits enabled in A3XX_RBBM_INT_0_MASK: AHB/bus errors, CP
 * packet/opcode/protect faults, and CP RB/IB completion interrupts.
 * ME/PFP split timeouts, VFD error and hang detect are intentionally
 * not in the mask (their table entries still exist below).
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) | \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) | \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) | \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) | \
	 (1 << A3XX_INT_CP_IB1_INT) | \
	 (1 << A3XX_INT_CP_IB2_INT) | \
	 (1 << A3XX_INT_CP_RB_INT) | \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2454
/*
 * Per-bit interrupt dispatch table, indexed by bit position in
 * A3XX_RBBM_INT_0_STATUS.  NULL entries are either unused bits or
 * interrupts handled elsewhere; a3xx_irq_handler() logs NULL hits.
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2486
2487static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2488{
2489 struct kgsl_device *device = &adreno_dev->dev;
2490 irqreturn_t ret = IRQ_NONE;
2491 unsigned int status, tmp;
2492 int i;
2493
2494 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2495
2496 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2497 if (tmp & 1) {
2498 if (a3xx_irq_funcs[i].func != NULL) {
2499 a3xx_irq_funcs[i].func(adreno_dev, i);
2500 ret = IRQ_HANDLED;
2501 } else {
2502 KGSL_DRV_CRIT(device,
2503 "Unhandled interrupt bit %x\n", i);
2504 }
2505 }
2506
2507 tmp >>= 1;
2508 }
2509
Carter Cooperb769c912012-04-13 08:16:35 -06002510 trace_kgsl_a3xx_irq_status(device, status);
2511
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002512 if (status)
2513 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2514 status);
2515 return ret;
2516}
2517
2518static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2519{
2520 struct kgsl_device *device = &adreno_dev->dev;
2521
2522 if (state)
2523 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2524 else
2525 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2526}
2527
2528static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
2529{
2530 struct kgsl_device *device = &adreno_dev->dev;
2531 unsigned int reg, val;
2532
2533 /* Freeze the counter */
2534 adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
2535 reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2536 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2537
2538 /* Read the value */
2539 adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
2540
2541 /* Reset the counter */
2542 reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
2543 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2544
2545 /* Re-enable the counter */
2546 reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
2547 reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2548 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2549
2550 return val;
2551}
2552
/*
 * One-time hardware bring-up for the A3XX core: soft-reset the GPU, then
 * program the VBIF (bus interface), performance counter, error reporting
 * and hang detection registers. The register write ordering below is the
 * required init sequence — do not reorder.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Reset the core */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
		0x00000001);
	/* Give the reset time to propagate before touching other regs */
	msleep(20);

	/* Set up 16 deep read/write request queues */

	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);

	/* Enable WR-REQ */
	adreno_regwrite(device, A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x000000FF);

	/* Set up round robin arbitration between both AXI ports */
	adreno_regwrite(device, A3XX_VBIF_ARB_CTL, 0x00000030);

	/* Set up AOOO (ack out-of-order) on the VBIF */
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C);
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C);

	/* ABIT sort is only configured on APQ8064 targets */
	if (cpu_is_apq8064()) {
		/* Enable 1K sort */
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT, 0x000000FF);
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
	}
	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Tune the hystersis counters for SP and CP idle detection */
	adreno_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
	adreno_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 16) | 0xFFF);

}
2612
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002613/* Defined in adreno_a3xx_snapshot.c */
2614void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2615 int *remain, int hang);
2616
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002617struct adreno_gpudev adreno_a3xx_gpudev = {
2618 .reg_rbbm_status = A3XX_RBBM_STATUS,
2619 .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
2620 .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
2621
2622 .ctxt_create = a3xx_drawctxt_create,
2623 .ctxt_save = a3xx_drawctxt_save,
2624 .ctxt_restore = a3xx_drawctxt_restore,
2625 .rb_init = a3xx_rb_init,
2626 .irq_control = a3xx_irq_control,
2627 .irq_handler = a3xx_irq_handler,
2628 .busy_cycles = a3xx_busy_cycles,
2629 .start = a3xx_start,
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002630 .snapshot = a3xx_snapshot,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002631};