blob: c01e676bf211e9659715f28903e5009a99f2b5f3 [file] [log] [blame]
/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
13
14#include <linux/delay.h>
Sudhakara Rao Tentu5746bde2012-03-15 12:16:32 +053015#include <mach/socinfo.h>
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070016
17#include "kgsl.h"
18#include "adreno.h"
19#include "kgsl_sharedmem.h"
20#include "kgsl_cffdump.h"
21#include "a3xx_reg.h"
22
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive).  Both offsets are dword register
 * offsets, so each pair (start, stop) describes stop - start + 1
 * consecutive registers.
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303C, 0x303C, 0x305E, 0x305F,
};

/* Number of (start, stop) pairs in a3xx_registers */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
69
/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */

#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 *   8K - ALU Constant Shadow (8K aligned)
 *   4K - H/W Register Shadow (8K aligned)
 *   5K - Command and Vertex Buffers
 *   8K - Shader Instruction Shadow
 *  ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE (8*1024) /* 8KB */
#define REG_SHADOW_SIZE (4*1024) /* 4KB */
#define CMD_BUFFER_SIZE (5*1024) /* 5KB */
#define TEX_SIZE_MEM_OBJECTS 896 /* bytes */
#define TEX_SIZE_MIPMAP 1936 /* bytes */
#define TEX_SIZE_SAMPLER_OBJ 256 /* bytes */
/* VS + FS copies of the three texture state sections, hence the *2 */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */
#define SHADER_SHADOW_SIZE (8*1024) /* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/*
 * Offsets to different sections in context shadow memory.
 * Each section starts where the previous one ends (see the layout
 * diagram above); the VS/FS texture sub-sections are packed back to
 * back inside the texture shadow.
 */
#define REG_OFFSET ALU_SHADOW_SIZE
#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE)
#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE (16*1024)

/* Offsets of texture state groups within the HLSQ shadow RAM */
#define HLSQ_SAMPLER_OFFSET 0x000
#define HLSQ_MEMOBJ_OFFSET 0x400
#define HLSQ_MIPMAP_OFFSET 0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)

/* Bit position of the loop count field in a CP_REG_TO_MEM src dword */
#define REG_TO_MEM_LOOP_COUNT_SHIFT 18
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700137
/*
 * Build a PC draw initiator dword from its component fields.
 * index_size bit 0 selects the index size and bit 1 selects the
 * small-index mode; the pre-draw-initiator enable bit is always set.
 * All macro arguments are fully parenthesized so that callers may
 * safely pass compound expressions (the original expansion applied
 * `& 1` / `>> 1` to the raw `index_size` token, which mis-binds for
 * low-precedence expressions such as `a | b`).
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
146
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains the start and end (inclusive) of a range of
 * registers; these ranges are copied into the register shadow by
 * build_regconstantsave_cmds() when shadow writes are disabled.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
184
/*
 * Global registers that need to be saved separately (they are not part
 * of the 0x2000-based context register ranges above).  Each entry is a
 * single register offset; its saved value lands at the matching slot in
 * tmp_ctx.reg_values[].
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};

#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
216
/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where global register values are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
225
226#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
227/*
228 * Function for executing dest = ( (reg & and) ROL rol ) | or
229 */
230static unsigned int *rmw_regtomem(unsigned int *cmd,
231 unsigned int reg, unsigned int and,
232 unsigned int rol, unsigned int or,
233 unsigned int dest)
234{
235 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
236 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
237 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
238 *cmd++ = 0x00000000; /* AND value */
239 *cmd++ = reg; /* OR address */
240
241 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
242 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
243 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
244 *cmd++ = and; /* AND value */
245 *cmd++ = or; /* OR value */
246
247 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
248 *cmd++ = A3XX_CP_SCRATCH_REG2;
249 *cmd++ = dest;
250
251 return cmd;
252}
253#endif
254
/*
 * Build the command stream that saves the HW context registers, shader
 * constants and texture state into the context shadow memory.  The
 * stream is emitted into tmp_ctx.cmd and wrapped in an IB1 via
 * create_ib1() at the end; several dwords are left as placeholders
 * (recorded in drawctxt->constant_save_commands[]) to be patched later
 * by the fixup code once the real shader constant lengths are known.
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
	struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;
	unsigned int i;

	/* Reserve one dword ahead of the IB start; its GPU address is
	 * recorded so fixup code can locate it later. */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	start = cmd;

	/* Idle the pipeline before reading back any state */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;		/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		/* NOTE(review): this inner 'start' shadows the outer
		 * 'unsigned int *start' command-buffer pointer; legal,
		 * but easy to misread. */
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		/* Destination: shadow slot for this register range
		 * (ranges start at register offset 0x2000) */
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants, but only if cond_execs[2]
	 * indicates there are any (conditional execution gate) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants, gated on cond_execs[3] */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	   From fixup:

	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   dst = base + offset
	   Because of the base alignment we can use
	   dst = base | offset
	 */
	*cmd++ = 0;	/* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS state lives SSIZE above VS) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
425
426/* Copy GMEM contents to system memory shadow. */
427static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
428 struct adreno_context *drawctxt,
429 struct gmem_shadow_t *shadow)
430{
431 unsigned int *cmds = tmp_ctx.cmd;
432 unsigned int *start = cmds;
433
434 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
435 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
436
437 /* RB_MODE_CONTROL */
438 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
439 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
440 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
441 /* RB_RENDER_CONTROL */
442 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
443 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
444
445 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
446 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
447 /* RB_COPY_CONTROL */
448 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
449 RB_CLEAR_MODE_RESOLVE) |
450 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
451 tmp_ctx.gmem_base >> 14);
452 /* RB_COPY_DEST_BASE */
453 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
454 shadow->gmemshadow.gpuaddr >> 5);
455 /* RB_COPY_DEST_PITCH */
456 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
457 (shadow->pitch * 4) / 32);
458 /* RB_COPY_DEST_INFO */
459 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
460 RB_TILINGMODE_LINEAR) |
461 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
462 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
463 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
464
465 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
466 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
467 /* GRAS_SC_CONTROL */
468 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
469
470 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
471 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
472 /* VFD_CONTROL_0 */
473 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
474 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
475 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
476 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
477 /* VFD_CONTROL_1 */
478 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
479 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
480 _SET(VFD_CTRLREG1_REGID4INST, 252);
481
482 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
483 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
484 /* VFD_FETCH_INSTR_0_0 */
485 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
486 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
487 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
488 /* VFD_FETCH_INSTR_1_0 */
489 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
490 shadow->quad_vertices.gpuaddr);
491
492 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
493 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
494 /* VFD_DECODE_INSTR_0 */
495 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
496 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
497 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
498 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
499 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
500 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
501
502 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
503 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
504 /* HLSQ_CONTROL_0_REG */
505 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
506 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
507 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
508 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
509 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
510 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
511 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
512 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
513 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
514 /* HLSQ_CONTROL_1_REG */
515 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
516 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
517 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
518 /* HLSQ_CONTROL_2_REG */
519 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
520 /* HLSQ_CONTROL_3_REG */
521 *cmds++ = 0x00000000;
522
523 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
524 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
525 /* HLSQ_VS_CONTROL_REG */
526 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
527 /* HLSQ_FS_CONTROL_REG */
528 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
529 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
530 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
531 /* HLSQ_CONST_VSPRESV_RANGE_REG */
532 *cmds++ = 0x00000000;
533 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
534 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
535 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
536
537 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
538 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
539 /* SP_FS_LENGTH_REG */
540 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
541
542 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
543 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
544 /* SP_SP_CTRL_REG */
545 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
546 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
547
548 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
549 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
550 /* SP_VS_CTRL_REG0 */
551 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
552 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
553 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
554 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
555 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
556 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
557 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
558 /* SP_VS_CTRL_REG1 */
559 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
560 /* SP_VS_PARAM_REG */
561 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
562 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
563 /* SP_VS_OUT_REG_0 */
564 *cmds++ = 0x00000000;
565 /* SP_VS_OUT_REG_1 */
566 *cmds++ = 0x00000000;
567 /* SP_VS_OUT_REG_2 */
568 *cmds++ = 0x00000000;
569 /* SP_VS_OUT_REG_3 */
570 *cmds++ = 0x00000000;
571 /* SP_VS_OUT_REG_4 */
572 *cmds++ = 0x00000000;
573 /* SP_VS_OUT_REG_5 */
574 *cmds++ = 0x00000000;
575 /* SP_VS_OUT_REG_6 */
576 *cmds++ = 0x00000000;
577 /* SP_VS_OUT_REG_7 */
578 *cmds++ = 0x00000000;
579
580 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
581 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
582 /* SP_VS_VPC_DST_REG_0 */
583 *cmds++ = 0x00000000;
584 /* SP_VS_VPC_DST_REG_1 */
585 *cmds++ = 0x00000000;
586 /* SP_VS_VPC_DST_REG_2 */
587 *cmds++ = 0x00000000;
588 /* SP_VS_VPC_DST_REG_3 */
589 *cmds++ = 0x00000000;
590 /* SP_VS_OBJ_OFFSET_REG */
591 *cmds++ = 0x00000000;
592 /* SP_VS_OBJ_START_REG */
593 *cmds++ = 0x00000000;
594
595 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
596 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
597 /* SP_VS_LENGTH_REG */
598 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
599 /* SP_FS_CTRL_REG0 */
600 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
601 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
602 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
603 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
604 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
605 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
606 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
607 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
608 /* SP_FS_CTRL_REG1 */
609 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
610 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
611 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
612 /* SP_FS_OBJ_OFFSET_REG */
613 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
614 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
615 /* SP_FS_OBJ_START_REG */
616 *cmds++ = 0x00000000;
617
618 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
619 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
620 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
621 *cmds++ = 0x00000000;
622 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
623 *cmds++ = 0x00000000;
624
625 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
626 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
627 /* SP_FS_OUTPUT_REG */
628 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
629
630 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
631 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
632 /* SP_FS_MRT_REG_0 */
633 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
634 /* SP_FS_MRT_REG_1 */
635 *cmds++ = 0x00000000;
636 /* SP_FS_MRT_REG_2 */
637 *cmds++ = 0x00000000;
638 /* SP_FS_MRT_REG_3 */
639 *cmds++ = 0x00000000;
640
641 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
642 *cmds++ = CP_REG(A3XX_VPC_ATTR);
643 /* VPC_ATTR */
644 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
645 _SET(VPC_VPCATTR_LMSIZE, 1);
646 /* VPC_PACK */
647 *cmds++ = 0x00000000;
648 /* VPC_VARRYING_INTERUPT_MODE_0 */
649 *cmds++ = 0x00000000;
650 /* VPC_VARRYING_INTERUPT_MODE_1 */
651 *cmds++ = 0x00000000;
652 /* VPC_VARRYING_INTERUPT_MODE_2 */
653 *cmds++ = 0x00000000;
654 /* VPC_VARRYING_INTERUPT_MODE_3 */
655 *cmds++ = 0x00000000;
656 /* VPC_VARYING_PS_REPL_MODE_0 */
657 *cmds++ = 0x00000000;
658 /* VPC_VARYING_PS_REPL_MODE_1 */
659 *cmds++ = 0x00000000;
660 /* VPC_VARYING_PS_REPL_MODE_2 */
661 *cmds++ = 0x00000000;
662 /* VPC_VARYING_PS_REPL_MODE_3 */
663 *cmds++ = 0x00000000;
664
665 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
666 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
667 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
668 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
669 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
670 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
671 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
672
673 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
674 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
675 /* end; */
676 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
677 /* nop; */
678 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
679 /* nop; */
680 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
681
682 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
683 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
684 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
685 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
686 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
687 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
688 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
689
690 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
691 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
692 /* end; */
693 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
694 /* nop; */
695 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
696 /* nop; */
697 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
698
699 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
700 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
701 /* RB_MSAA_CONTROL */
702 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
703 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
704
705 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
706 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
707 /* RB_DEPTH_CONTROL */
708 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
709
710 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
711 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
712 /* RB_MRT_CONTROL0 */
713 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
714 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
715 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
716 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
717
718 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
719 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
720 /* RB_MRT_BLEND_CONTROL0 */
721 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
722 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
723 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
724 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
725 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
726 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
727 /* RB_MRT_CONTROL1 */
728 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
729 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
730 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
731
732 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
733 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
734 /* RB_MRT_BLEND_CONTROL1 */
735 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
736 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
737 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
738 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
739 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
740 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
741 /* RB_MRT_CONTROL2 */
742 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
743 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
744 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
745
746 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
747 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
748 /* RB_MRT_BLEND_CONTROL2 */
749 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
750 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
751 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
752 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
753 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
754 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
755 /* RB_MRT_CONTROL3 */
756 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
757 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
758 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
759
760 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
761 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
762 /* RB_MRT_BLEND_CONTROL3 */
763 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
764 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
765 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
766 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
767 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
768 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
769
770 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
771 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
772 /* VFD_INDEX_MIN */
773 *cmds++ = 0x00000000;
774 /* VFD_INDEX_MAX */
775 *cmds++ = 0xFFFFFFFF;
776 /* VFD_INSTANCEID_OFFSET */
777 *cmds++ = 0x00000000;
778 /* VFD_INDEX_OFFSET */
779 *cmds++ = 0x00000000;
780
781 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
782 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
783 /* VFD_VS_THREADING_THRESHOLD */
784 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
785 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
786
787 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
788 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
789 /* TPL1_TP_VS_TEX_OFFSET */
790 *cmds++ = 0;
791
792 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
793 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
794 /* TPL1_TP_FS_TEX_OFFSET */
795 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
796 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
797 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
798
799 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
800 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
801 /* PC_PRIM_VTX_CNTL */
802 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
803 PC_DRAW_TRIANGLES) |
804 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
805 PC_DRAW_TRIANGLES) |
806 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
807
808 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
809 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
810 /* GRAS_SC_WINDOW_SCISSOR_TL */
811 *cmds++ = 0x00000000;
812 /* GRAS_SC_WINDOW_SCISSOR_BR */
813 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
814 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
815
816 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
817 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
818 /* GRAS_SC_SCREEN_SCISSOR_TL */
819 *cmds++ = 0x00000000;
820 /* GRAS_SC_SCREEN_SCISSOR_BR */
821 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
822 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
823
824 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
825 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
826 /* GRAS_CL_VPORT_XOFFSET */
827 *cmds++ = 0x00000000;
828 /* GRAS_CL_VPORT_XSCALE */
829 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
830 /* GRAS_CL_VPORT_YOFFSET */
831 *cmds++ = 0x00000000;
832 /* GRAS_CL_VPORT_YSCALE */
833 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
834
835 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
836 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
837 /* GRAS_CL_VPORT_ZOFFSET */
838 *cmds++ = 0x00000000;
839 /* GRAS_CL_VPORT_ZSCALE */
840 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
841
842 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
843 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
844 /* GRAS_CL_CLIP_CNTL */
845 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
846 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
847 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
848 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
849 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
850
851 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
852 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
853 /* GRAS_CL_GB_CLIP_ADJ */
854 *cmds++ = 0x00000000;
855
856 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
857 *cmds++ = 0x00000000;
858
859 /*
860 * Resolve using two draw calls with a dummy register
861 * write in between. This is a HLM workaround
862 * that should be removed later.
863 */
864 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
865 *cmds++ = 0x00000000; /* Viz query info */
866 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
867 PC_DI_SRC_SEL_IMMEDIATE,
868 PC_DI_INDEX_SIZE_32_BIT,
869 PC_DI_IGNORE_VISIBILITY);
870 *cmds++ = 0x00000003; /* Num indices */
871 *cmds++ = 0x00000000; /* Index 0 */
872 *cmds++ = 0x00000001; /* Index 1 */
873 *cmds++ = 0x00000002; /* Index 2 */
874
875 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
876 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
877 *cmds++ = 0x00000000;
878
879 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
880 *cmds++ = 0x00000000; /* Viz query info */
881 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
882 PC_DI_SRC_SEL_IMMEDIATE,
883 PC_DI_INDEX_SIZE_32_BIT,
884 PC_DI_IGNORE_VISIBILITY);
885 *cmds++ = 0x00000003; /* Num indices */
886 *cmds++ = 0x00000002; /* Index 0 */
887 *cmds++ = 0x00000001; /* Index 1 */
888 *cmds++ = 0x00000003; /* Index 2 */
889
890 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
891 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
892 *cmds++ = 0x00000000;
893
894 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
895 *cmds++ = 0x00000000;
896
897 /* Create indirect buffer command for above command sequence */
898 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
899
900 return cmds;
901}
902
/*
 * Build the IB that saves the active vertex and fragment shader
 * instructions from the HLSQ shadow into the context's gpustate buffer.
 * Shader sizes and source addresses are unknown at build time, so
 * placeholder dwords are reserved here and patched at context-save time
 * by the fixup IB (see build_save_fixup_cmds).
 */
static void build_shader_save_cmds(struct adreno_device *adreno_dev,
			    struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;

	/* Reserve space for boolean values used for COND_EXEC packet */
	drawctxt->cond_execs[0].hostptr = cmd;
	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[1].hostptr = cmd;
	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/*
	 * Reserve two scratch dwords for the fixup pass.
	 * NOTE(review): build_save_fixup_cmds visibly writes
	 * SP_FS_OBJ_OFFSET_REG into slot [1] on the CPU-sync path; slot [0]
	 * is not patched in the code visible here — confirm against the
	 * rest of the file.
	 */
	drawctxt->shader_save_commands[0].hostptr = cmd;
	drawctxt->shader_save_commands[0].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->shader_save_commands[1].hostptr = cmd;
	drawctxt->shader_save_commands[1].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* The IB proper starts here; the dwords above are data, not commands */
	start = cmd;

	/* Save vertex shader */

	/*
	 * Run the next 3 dwords only if the boolean at cond_execs[0] is
	 * set (the fixup IB sets it when there is a VS worth saving).
	 */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Placeholder: fixup IB patches this dword with the count|src below */
	drawctxt->shader_save_commands[2].hostptr = cmd;
	drawctxt->shader_save_commands[2].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 *
	 *	dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
	 *
	 * From regspec:
	 *	SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	 *	If bit31 is 1, it means overflow
	 *	or any long shader.
	 *
	 *	src = (HLSQ_SHADOW_BASE + 0x1000)/4
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Save fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	/* Placeholder: fixup IB patches this dword with the count|src below */
	drawctxt->shader_save_commands[3].hostptr = cmd;
	drawctxt->shader_save_commands[3].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	 * From fixup:
	 *
	 *	dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
	 *
	 * From regspec:
	 *	SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	 *	If bit31 is 1, it means overflow
	 *	or any long shader.
	 *
	 *	fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
	 * From regspec:
	 *
	 *	SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
	 *	First instruction of the whole shader will be stored from
	 *	the offset in instruction cache, unit = 256bits, a cache line.
	 *	It can start from 0 if no VS available.
	 *
	 *	src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* FS shadow lives in the second half of the shader shadow region */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
995
996/*
997 * Make an IB to modify context save IBs with the correct shader instruction
998 * and constant sizes and offsets.
999 */
1000
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Fully invalidate the UCHE (no address range given) */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
	    UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
	    UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
	    0;			/* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC

	/*
	 * CPU-sync path: dump the raw register values to memory; the CPU
	 * computes the patched save commands from them.
	 */

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else

	/*
	 * GPU-only path: use register read-modify-write through CP scratch
	 * registers to compute each patched dword in place, then write it
	 * over the placeholder in the save IB.
	 */

	/*
	 * Shader save: patch shader_save_commands[2] with
	 * (VS dword count << REG_TO_MEM_LOOP_COUNT_SHIFT) | VS src address.
	 * NOTE(review): the rotate amount 11+SHIFT presumably converts
	 * VS_LENGTH (bits 31:24, 256-bit units, i.e. dwords = VS_LENGTH * 8)
	 * into the loop-count field — confirm against rmw_regtomem.
	 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0;	/* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
	    A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000;	/* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4;	/* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;	/* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000;	/* AND value */
	*cmd++ = 0x00000000;	/* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff;	/* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3;	/* OR address */

	/* Write the assembled FS count|src dword over the placeholder */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save: patch VS/FS constant count|src dwords */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	/* Patch the FS constant destination address (dword after the count) */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1151
1152/****************************************************************************/
1153/* Functions to build context restore IBs */
1154/****************************************************************************/
1155
1156static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1157 struct adreno_context *drawctxt,
1158 struct gmem_shadow_t *shadow)
1159{
1160 unsigned int *cmds = tmp_ctx.cmd;
1161 unsigned int *start = cmds;
1162
1163 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1164 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1165 /* HLSQ_CONTROL_0_REG */
1166 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1167 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1168 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1169 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1170 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1171 /* HLSQ_CONTROL_1_REG */
1172 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1173 /* HLSQ_CONTROL_2_REG */
1174 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1175 /* HLSQ_CONTROL3_REG */
1176 *cmds++ = 0x00000000;
1177
1178 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1179 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1180 /* RB_MRT_BUF_INFO0 */
1181 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1182 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1183 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1184 (shadow->gmem_pitch * 4 * 8) / 256);
1185 /* RB_MRT_BUF_BASE0 */
1186 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1187
1188 /* Texture samplers */
1189 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1190 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1191 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1192 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1193 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1194 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1195 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1196 *cmds++ = 0x00000240;
1197 *cmds++ = 0x00000000;
1198
1199 /* Texture memobjs */
1200 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1201 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1202 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1203 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1204 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1205 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1206 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1207 *cmds++ = 0x4cc06880;
1208 *cmds++ = shadow->height | (shadow->width << 14);
1209 *cmds++ = (shadow->pitch*4*8) << 9;
1210 *cmds++ = 0x00000000;
1211
1212 /* Mipmap bases */
1213 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1214 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1215 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1216 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1217 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1218 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1219 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1220 *cmds++ = shadow->gmemshadow.gpuaddr;
1221 *cmds++ = 0x00000000;
1222 *cmds++ = 0x00000000;
1223 *cmds++ = 0x00000000;
1224 *cmds++ = 0x00000000;
1225 *cmds++ = 0x00000000;
1226 *cmds++ = 0x00000000;
1227 *cmds++ = 0x00000000;
1228 *cmds++ = 0x00000000;
1229 *cmds++ = 0x00000000;
1230 *cmds++ = 0x00000000;
1231 *cmds++ = 0x00000000;
1232 *cmds++ = 0x00000000;
1233 *cmds++ = 0x00000000;
1234
1235 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1236 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1237 /* HLSQ_VS_CONTROL_REG */
1238 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1239 /* HLSQ_FS_CONTROL_REG */
1240 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1241 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1242 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1243 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1244 *cmds++ = 0x00000000;
1245 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1246 *cmds++ = 0x00000000;
1247
1248 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1249 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1250 /* SP_FS_LENGTH_REG */
1251 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1252
1253 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1254 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1255 /* SP_VS_CTRL_REG0 */
1256 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1257 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1258 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1259 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1260 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1261 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1262 /* SP_VS_CTRL_REG1 */
1263 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1264 /* SP_VS_PARAM_REG */
1265 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1266 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1267 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1268 /* SP_VS_OUT_REG0 */
1269 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1270 /* SP_VS_OUT_REG1 */
1271 *cmds++ = 0x00000000;
1272 /* SP_VS_OUT_REG2 */
1273 *cmds++ = 0x00000000;
1274 /* SP_VS_OUT_REG3 */
1275 *cmds++ = 0x00000000;
1276 /* SP_VS_OUT_REG4 */
1277 *cmds++ = 0x00000000;
1278 /* SP_VS_OUT_REG5 */
1279 *cmds++ = 0x00000000;
1280 /* SP_VS_OUT_REG6 */
1281 *cmds++ = 0x00000000;
1282 /* SP_VS_OUT_REG7 */
1283 *cmds++ = 0x00000000;
1284
1285 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1286 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1287 /* SP_VS_VPC_DST_REG0 */
1288 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1289 /* SP_VS_VPC_DST_REG1 */
1290 *cmds++ = 0x00000000;
1291 /* SP_VS_VPC_DST_REG2 */
1292 *cmds++ = 0x00000000;
1293 /* SP_VS_VPC_DST_REG3 */
1294 *cmds++ = 0x00000000;
1295 /* SP_VS_OBJ_OFFSET_REG */
1296 *cmds++ = 0x00000000;
1297 /* SP_VS_OBJ_START_REG */
1298 *cmds++ = 0x00000000;
1299
1300 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1301 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1302 /* SP_VS_LENGTH_REG */
1303 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1304 /* SP_FS_CTRL_REG0 */
1305 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1306 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1307 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1308 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1309 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1310 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1311 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1312 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1313 /* SP_FS_CTRL_REG1 */
1314 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1315 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1316 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1317 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001318 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
1319 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001320 /* SP_FS_OBJ_START_REG */
1321 *cmds++ = 0x00000000;
1322
1323 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1324 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1325 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1326 *cmds++ = 0x00000000;
1327 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1328 *cmds++ = 0x00000000;
1329
1330 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1331 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1332 /* SP_FS_OUT_REG */
1333 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1334
Jordan Crousea7ec4212012-02-04 10:23:52 -07001335 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001336 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1337 /* SP_FS_MRT_REG0 */
1338 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1339 /* SP_FS_MRT_REG1 */
1340 *cmds++ = 0;
1341 /* SP_FS_MRT_REG2 */
1342 *cmds++ = 0;
1343 /* SP_FS_MRT_REG3 */
1344 *cmds++ = 0;
1345
1346 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1347 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1348 /* VPC_ATTR */
1349 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1350 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1351 _SET(VPC_VPCATTR_LMSIZE, 1);
1352 /* VPC_PACK */
1353 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1354 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1355 /* VPC_VARYING_INTERP_MODE_0 */
1356 *cmds++ = 0x00000000;
1357 /* VPC_VARYING_INTERP_MODE1 */
1358 *cmds++ = 0x00000000;
1359 /* VPC_VARYING_INTERP_MODE2 */
1360 *cmds++ = 0x00000000;
1361 /* VPC_VARYING_IINTERP_MODE3 */
1362 *cmds++ = 0x00000000;
1363 /* VPC_VARRYING_PS_REPL_MODE_0 */
1364 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1365 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1366 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1367 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1368 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1369 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1370 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1371 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1372 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1377 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1378 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1379 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1380 /* VPC_VARRYING_PS_REPL_MODE_1 */
1381 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1382 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1388 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1389 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1392 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1393 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1394 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1395 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1396 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1397 /* VPC_VARRYING_PS_REPL_MODE_2 */
1398 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1399 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1405 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1406 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1409 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1410 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1411 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1412 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1413 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1414 /* VPC_VARRYING_PS_REPL_MODE_3 */
1415 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1416 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1422 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1423 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1426 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1427 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1428 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1429 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1430 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1431
Jordan Crousea7ec4212012-02-04 10:23:52 -07001432 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001433 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1434 /* SP_SP_CTRL_REG */
1435 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1436
1437 /* Load vertex shader */
1438 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1439 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1440 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1441 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1442 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1443 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1444 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1445 /* (sy)end; */
1446 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1447 /* nop; */
1448 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1449 /* nop; */
1450 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1451 /* nop; */
1452 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1453
1454 /* Load fragment shader */
1455 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1456 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1457 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1458 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1459 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1460 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1461 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1462 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1463 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1464 /* (rpt5)nop; */
1465 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1466 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1467 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1468 /* (sy)mov.f32f32 r1.x, r0.x; */
1469 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1470 /* mov.f32f32 r1.y, r0.y; */
1471 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1472 /* mov.f32f32 r1.z, r0.z; */
1473 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1474 /* mov.f32f32 r1.w, r0.w; */
1475 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1476 /* end; */
1477 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1478
1479 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1480 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1481 /* VFD_CONTROL_0 */
1482 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1483 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1484 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1485 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1486 /* VFD_CONTROL_1 */
1487 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1488 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1489 _SET(VFD_CTRLREG1_REGID4INST, 252);
1490
1491 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1492 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1493 /* VFD_FETCH_INSTR_0_0 */
1494 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1495 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1496 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1497 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1498 /* VFD_FETCH_INSTR_1_0 */
1499 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1500 shadow->quad_vertices_restore.gpuaddr);
1501 /* VFD_FETCH_INSTR_0_1 */
1502 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1503 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1504 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1505 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1506 /* VFD_FETCH_INSTR_1_1 */
1507 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1508 shadow->quad_vertices_restore.gpuaddr + 16);
1509
1510 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1511 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1512 /* VFD_DECODE_INSTR_0 */
1513 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1514 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1515 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1516 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1517 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1518 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1519 /* VFD_DECODE_INSTR_1 */
1520 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1521 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1522 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1523 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1524 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1525 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1526
1527 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1528 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1529 /* RB_DEPTH_CONTROL */
1530 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1531
1532 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1533 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1534 /* RB_STENCIL_CONTROL */
1535 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1536 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1537 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1538 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1539 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1540 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1541 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1542 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1543
1544 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1545 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1546 /* RB_MODE_CONTROL */
1547 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1548 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1549
1550 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1551 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1552 /* RB_RENDER_CONTROL */
1553 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1554 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1555
1556 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1557 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1558 /* RB_MSAA_CONTROL */
1559 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1560 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1561
1562 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1563 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1564 /* RB_MRT_CONTROL0 */
1565 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1566 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1567 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1568 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1569
1570 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1571 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1572 /* RB_MRT_BLENDCONTROL0 */
1573 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1574 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1575 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1576 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1577 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1578 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1579 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1580 /* RB_MRT_CONTROL1 */
1581 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1582 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1583 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1584
1585 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1586 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1587 /* RB_MRT_BLENDCONTROL1 */
1588 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1589 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1590 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1591 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1592 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1593 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1594 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1595 /* RB_MRT_CONTROL2 */
1596 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1597 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1598 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1599
1600 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1601 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1602 /* RB_MRT_BLENDCONTROL2 */
1603 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1604 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1605 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1606 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1607 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1608 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1609 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1610 /* RB_MRT_CONTROL3 */
1611 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1612 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1613 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1614
1615 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1616 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1617 /* RB_MRT_BLENDCONTROL3 */
1618 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1619 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1620 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1621 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1622 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1623 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1624 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1625
1626 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1627 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1628 /* VFD_INDEX_MIN */
1629 *cmds++ = 0x00000000;
1630 /* VFD_INDEX_MAX */
1631 *cmds++ = 0xFFFFFFFF;
1632 /* VFD_INDEX_OFFSET */
1633 *cmds++ = 0x00000000;
1634 /* TPL1_TP_VS_TEX_OFFSET */
1635 *cmds++ = 0x00000000;
1636
1637 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1638 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1639 /* VFD_VS_THREADING_THRESHOLD */
1640 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1641 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1642
1643 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1644 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1645 /* TPL1_TP_VS_TEX_OFFSET */
1646 *cmds++ = 0x00000000;
1647
1648 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1649 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1650 /* TPL1_TP_FS_TEX_OFFSET */
1651 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1652 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1653 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1654
1655 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1656 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1657 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001658 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1659 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1660 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001661
1662 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1663 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1664 /* GRAS_SU_MODE_CONTROL */
1665 *cmds++ = 0x00000000;
1666
1667 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1668 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1669 /* GRAS_SC_WINDOW_SCISSOR_TL */
1670 *cmds++ = 0x00000000;
1671 /* GRAS_SC_WINDOW_SCISSOR_BR */
1672 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1673 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1674
1675 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1676 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1677 /* GRAS_SC_SCREEN_SCISSOR_TL */
1678 *cmds++ = 0x00000000;
1679 /* GRAS_SC_SCREEN_SCISSOR_BR */
1680 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1681 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1682
1683 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1684 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1685 /* GRAS_CL_VPORT_XOFFSET */
1686 *cmds++ = 0x00000000;
1687 /* GRAS_CL_VPORT_XSCALE */
1688 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1689 /* GRAS_CL_VPORT_YOFFSET */
1690 *cmds++ = 0x00000000;
1691 /* GRAS_CL_VPORT_YSCALE */
1692 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1693
1694 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1695 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1696 /* GRAS_CL_VPORT_ZOFFSET */
1697 *cmds++ = 0x00000000;
1698 /* GRAS_CL_VPORT_ZSCALE */
1699 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1700
1701 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1702 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1703 /* GRAS_CL_CLIP_CNTL */
1704 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1705
1706 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1707 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1708 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1709 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1710
1711 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1712 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1713 /* PC_PRIM_VTX_CONTROL */
1714 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1715 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1716 PC_DRAW_TRIANGLES) |
1717 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1718 PC_DRAW_TRIANGLES) |
1719 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1720
1721 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1722 *cmds++ = 0x00000000; /* Viz query info */
1723 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1724 PC_DI_SRC_SEL_AUTO_INDEX,
1725 PC_DI_INDEX_SIZE_16_BIT,
1726 PC_DI_IGNORE_VISIBILITY);
1727 *cmds++ = 0x00000002; /* Num indices */
1728
1729 /* Create indirect buffer command for above command sequence */
1730 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1731
1732 return cmds;
1733}
1734
/*
 * build_regrestore_cmds() - build the IB1 that restores context registers
 * @adreno_dev: Adreno device the context belongs to
 * @drawctxt: context being initialized
 *
 * Emits a CP_LOAD_CONSTANT_CONTEXT that reloads every range in
 * context_register_ranges[] from the register shadow, then reads back
 * each entry of global_registers[] via type-0 packets whose payload
 * slots are recorded in tmp_ctx.reg_values[] for later patching.
 * Command space is consumed from tmp_ctx.cmd and the resulting IB is
 * wrapped by create_ib1() into drawctxt->reg_restore.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;	/* No end addr for full invalidate */

	lcc_start = cmd;

	/*
	 * Packet header is patched below once the register-range payload
	 * length is known.
	 */
	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Ranges are stored as (start, stop) pairs, hence the /2 stride */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
					(cmd - lcc_start) - 1);

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * One type-0 write per global register; the payload slot address is
	 * saved so the save path can patch the live value in later.
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1792
/*
 * build_constantrestore_cmds() - build the IB1 that restores shader
 * constants and texture state
 * @adreno_dev: Adreno device the context belongs to
 * @drawctxt: context being initialized
 *
 * Emits conditional (CP_COND_EXEC gated) CP_LOAD_STATE packets for the
 * vertex and fragment shader constants, followed by unconditional
 * CP_LOAD_STATE packets for VS/FS texture memory objects, mipmap
 * address tables and sampler objects.  Several payload dwords are left
 * as zero placeholders ("ord1"/"ord2") whose GPU addresses are recorded
 * in drawctxt->constant_load_commands[]/cond_execs[] so that the
 * restore-fixup IB can patch in sizes and offsets read from the live
 * SP_*_CTRL registers at context-switch time.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/* Predicate slots for the two CP_COND_EXECs below; patched later */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable shadowing while HLSQ is rewritten */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;	/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;	/* ord1 */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;	/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;	/* ord1 */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;	/* ord2 */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1957
/*
 * build_shader_restore_cmds() - build the IB1 that reloads shader
 * instructions from the shader shadow
 * @adreno_dev: Adreno device the context belongs to
 * @drawctxt: context being initialized
 *
 * Emits one CP_COND_EXEC-gated CP_LOAD_STATE for the vertex shader and
 * one for the fragment shader.  The "ord1" dword of each packet is left
 * zero; its GPU address is recorded in drawctxt->shader_load_commands[]
 * so that the restore-fixup IB can patch in the shader length read from
 * SP_VS/FS_CTRL_REG0 at context-switch time.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;	/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16)
	 */
	*cmd++ = 0;	/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;	/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16)
	 */
	*cmd++ = 0;	/*ord1 */
	/* FS shadow lives in the second half of the shader shadow buffer */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2024
2025static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
2026 struct adreno_context *drawctxt)
2027{
2028 unsigned int *cmd = tmp_ctx.cmd;
2029 unsigned int *start = cmd;
2030
2031 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
2032 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
2033 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
2034 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
2035 = virt2gpu(cmd, &drawctxt->gpustate);
2036 *cmd++ = 0;
2037
2038 /* Create indirect buffer command for above command sequence */
2039 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
2040
2041 tmp_ctx.cmd = cmd;
2042}
2043
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
/*
 * build_restore_fixup_cmds() - build the IB1 that patches the restore
 * IBs with live register values
 * @adreno_dev: Adreno device the context belongs to
 * @drawctxt: context being initialized
 *
 * At save time this IB reads the current SP shader/constant control
 * registers and writes derived values into the placeholder dwords of
 * the shader/constant restore IBs and their CP_COND_EXEC predicates
 * (either via CP_REG_TO_MEM or the read-modify-write helper,
 * depending on GSL_CONTEXT_SWITCH_CPU_SYNC).
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2116
/*
 * a3xx_create_gpustate_shadow() - build all save/restore IBs for a
 * legacy (non-preamble) context
 * @adreno_dev: Adreno device the context belongs to
 * @drawctxt: context being initialized
 *
 * The builders consume command space sequentially from tmp_ctx.cmd,
 * so the call order below also fixes the layout of the gpustate
 * buffer.  Returns 0 (the builders cannot fail).
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2133
/* create buffers for saving/restoring registers, constants, & GMEM */
/*
 * a3xx_create_gmem_shadow() - allocate the GMEM shadow buffer and
 * build its save/restore IBs
 * @adreno_dev: Adreno device the context belongs to
 * @drawctxt: context being initialized
 *
 * Returns 0 on success or the error code from kgsl_allocate().  Sets
 * CTXT_FLAGS_GMEM_SHADOW only after the shadow and both IBs are ready.
 */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	int result;

	calc_gmemsize(&drawctxt->context_gmem_shadow,
		      adreno_dev->gmemspace.sizebytes);
	tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;

	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
		drawctxt->pagetable, drawctxt->context_gmem_shadow.size);

	if (result)
		return result;

	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
			   &tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
					  &drawctxt->context_gmem_shadow);

	/* Flush CPU writes so the GPU sees the generated commands */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;

	return 0;
}
2168
/*
 * a3xx_drawctxt_create() - allocate and initialize per-context state
 * @adreno_dev: Adreno device the context belongs to
 * @drawctxt: context being created
 *
 * Returns 0 on success or a negative error code; on failure the
 * gpustate allocation is released before returning.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
			struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more then just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
		drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	/* Preamble contexts carry their state in the IBs; no shadow needed */
	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	/* Unwind the gpustate allocation on any failure path */
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2207
/*
 * a3xx_drawctxt_save() - queue the commands that save the outgoing
 * context's state
 * @adreno_dev: Adreno device performing the switch
 * @context: context being switched away from (may be NULL)
 *
 * For non-preamble contexts this issues the save-fixup, the
 * register/constant save and (if enabled) the shader save IBs, then
 * saves GMEM if the context has a shadow.  Flags are updated so the
 * matching restore path knows what to reload.
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			"Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2251
/*
 * a3xx_drawctxt_restore() - queue the commands that restore the
 * incoming context's state
 * @adreno_dev: Adreno device performing the switch
 * @context: context being switched to, or NULL for "no context"
 *
 * Records the new current context in the memstore, switches the MMU
 * pagetable, then restores GMEM and (for non-preamble contexts) the
 * registers, constants, shaders and HLSQ control register, mirroring
 * a3xx_drawctxt_save().
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
			struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Write the context pointer into the memstore for bookkeeping */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
	cmds[4] = (unsigned int)context;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(device, context->pagetable);

	/*
	 * Restore GMEM. (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2307
/*
 * a3xx_rb_init() - submit the CP_ME_INIT packet on the ringbuffer
 * @adreno_dev: Adreno device being initialized
 * @rb: ringbuffer to write the init packet into
 *
 * Writes the 17-dword microengine init payload and submits it.  The
 * payload values are A3XX-specific initialization constants; only the
 * protected-mode field is individually documented below.
 */
static void a3xx_rb_init(struct adreno_device *adreno_dev,
			 struct adreno_ringbuffer *rb)
{
	unsigned int *cmds, cmds_gpu;
	/* 18 dwords = ME_INIT header + 17-dword payload */
	cmds = adreno_ringbuffer_allocspace(rb, 18);
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);

	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode control - turned off for A3XX */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);
}
2337
2338static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2339{
2340 struct kgsl_device *device = &adreno_dev->dev;
2341 const char *err = "";
2342
2343 switch (bit) {
2344 case A3XX_INT_RBBM_AHB_ERROR: {
2345 unsigned int reg;
2346
2347 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2348
2349 /*
2350 * Return the word address of the erroring register so that it
2351 * matches the register specification
2352 */
2353
2354 KGSL_DRV_CRIT(device,
2355 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2356 reg & (1 << 28) ? "WRITE" : "READ",
2357 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2358 (reg >> 24) & 0x3);
2359
2360 /* Clear the error */
2361 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2362 return;
2363 }
2364 case A3XX_INT_RBBM_REG_TIMEOUT:
2365 err = "RBBM: AHB register timeout";
2366 break;
2367 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2368 err = "RBBM: ME master split timeout";
2369 break;
2370 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2371 err = "RBBM: PFP master split timeout";
2372 break;
2373 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2374 err = "RBBM: ATB bus oveflow";
2375 break;
2376 case A3XX_INT_VFD_ERROR:
2377 err = "VFD: Out of bounds access";
2378 break;
2379 case A3XX_INT_CP_T0_PACKET_IN_IB:
2380 err = "ringbuffer TO packet in IB interrupt";
2381 break;
2382 case A3XX_INT_CP_OPCODE_ERROR:
2383 err = "ringbuffer opcode error interrupt";
2384 break;
2385 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2386 err = "ringbuffer reserved bit error interrupt";
2387 break;
2388 case A3XX_INT_CP_HW_FAULT:
2389 err = "ringbuffer hardware fault";
2390 break;
2391 case A3XX_INT_CP_REG_PROTECT_FAULT:
2392 err = "ringbuffer protected mode error interrupt";
2393 break;
2394 case A3XX_INT_CP_AHB_ERROR_HALT:
2395 err = "ringbuffer AHB error interrupt";
2396 break;
2397 case A3XX_INT_MISC_HANG_DETECT:
2398 err = "MISC: GPU hang detected";
2399 break;
2400 case A3XX_INT_UCHE_OOB_ACCESS:
2401 err = "UCHE: Out of bounds access";
2402 break;
2403 }
2404
2405 KGSL_DRV_CRIT(device, "%s\n", err);
2406 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2407}
2408
/*
 * a3xx_cp_callback() - handle CP completion interrupts (RB/IB1/IB2)
 * @adreno_dev: Adreno device that raised the interrupt
 * @irq: interrupt bit number that fired
 *
 * On an RB interrupt the timestamp-compare enable is cleared in the
 * memstore.  In all cases, wake anyone waiting on the device, queue
 * the timestamp-expired work and run the timestamp notifier chain.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	if (irq == A3XX_INT_CP_RB_INT) {
		kgsl_sharedmem_writel(&rb->device->memstore,
			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable), 0);
		/* Make sure the memstore write lands before waking waiters */
		wmb();
		KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&rb->device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(rb->device->work_queue, &rb->device->ts_expired_ws);

	atomic_notifier_call_chain(&rb->device->ts_notifier_list,
				   rb->device->id, NULL);
}
2428
/* Wrap a handler function pointer for the a3xx_irq_funcs[] table */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupts enabled in A3XX_RBBM_INT_0_MASK: the handled error
 * sources plus the CP IB1/IB2/RB completion interrupts.  Bits not
 * listed here (e.g. ME/PFP master split timeout, VFD error, hang
 * detect) are left masked.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_REG_TIMEOUT) |      \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2445
/*
 * Per-bit dispatch table for A3XX_RBBM_INT_0_STATUS; the array index
 * is the interrupt bit number.  NULL entries have no handler (the
 * interrupt is either unused or not enabled in A3XX_INT_MASK).
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2477
2478static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2479{
2480 struct kgsl_device *device = &adreno_dev->dev;
2481 irqreturn_t ret = IRQ_NONE;
2482 unsigned int status, tmp;
2483 int i;
2484
2485 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2486
2487 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2488 if (tmp & 1) {
2489 if (a3xx_irq_funcs[i].func != NULL) {
2490 a3xx_irq_funcs[i].func(adreno_dev, i);
2491 ret = IRQ_HANDLED;
2492 } else {
2493 KGSL_DRV_CRIT(device,
2494 "Unhandled interrupt bit %x\n", i);
2495 }
2496 }
2497
2498 tmp >>= 1;
2499 }
2500
2501 if (status)
2502 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2503 status);
2504 return ret;
2505}
2506
2507static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2508{
2509 struct kgsl_device *device = &adreno_dev->dev;
2510
2511 if (state)
2512 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
2513 else
2514 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2515}
2516
2517static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
2518{
2519 struct kgsl_device *device = &adreno_dev->dev;
2520 unsigned int reg, val;
2521
2522 /* Freeze the counter */
2523 adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
2524 reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2525 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2526
2527 /* Read the value */
2528 adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
2529
2530 /* Reset the counter */
2531 reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
2532 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2533
2534 /* Re-enable the counter */
2535 reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
2536 reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2537 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2538
2539 return val;
2540}
2541
/*
 * One-time A3XX hardware bring-up: soft-reset the core, tune the VBIF
 * bus interface, and enable error reporting, performance counters and
 * hang detection.  The register values are hardware-specific settings
 * and the write sequence is order-sensitive - do not reorder.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Reset the core */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
		0x00000001);
	msleep(20);	/* let the soft reset settle before touching the core */

	/* Set up 16 deep read/write request queues */

	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);

	/* Enable WR-REQ */
	adreno_regwrite(device, A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x000000FF);

	/* Set up round robin arbitration between both AXI ports */
	adreno_regwrite(device, A3XX_VBIF_ARB_CTL, 0x00000030);

	/* Set up AOOO */
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C);
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C);

	/* ABIT sort is only programmed on APQ8064 targets */
	if (cpu_is_apq8064()) {
		/* Enable 1K sort */
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT, 0x000000FF);
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
	}
	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable the RBBM error reporting bits.  This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 16) | 0xFFF);

}
2597
/* Defined in adreno_a3xx_snapshot.c */
void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
	int *remain, int hang);

/*
 * Per-core operations table for the A3XX family.  The generic adreno
 * layer drives the GPU exclusively through these hooks and register
 * offsets, keeping A3XX-specific details out of the common code.
 */
struct adreno_gpudev adreno_a3xx_gpudev = {
	/* Register offsets the generic code needs to access directly */
	.reg_rbbm_status = A3XX_RBBM_STATUS,
	.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
	.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,

	/* Draw-context lifecycle and ringbuffer setup */
	.ctxt_create = a3xx_drawctxt_create,
	.ctxt_save = a3xx_drawctxt_save,
	.ctxt_restore = a3xx_drawctxt_restore,
	.rb_init = a3xx_rb_init,
	/* Interrupts, busy accounting, bring-up and crash snapshotting */
	.irq_control = a3xx_irq_control,
	.irq_handler = a3xx_irq_handler,
	.busy_cycles = a3xx_busy_cycles,
	.start = a3xx_start,
	.snapshot = a3xx_snapshot,
};