blob: 58a096358807e21e0d4ee510c2137ffda444cc3e [file] [log] [blame]
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/delay.h>
Steve Mucklef132c6c2012-06-06 18:30:57 -070015#include <linux/sched.h>
Sudhakara Rao Tentu5746bde2012-03-15 12:16:32 +053016#include <mach/socinfo.h>
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070017
18#include "kgsl.h"
19#include "adreno.h"
20#include "kgsl_sharedmem.h"
21#include "kgsl_cffdump.h"
22#include "a3xx_reg.h"
Carter Cooperb769c912012-04-13 08:16:35 -060023#include "adreno_a3xx_trace.h"
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070024
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive).
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303C, 0x303C, 0x305E, 0x305F,
};

/* Number of (start, end) range pairs above */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
71
/*
 * Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */

#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 *   8K - ALU Constant Shadow (8K aligned)
 *   4K - H/W Register Shadow (8K aligned)
 *   5K - Command and Vertex Buffers
 *   8K - Shader Instruction Shadow
 *  ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE (8*1024) /* 8KB */
#define REG_SHADOW_SIZE (4*1024) /* 4KB */
#define CMD_BUFFER_SIZE (5*1024) /* 5KB */
#define TEX_SIZE_MEM_OBJECTS 896 /* bytes */
#define TEX_SIZE_MIPMAP 1936 /* bytes */
#define TEX_SIZE_SAMPLER_OBJ 256 /* bytes */
/* VS + FS copies of all three texture sections */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */
#define SHADER_SHADOW_SIZE (8*1024) /* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET ALU_SHADOW_SIZE
#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE)
#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE (16*1024)

/* Offsets of texture state sections within the HLSQ shadow */
#define HLSQ_SAMPLER_OFFSET 0x000
#define HLSQ_MEMOBJ_OFFSET 0x400
#define HLSQ_MIPMAP_OFFSET 0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)

/* Bit position of the dword count in a CP_REG_TO_MEM loop command */
#define REG_TO_MEM_LOOP_COUNT_SHIFT 18
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700139
/*
 * Compose a PC draw initiator dword from the primitive type, the index
 * source select, the index size (bit 0 selects 16/32-bit indices, bit 1
 * flags "small index" mode) and the visibility culling mode.
 *
 * All macro parameters are fully parenthesized so that callers may pass
 * arbitrary expressions (e.g. conditional expressions) without changing
 * the intended operator binding (CERT PRE01-C); the original expanded
 * index_size unparenthesized in the & and >> terms.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	(((index_size) & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	(((index_size) >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
148
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains the start and end (inclusive) of a range of
 * registers; build_regconstantsave_cmds() copies each range into the
 * register shadow with one CP_REG_TO_MEM per range.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
186
/*
 * Global registers that need to be saved separately (one CP_REG_TO_MEM
 * each, into the per-register slots recorded in tmp_ctx.reg_values[]).
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};
216
#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)

/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd; /* Next available dword in C&V buffer */

	/* Addresses in command buffer where registers are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base; /* Base GPU address of GMEM */
} tmp_ctx;
227
228#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
229/*
230 * Function for executing dest = ( (reg & and) ROL rol ) | or
231 */
232static unsigned int *rmw_regtomem(unsigned int *cmd,
233 unsigned int reg, unsigned int and,
234 unsigned int rol, unsigned int or,
235 unsigned int dest)
236{
237 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
238 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
239 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
240 *cmd++ = 0x00000000; /* AND value */
241 *cmd++ = reg; /* OR address */
242
243 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
244 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
245 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
246 *cmd++ = and; /* AND value */
247 *cmd++ = or; /* OR value */
248
249 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
250 *cmd++ = A3XX_CP_SCRATCH_REG2;
251 *cmd++ = dest;
252
253 return cmd;
254}
255#endif
256
257static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
258 struct adreno_context *drawctxt)
259{
260 unsigned int *cmd = tmp_ctx.cmd;
Jordan Crousea7ec4212012-02-04 10:23:52 -0700261 unsigned int *start;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700262 unsigned int i;
263
264 drawctxt->constant_save_commands[0].hostptr = cmd;
265 drawctxt->constant_save_commands[0].gpuaddr =
266 virt2gpu(cmd, &drawctxt->gpustate);
267 cmd++;
268
Jordan Crousea7ec4212012-02-04 10:23:52 -0700269 start = cmd;
270
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700271 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
272 *cmd++ = 0;
273
274#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
275 /*
276 * Context registers are already shadowed; just need to
277 * disable shadowing to prevent corruption.
278 */
279
280 *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
281 *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
282 *cmd++ = 4 << 16; /* regs, start=0 */
283 *cmd++ = 0x0; /* count = 0 */
284
285#else
286 /*
287 * Make sure the HW context has the correct register values before
288 * reading them.
289 */
290
291 /* Write context registers into shadow */
292 for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
293 unsigned int start = context_register_ranges[i * 2];
294 unsigned int end = context_register_ranges[i * 2 + 1];
295 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
296 *cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
297 start;
298 *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
299 & 0xFFFFE000) + (start - 0x2000) * 4;
300 }
301#endif
302
303 /* Need to handle some of the global registers separately */
304 for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
305 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
306 *cmd++ = global_registers[i];
307 *cmd++ = tmp_ctx.reg_values[i];
308 }
309
310 /* Save vertex shader constants */
311 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
312 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
313 *cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
314 *cmd++ = 0x0000FFFF;
315 *cmd++ = 3; /* EXEC_COUNT */
316 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
317 drawctxt->constant_save_commands[1].hostptr = cmd;
318 drawctxt->constant_save_commands[1].gpuaddr =
319 virt2gpu(cmd, &drawctxt->gpustate);
320 /*
321 From fixup:
322
323 dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
324 src = (HLSQ_SHADOW_BASE + 0x2000) / 4
325
326 From register spec:
327 SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
328 */
329 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
330 /* ALU constant shadow base */
331 *cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;
332
333 /* Save fragment shader constants */
334 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
335 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
336 *cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
337 *cmd++ = 0x0000FFFF;
338 *cmd++ = 3; /* EXEC_COUNT */
339 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
340 drawctxt->constant_save_commands[2].hostptr = cmd;
341 drawctxt->constant_save_commands[2].gpuaddr =
342 virt2gpu(cmd, &drawctxt->gpustate);
343 /*
344 From fixup:
345
346 dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
347 src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4
348
349 From register spec:
350 SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
351 */
352 *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
353
354 /*
355 From fixup:
356
357 base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
358 offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET
359
360 From register spec:
361 SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
362 start offset in on chip RAM,
363 128bit aligned
364
365 dst = base + offset
366 Because of the base alignment we can use
367 dst = base | offset
368 */
369 *cmd++ = 0; /* dst */
370
371 /* Save VS texture memory objects */
372 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
373 *cmd++ =
374 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
375 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
376 *cmd++ =
377 (drawctxt->gpustate.gpuaddr +
378 VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
379
380 /* Save VS texture mipmap pointers */
381 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
382 *cmd++ =
383 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
384 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
385 *cmd++ =
386 (drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
387
388 /* Save VS texture sampler objects */
389 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
390 *cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
391 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
392 *cmd++ =
393 (drawctxt->gpustate.gpuaddr +
394 VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
395
396 /* Save FS texture memory objects */
397 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
398 *cmd++ =
399 ((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
400 ((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
401 *cmd++ =
402 (drawctxt->gpustate.gpuaddr +
403 FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
404
405 /* Save FS texture mipmap pointers */
406 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
407 *cmd++ =
408 ((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
409 ((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
410 *cmd++ =
411 (drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
412
413 /* Save FS texture sampler objects */
414 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
415 *cmd++ =
416 ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
417 ((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
418 *cmd++ =
419 (drawctxt->gpustate.gpuaddr +
420 FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
421
422 /* Create indirect buffer command for above command sequence */
423 create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);
424
425 tmp_ctx.cmd = cmd;
426}
427
428/* Copy GMEM contents to system memory shadow. */
429static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
430 struct adreno_context *drawctxt,
431 struct gmem_shadow_t *shadow)
432{
433 unsigned int *cmds = tmp_ctx.cmd;
434 unsigned int *start = cmds;
435
Jordan Crousefb3012f2012-06-22 13:11:05 -0600436 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
437 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
438
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700439 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
440 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
441
442 /* RB_MODE_CONTROL */
443 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
444 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
445 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
446 /* RB_RENDER_CONTROL */
447 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
448 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
449
450 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
451 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
452 /* RB_COPY_CONTROL */
453 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
454 RB_CLEAR_MODE_RESOLVE) |
455 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
456 tmp_ctx.gmem_base >> 14);
457 /* RB_COPY_DEST_BASE */
458 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
459 shadow->gmemshadow.gpuaddr >> 5);
460 /* RB_COPY_DEST_PITCH */
461 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
462 (shadow->pitch * 4) / 32);
463 /* RB_COPY_DEST_INFO */
464 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
465 RB_TILINGMODE_LINEAR) |
466 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
467 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
468 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
469
470 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
471 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
472 /* GRAS_SC_CONTROL */
473 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
474
475 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
476 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
477 /* VFD_CONTROL_0 */
478 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
479 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
480 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
481 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
482 /* VFD_CONTROL_1 */
483 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
484 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
485 _SET(VFD_CTRLREG1_REGID4INST, 252);
486
487 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
488 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
489 /* VFD_FETCH_INSTR_0_0 */
490 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
491 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
492 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
493 /* VFD_FETCH_INSTR_1_0 */
494 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
495 shadow->quad_vertices.gpuaddr);
496
497 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
498 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
499 /* VFD_DECODE_INSTR_0 */
500 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
501 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
502 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
503 _SET(VFD_DECODEINSTRUCTIONS_REGID, 5) |
504 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
505 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
506
507 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
508 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
509 /* HLSQ_CONTROL_0_REG */
510 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_TWO_PIX_QUADS) |
511 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
512 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
513 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
514 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
515 _SET(HLSQ_CTRL0REG_CONSTSWITCHMODE, 1) |
516 _SET(HLSQ_CTRL0REG_LAZYUPDATEDISABLE, 1) |
517 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
518 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
519 /* HLSQ_CONTROL_1_REG */
520 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
521 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1) |
522 _SET(HLSQ_CTRL1REG_RESERVED1, 4);
523 /* HLSQ_CONTROL_2_REG */
524 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
525 /* HLSQ_CONTROL_3_REG */
526 *cmds++ = 0x00000000;
527
528 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
529 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
530 /* HLSQ_VS_CONTROL_REG */
531 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
532 /* HLSQ_FS_CONTROL_REG */
533 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
534 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 272) |
535 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
536 /* HLSQ_CONST_VSPRESV_RANGE_REG */
537 *cmds++ = 0x00000000;
538 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
539 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
540 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
541
542 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
543 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
544 /* SP_FS_LENGTH_REG */
545 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
546
547 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
548 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
549 /* SP_SP_CTRL_REG */
550 *cmds++ = _SET(SP_SPCTRLREG_CONSTMODE, 1) |
551 _SET(SP_SPCTRLREG_SLEEPMODE, 1);
552
553 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
554 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
555 /* SP_VS_CTRL_REG0 */
556 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
557 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
558 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
559 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 3) |
560 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
561 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
562 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
563 /* SP_VS_CTRL_REG1 */
564 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
565 /* SP_VS_PARAM_REG */
566 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 1) |
567 _SET(SP_VSPARAMREG_PSIZEREGID, 252);
568 /* SP_VS_OUT_REG_0 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_1 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_2 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_3 */
575 *cmds++ = 0x00000000;
576 /* SP_VS_OUT_REG_4 */
577 *cmds++ = 0x00000000;
578 /* SP_VS_OUT_REG_5 */
579 *cmds++ = 0x00000000;
580 /* SP_VS_OUT_REG_6 */
581 *cmds++ = 0x00000000;
582 /* SP_VS_OUT_REG_7 */
583 *cmds++ = 0x00000000;
584
585 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
586 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
587 /* SP_VS_VPC_DST_REG_0 */
588 *cmds++ = 0x00000000;
589 /* SP_VS_VPC_DST_REG_1 */
590 *cmds++ = 0x00000000;
591 /* SP_VS_VPC_DST_REG_2 */
592 *cmds++ = 0x00000000;
593 /* SP_VS_VPC_DST_REG_3 */
594 *cmds++ = 0x00000000;
595 /* SP_VS_OBJ_OFFSET_REG */
596 *cmds++ = 0x00000000;
597 /* SP_VS_OBJ_START_REG */
598 *cmds++ = 0x00000000;
599
600 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
601 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
602 /* SP_VS_LENGTH_REG */
603 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
604 /* SP_FS_CTRL_REG0 */
605 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
606 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
607 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
608 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
609 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
610 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_TWO_VTX_QUADS) |
611 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
612 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
613 /* SP_FS_CTRL_REG1 */
614 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
615 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
616 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
617 /* SP_FS_OBJ_OFFSET_REG */
618 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 272) |
619 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
620 /* SP_FS_OBJ_START_REG */
621 *cmds++ = 0x00000000;
622
623 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
624 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
625 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
626 *cmds++ = 0x00000000;
627 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
628 *cmds++ = 0x00000000;
629
630 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
631 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
632 /* SP_FS_OUTPUT_REG */
633 *cmds++ = _SET(SP_IMAGEOUTPUTREG_PAD0, SP_PIXEL_BASED);
634
635 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
636 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
637 /* SP_FS_MRT_REG_0 */
638 *cmds++ = _SET(SP_FSMRTREG_REGID, 1);
639 /* SP_FS_MRT_REG_1 */
640 *cmds++ = 0x00000000;
641 /* SP_FS_MRT_REG_2 */
642 *cmds++ = 0x00000000;
643 /* SP_FS_MRT_REG_3 */
644 *cmds++ = 0x00000000;
645
646 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
647 *cmds++ = CP_REG(A3XX_VPC_ATTR);
648 /* VPC_ATTR */
649 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
650 _SET(VPC_VPCATTR_LMSIZE, 1);
651 /* VPC_PACK */
652 *cmds++ = 0x00000000;
653 /* VPC_VARRYING_INTERUPT_MODE_0 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARRYING_INTERUPT_MODE_1 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARRYING_INTERUPT_MODE_2 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARRYING_INTERUPT_MODE_3 */
660 *cmds++ = 0x00000000;
661 /* VPC_VARYING_PS_REPL_MODE_0 */
662 *cmds++ = 0x00000000;
663 /* VPC_VARYING_PS_REPL_MODE_1 */
664 *cmds++ = 0x00000000;
665 /* VPC_VARYING_PS_REPL_MODE_2 */
666 *cmds++ = 0x00000000;
667 /* VPC_VARYING_PS_REPL_MODE_3 */
668 *cmds++ = 0x00000000;
669
670 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
671 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
672 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
673 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
674 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
675 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
676 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
677
678 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
679 *cmds++ = 0x00000005; *cmds++ = 0x30044b01;
680 /* end; */
681 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
682 /* nop; */
683 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
684 /* nop; */
685 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
686
687 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
688 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
689 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
690 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
691 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
692 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
693 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
694
695 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
696 *cmds++ = 0x00000000; *cmds++ = 0x30244b01;
697 /* end; */
698 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
699 /* nop; */
700 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
701 /* nop; */
702 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
703
704 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
705 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
706 /* RB_MSAA_CONTROL */
707 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
708 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
709
710 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
711 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
712 /* RB_DEPTH_CONTROL */
713 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
714
715 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
716 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
717 /* RB_MRT_CONTROL0 */
718 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
719 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
720 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
721 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
722
723 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
724 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
725 /* RB_MRT_BLEND_CONTROL0 */
726 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
727 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
728 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
729 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
730 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
731 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
732 /* RB_MRT_CONTROL1 */
733 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
734 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
735 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
736
737 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
738 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
739 /* RB_MRT_BLEND_CONTROL1 */
740 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
741 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
742 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
743 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
744 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
745 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
746 /* RB_MRT_CONTROL2 */
747 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
748 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
749 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
750
751 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
752 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
753 /* RB_MRT_BLEND_CONTROL2 */
754 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
755 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
756 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
757 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
758 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
759 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
760 /* RB_MRT_CONTROL3 */
761 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
762 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
763 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
764
765 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
766 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
767 /* RB_MRT_BLEND_CONTROL3 */
768 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
769 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
770 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
771 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
772 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
773 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
774
775 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
776 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
777 /* VFD_INDEX_MIN */
778 *cmds++ = 0x00000000;
779 /* VFD_INDEX_MAX */
780 *cmds++ = 0xFFFFFFFF;
781 /* VFD_INSTANCEID_OFFSET */
782 *cmds++ = 0x00000000;
783 /* VFD_INDEX_OFFSET */
784 *cmds++ = 0x00000000;
785
786 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
787 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
788 /* VFD_VS_THREADING_THRESHOLD */
789 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
790 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
791
792 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
793 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
794 /* TPL1_TP_VS_TEX_OFFSET */
795 *cmds++ = 0;
796
797 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
798 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
799 /* TPL1_TP_FS_TEX_OFFSET */
800 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
801 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
802 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
803
804 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
805 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
806 /* PC_PRIM_VTX_CNTL */
807 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
808 PC_DRAW_TRIANGLES) |
809 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
810 PC_DRAW_TRIANGLES) |
811 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
812
813 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
814 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
815 /* GRAS_SC_WINDOW_SCISSOR_TL */
816 *cmds++ = 0x00000000;
817 /* GRAS_SC_WINDOW_SCISSOR_BR */
818 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
819 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
820
821 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
822 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
823 /* GRAS_SC_SCREEN_SCISSOR_TL */
824 *cmds++ = 0x00000000;
825 /* GRAS_SC_SCREEN_SCISSOR_BR */
826 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
827 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
828
829 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
830 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
831 /* GRAS_CL_VPORT_XOFFSET */
832 *cmds++ = 0x00000000;
833 /* GRAS_CL_VPORT_XSCALE */
834 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
835 /* GRAS_CL_VPORT_YOFFSET */
836 *cmds++ = 0x00000000;
837 /* GRAS_CL_VPORT_YSCALE */
838 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
839
840 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
841 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
842 /* GRAS_CL_VPORT_ZOFFSET */
843 *cmds++ = 0x00000000;
844 /* GRAS_CL_VPORT_ZSCALE */
845 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
846
847 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
848 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
849 /* GRAS_CL_CLIP_CNTL */
850 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
851 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
852 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
853 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
854 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
855
856 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
857 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
858 /* GRAS_CL_GB_CLIP_ADJ */
859 *cmds++ = 0x00000000;
860
861 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
862 *cmds++ = 0x00000000;
863
864 /*
865 * Resolve using two draw calls with a dummy register
866 * write in between. This is a HLM workaround
867 * that should be removed later.
868 */
869 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
870 *cmds++ = 0x00000000; /* Viz query info */
871 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
872 PC_DI_SRC_SEL_IMMEDIATE,
873 PC_DI_INDEX_SIZE_32_BIT,
874 PC_DI_IGNORE_VISIBILITY);
875 *cmds++ = 0x00000003; /* Num indices */
876 *cmds++ = 0x00000000; /* Index 0 */
877 *cmds++ = 0x00000001; /* Index 1 */
878 *cmds++ = 0x00000002; /* Index 2 */
879
880 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
881 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
882 *cmds++ = 0x00000000;
883
884 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
885 *cmds++ = 0x00000000; /* Viz query info */
886 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
887 PC_DI_SRC_SEL_IMMEDIATE,
888 PC_DI_INDEX_SIZE_32_BIT,
889 PC_DI_IGNORE_VISIBILITY);
890 *cmds++ = 0x00000003; /* Num indices */
891 *cmds++ = 0x00000002; /* Index 0 */
892 *cmds++ = 0x00000001; /* Index 1 */
893 *cmds++ = 0x00000003; /* Index 2 */
894
895 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
896 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
897 *cmds++ = 0x00000000;
898
899 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
900 *cmds++ = 0x00000000;
901
902 /* Create indirect buffer command for above command sequence */
903 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
904
905 return cmds;
906}
907
908static void build_shader_save_cmds(struct adreno_device *adreno_dev,
909 struct adreno_context *drawctxt)
910{
911 unsigned int *cmd = tmp_ctx.cmd;
912 unsigned int *start;
913
914 /* Reserve space for boolean values used for COND_EXEC packet */
915 drawctxt->cond_execs[0].hostptr = cmd;
916 drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
917 *cmd++ = 0;
918 drawctxt->cond_execs[1].hostptr = cmd;
919 drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
920 *cmd++ = 0;
921
922 drawctxt->shader_save_commands[0].hostptr = cmd;
923 drawctxt->shader_save_commands[0].gpuaddr =
924 virt2gpu(cmd, &drawctxt->gpustate);
925 *cmd++ = 0;
926 drawctxt->shader_save_commands[1].hostptr = cmd;
927 drawctxt->shader_save_commands[1].gpuaddr =
928 virt2gpu(cmd, &drawctxt->gpustate);
929 *cmd++ = 0;
930
931 start = cmd;
932
933 /* Save vertex shader */
934
935 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
936 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
937 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
938 *cmd++ = 0x0000FFFF;
939 *cmd++ = 3; /* EXEC_COUNT */
940
941 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
942 drawctxt->shader_save_commands[2].hostptr = cmd;
943 drawctxt->shader_save_commands[2].gpuaddr =
944 virt2gpu(cmd, &drawctxt->gpustate);
945 /*
946 From fixup:
947
948 dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
949
950 From regspec:
951 SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
952 If bit31 is 1, it means overflow
953 or any long shader.
954
955 src = (HLSQ_SHADOW_BASE + 0x1000)/4
956 */
957 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
958 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;
959
960 /* Save fragment shader */
961 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
962 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
963 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
964 *cmd++ = 0x0000FFFF;
965 *cmd++ = 3; /* EXEC_COUNT */
966
967 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
968 drawctxt->shader_save_commands[3].hostptr = cmd;
969 drawctxt->shader_save_commands[3].gpuaddr =
970 virt2gpu(cmd, &drawctxt->gpustate);
971 /*
972 From fixup:
973
974 dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
975
976 From regspec:
977 SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
978 If bit31 is 1, it means overflow
979 or any long shader.
980
981 fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
982 From regspec:
983
984 SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
985 First instruction of the whole shader will be stored from
986 the offset in instruction cache, unit = 256bits, a cache line.
987 It can start from 0 if no VS available.
988
989 src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
990 */
991 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
992 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
993 + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;
994
995 /* Create indirect buffer command for above command sequence */
996 create_ib1(drawctxt, drawctxt->shader_save, start, cmd);
997
998 tmp_ctx.cmd = cmd;
999}
1000
1001/*
1002 * Make an IB to modify context save IBs with the correct shader instruction
1003 * and constant sizes and offsets.
1004 */
1005
1006static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
1007 struct adreno_context *drawctxt)
1008{
1009 unsigned int *cmd = tmp_ctx.cmd;
1010 unsigned int *start = cmd;
1011
1012 /* Flush HLSQ lazy updates */
1013 *cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
1014 *cmd++ = 0x7; /* HLSQ_FLUSH */
1015 *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1016 *cmd++ = 0;
1017
1018 *cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
1019 *cmd++ = 0x00000000; /* No start addr for full invalidate */
1020 *cmd++ = (unsigned int)
1021 UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
1022 UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
1023 0; /* No end addr for full invalidate */
1024
1025 /* Make sure registers are flushed */
1026 *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
1027 *cmd++ = 0;
1028
1029#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
1030
1031 /* Save shader sizes */
1032 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1033 *cmd++ = A3XX_SP_VS_CTRL_REG0;
1034 *cmd++ = drawctxt->shader_save_commands[2].gpuaddr;
1035
1036 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1037 *cmd++ = A3XX_SP_FS_CTRL_REG0;
1038 *cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1039
1040 /* Save shader offsets */
1041 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1042 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1043 *cmd++ = drawctxt->shader_save_commands[1].gpuaddr;
1044
1045 /* Save constant sizes */
1046 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1047 *cmd++ = A3XX_SP_VS_CTRL_REG1;
1048 *cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
1049 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1050 *cmd++ = A3XX_SP_FS_CTRL_REG1;
1051 *cmd++ = drawctxt->constant_save_commands[2].gpuaddr;
1052
1053 /* Save FS constant offset */
1054 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1055 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
1056 *cmd++ = drawctxt->constant_save_commands[0].gpuaddr;
1057
1058
1059 /* Save VS instruction store mode */
1060 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1061 *cmd++ = A3XX_SP_VS_CTRL_REG0;
1062 *cmd++ = drawctxt->cond_execs[0].gpuaddr;
1063
1064 /* Save FS instruction store mode */
1065 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1066 *cmd++ = A3XX_SP_FS_CTRL_REG0;
1067 *cmd++ = drawctxt->cond_execs[1].gpuaddr;
1068#else
1069
1070 /* Shader save */
1071 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
1072 11+REG_TO_MEM_LOOP_COUNT_SHIFT,
1073 (HLSQ_SHADOW_BASE + 0x1000) / 4,
1074 drawctxt->shader_save_commands[2].gpuaddr);
1075
1076 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
1077 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1078 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1079 *cmd++ = 0x00000000; /* AND value */
1080 *cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
1081 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
1082 | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
1083 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1084 *cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
1085 A3XX_CP_SCRATCH_REG2;
1086 *cmd++ = 0x7f000000; /* AND value */
1087 *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */
1088
1089 /*
1090 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
1091 * SP_FS_OBJ_OFFSET_REG
1092 */
1093
1094 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1095 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
1096 *cmd++ = 0x00000000; /* AND value */
1097 *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
1098 /*
1099 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
1100 * 0x00000000
1101 */
1102 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1103 *cmd++ = A3XX_CP_SCRATCH_REG3;
1104 *cmd++ = 0xfe000000; /* AND value */
1105 *cmd++ = 0x00000000; /* OR value */
1106 /*
1107 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
1108 */
1109 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
1110 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
1111 *cmd++ = 0xffffffff; /* AND value */
1112 *cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */
1113
1114 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1115 *cmd++ = A3XX_CP_SCRATCH_REG2;
1116 *cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
1117
1118 /* Constant save */
1119 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
Jordan Croused0070882012-02-21 08:54:52 -07001120 2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
1121 (HLSQ_SHADOW_BASE + 0x2000) / 4,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001122 drawctxt->constant_save_commands[1].gpuaddr);
1123
1124 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
Jordan Croused0070882012-02-21 08:54:52 -07001125 2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
1126 (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001127 drawctxt->constant_save_commands[2].gpuaddr);
1128
1129 cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
1130 18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
1131 drawctxt->constant_save_commands[2].gpuaddr
1132 + sizeof(unsigned int));
1133
1134 /* Modify constant save conditionals */
1135 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
1136 0, 0, drawctxt->cond_execs[2].gpuaddr);
1137
1138 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
1139 0, 0, drawctxt->cond_execs[3].gpuaddr);
1140
1141 /* Save VS instruction store mode */
1142
1143 cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
1144 31, 0, drawctxt->cond_execs[0].gpuaddr);
1145
1146 /* Save FS instruction store mode */
1147 cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
1148 31, 0, drawctxt->cond_execs[1].gpuaddr);
1149
1150#endif
1151
1152 create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);
1153
1154 tmp_ctx.cmd = cmd;
1155}
1156
1157/****************************************************************************/
1158/* Functions to build context restore IBs */
1159/****************************************************************************/
1160
1161static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1162 struct adreno_context *drawctxt,
1163 struct gmem_shadow_t *shadow)
1164{
1165 unsigned int *cmds = tmp_ctx.cmd;
1166 unsigned int *start = cmds;
1167
Jordan Crousefb3012f2012-06-22 13:11:05 -06001168 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
1169 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
1170
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001171 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1172 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1173 /* HLSQ_CONTROL_0_REG */
1174 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1175 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1176 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1177 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1178 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1179 /* HLSQ_CONTROL_1_REG */
1180 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1181 /* HLSQ_CONTROL_2_REG */
1182 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1183 /* HLSQ_CONTROL3_REG */
1184 *cmds++ = 0x00000000;
1185
1186 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1187 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1188 /* RB_MRT_BUF_INFO0 */
1189 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1190 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1191 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1192 (shadow->gmem_pitch * 4 * 8) / 256);
1193 /* RB_MRT_BUF_BASE0 */
1194 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1195
1196 /* Texture samplers */
1197 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1198 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1199 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1200 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1201 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1202 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1203 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1204 *cmds++ = 0x00000240;
1205 *cmds++ = 0x00000000;
1206
1207 /* Texture memobjs */
1208 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1209 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1210 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1211 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1212 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1213 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1214 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1215 *cmds++ = 0x4cc06880;
1216 *cmds++ = shadow->height | (shadow->width << 14);
1217 *cmds++ = (shadow->pitch*4*8) << 9;
1218 *cmds++ = 0x00000000;
1219
1220 /* Mipmap bases */
1221 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1222 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1223 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1224 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1225 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1226 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1227 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1228 *cmds++ = shadow->gmemshadow.gpuaddr;
1229 *cmds++ = 0x00000000;
1230 *cmds++ = 0x00000000;
1231 *cmds++ = 0x00000000;
1232 *cmds++ = 0x00000000;
1233 *cmds++ = 0x00000000;
1234 *cmds++ = 0x00000000;
1235 *cmds++ = 0x00000000;
1236 *cmds++ = 0x00000000;
1237 *cmds++ = 0x00000000;
1238 *cmds++ = 0x00000000;
1239 *cmds++ = 0x00000000;
1240 *cmds++ = 0x00000000;
1241 *cmds++ = 0x00000000;
1242
1243 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1244 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1245 /* HLSQ_VS_CONTROL_REG */
1246 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1247 /* HLSQ_FS_CONTROL_REG */
1248 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1249 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1250 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1251 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1252 *cmds++ = 0x00000000;
1253 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1254 *cmds++ = 0x00000000;
1255
1256 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1257 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1258 /* SP_FS_LENGTH_REG */
1259 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1260
1261 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1262 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1263 /* SP_VS_CTRL_REG0 */
1264 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1265 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1266 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1267 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1268 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1269 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1270 /* SP_VS_CTRL_REG1 */
1271 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1272 /* SP_VS_PARAM_REG */
1273 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1274 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1275 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1276 /* SP_VS_OUT_REG0 */
1277 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1278 /* SP_VS_OUT_REG1 */
1279 *cmds++ = 0x00000000;
1280 /* SP_VS_OUT_REG2 */
1281 *cmds++ = 0x00000000;
1282 /* SP_VS_OUT_REG3 */
1283 *cmds++ = 0x00000000;
1284 /* SP_VS_OUT_REG4 */
1285 *cmds++ = 0x00000000;
1286 /* SP_VS_OUT_REG5 */
1287 *cmds++ = 0x00000000;
1288 /* SP_VS_OUT_REG6 */
1289 *cmds++ = 0x00000000;
1290 /* SP_VS_OUT_REG7 */
1291 *cmds++ = 0x00000000;
1292
1293 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1294 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1295 /* SP_VS_VPC_DST_REG0 */
1296 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1297 /* SP_VS_VPC_DST_REG1 */
1298 *cmds++ = 0x00000000;
1299 /* SP_VS_VPC_DST_REG2 */
1300 *cmds++ = 0x00000000;
1301 /* SP_VS_VPC_DST_REG3 */
1302 *cmds++ = 0x00000000;
1303 /* SP_VS_OBJ_OFFSET_REG */
1304 *cmds++ = 0x00000000;
1305 /* SP_VS_OBJ_START_REG */
1306 *cmds++ = 0x00000000;
1307
1308 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1309 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1310 /* SP_VS_LENGTH_REG */
1311 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1312 /* SP_FS_CTRL_REG0 */
1313 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1314 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1315 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1316 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1317 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1318 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1319 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1320 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1321 /* SP_FS_CTRL_REG1 */
1322 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1323 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1324 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1325 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001326 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
1327 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001328 /* SP_FS_OBJ_START_REG */
1329 *cmds++ = 0x00000000;
1330
1331 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1332 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1333 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1334 *cmds++ = 0x00000000;
1335 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1336 *cmds++ = 0x00000000;
1337
1338 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1339 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1340 /* SP_FS_OUT_REG */
1341 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1342
Jordan Crousea7ec4212012-02-04 10:23:52 -07001343 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001344 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1345 /* SP_FS_MRT_REG0 */
1346 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1347 /* SP_FS_MRT_REG1 */
1348 *cmds++ = 0;
1349 /* SP_FS_MRT_REG2 */
1350 *cmds++ = 0;
1351 /* SP_FS_MRT_REG3 */
1352 *cmds++ = 0;
1353
1354 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1355 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1356 /* VPC_ATTR */
1357 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1358 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1359 _SET(VPC_VPCATTR_LMSIZE, 1);
1360 /* VPC_PACK */
1361 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1362 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1363 /* VPC_VARYING_INTERP_MODE_0 */
1364 *cmds++ = 0x00000000;
1365 /* VPC_VARYING_INTERP_MODE1 */
1366 *cmds++ = 0x00000000;
1367 /* VPC_VARYING_INTERP_MODE2 */
1368 *cmds++ = 0x00000000;
1369 /* VPC_VARYING_IINTERP_MODE3 */
1370 *cmds++ = 0x00000000;
1371 /* VPC_VARRYING_PS_REPL_MODE_0 */
1372 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1373 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1374 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1375 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1376 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1377 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1378 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1379 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1380 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1381 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1382 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1383 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1384 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1385 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1386 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1387 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1388 /* VPC_VARRYING_PS_REPL_MODE_1 */
1389 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1390 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1391 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1392 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1393 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1394 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1395 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1396 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1397 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1398 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1399 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1400 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1401 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1402 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1403 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1404 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1405 /* VPC_VARRYING_PS_REPL_MODE_2 */
1406 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1407 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1408 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1409 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1410 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1411 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1412 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1413 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1414 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1415 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1416 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1417 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1418 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1419 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1420 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1421 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1422 /* VPC_VARRYING_PS_REPL_MODE_3 */
1423 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1424 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1425 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1426 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1427 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1428 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1429 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1430 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1431 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1432 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1433 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1434 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1435 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1436 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1437 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1438 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1439
Jordan Crousea7ec4212012-02-04 10:23:52 -07001440 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001441 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1442 /* SP_SP_CTRL_REG */
1443 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1444
1445 /* Load vertex shader */
1446 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1447 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1448 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1449 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1450 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1451 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1452 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1453 /* (sy)end; */
1454 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1455 /* nop; */
1456 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1457 /* nop; */
1458 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1459 /* nop; */
1460 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1461
1462 /* Load fragment shader */
1463 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1464 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1465 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1466 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1467 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1468 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1469 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1470 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1471 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1472 /* (rpt5)nop; */
1473 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1474 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1475 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1476 /* (sy)mov.f32f32 r1.x, r0.x; */
1477 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1478 /* mov.f32f32 r1.y, r0.y; */
1479 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1480 /* mov.f32f32 r1.z, r0.z; */
1481 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1482 /* mov.f32f32 r1.w, r0.w; */
1483 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1484 /* end; */
1485 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1486
1487 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1488 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1489 /* VFD_CONTROL_0 */
1490 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1491 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1492 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1493 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1494 /* VFD_CONTROL_1 */
1495 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1496 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1497 _SET(VFD_CTRLREG1_REGID4INST, 252);
1498
1499 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1500 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1501 /* VFD_FETCH_INSTR_0_0 */
1502 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1503 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1504 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1505 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1506 /* VFD_FETCH_INSTR_1_0 */
1507 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1508 shadow->quad_vertices_restore.gpuaddr);
1509 /* VFD_FETCH_INSTR_0_1 */
1510 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1511 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1512 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1513 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1514 /* VFD_FETCH_INSTR_1_1 */
1515 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1516 shadow->quad_vertices_restore.gpuaddr + 16);
1517
1518 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1519 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1520 /* VFD_DECODE_INSTR_0 */
1521 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1522 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1523 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1524 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1525 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1526 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1527 /* VFD_DECODE_INSTR_1 */
1528 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1529 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1530 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1531 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1532 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1533 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1534
1535 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1536 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1537 /* RB_DEPTH_CONTROL */
1538 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1539
1540 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1541 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1542 /* RB_STENCIL_CONTROL */
1543 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1544 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1545 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1546 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1547 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1548 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1549 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1550 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1551
1552 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1553 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1554 /* RB_MODE_CONTROL */
1555 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1556 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1557
1558 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1559 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1560 /* RB_RENDER_CONTROL */
1561 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1562 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1563
1564 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1565 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1566 /* RB_MSAA_CONTROL */
1567 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1568 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1569
1570 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1571 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1572 /* RB_MRT_CONTROL0 */
1573 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1574 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1575 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1576 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1577
1578 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1579 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1580 /* RB_MRT_BLENDCONTROL0 */
1581 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1582 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1583 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1584 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1585 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1586 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1587 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1588 /* RB_MRT_CONTROL1 */
1589 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1590 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1591 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1592
1593 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1594 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1595 /* RB_MRT_BLENDCONTROL1 */
1596 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1597 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1598 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1599 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1600 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1601 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1602 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1603 /* RB_MRT_CONTROL2 */
1604 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1605 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1606 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1607
1608 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1609 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1610 /* RB_MRT_BLENDCONTROL2 */
1611 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1612 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1613 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1614 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1615 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1616 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1617 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1618 /* RB_MRT_CONTROL3 */
1619 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1620 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1621 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1622
1623 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1624 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1625 /* RB_MRT_BLENDCONTROL3 */
1626 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1627 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1628 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1629 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1630 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1631 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1632 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1633
1634 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1635 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1636 /* VFD_INDEX_MIN */
1637 *cmds++ = 0x00000000;
1638 /* VFD_INDEX_MAX */
1639 *cmds++ = 0xFFFFFFFF;
1640 /* VFD_INDEX_OFFSET */
1641 *cmds++ = 0x00000000;
1642 /* TPL1_TP_VS_TEX_OFFSET */
1643 *cmds++ = 0x00000000;
1644
1645 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1646 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1647 /* VFD_VS_THREADING_THRESHOLD */
1648 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1649 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1650
1651 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1652 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1653 /* TPL1_TP_VS_TEX_OFFSET */
1654 *cmds++ = 0x00000000;
1655
1656 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1657 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1658 /* TPL1_TP_FS_TEX_OFFSET */
1659 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1660 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1661 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1662
1663 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1664 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1665 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001666 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1667 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1668 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001669
1670 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1671 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1672 /* GRAS_SU_MODE_CONTROL */
1673 *cmds++ = 0x00000000;
1674
1675 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1676 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1677 /* GRAS_SC_WINDOW_SCISSOR_TL */
1678 *cmds++ = 0x00000000;
1679 /* GRAS_SC_WINDOW_SCISSOR_BR */
1680 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1681 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1682
1683 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1684 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1685 /* GRAS_SC_SCREEN_SCISSOR_TL */
1686 *cmds++ = 0x00000000;
1687 /* GRAS_SC_SCREEN_SCISSOR_BR */
1688 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1689 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1690
1691 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1692 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1693 /* GRAS_CL_VPORT_XOFFSET */
1694 *cmds++ = 0x00000000;
1695 /* GRAS_CL_VPORT_XSCALE */
1696 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1697 /* GRAS_CL_VPORT_YOFFSET */
1698 *cmds++ = 0x00000000;
1699 /* GRAS_CL_VPORT_YSCALE */
1700 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1701
1702 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1703 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1704 /* GRAS_CL_VPORT_ZOFFSET */
1705 *cmds++ = 0x00000000;
1706 /* GRAS_CL_VPORT_ZSCALE */
1707 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1708
1709 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1710 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1711 /* GRAS_CL_CLIP_CNTL */
1712 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1713
1714 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1715 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1716 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1717 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1718
1719 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1720 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1721 /* PC_PRIM_VTX_CONTROL */
1722 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1723 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1724 PC_DRAW_TRIANGLES) |
1725 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1726 PC_DRAW_TRIANGLES) |
1727 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1728
1729 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1730 *cmds++ = 0x00000000; /* Viz query info */
1731 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1732 PC_DI_SRC_SEL_AUTO_INDEX,
1733 PC_DI_INDEX_SIZE_16_BIT,
1734 PC_DI_IGNORE_VISIBILITY);
1735 *cmds++ = 0x00000002; /* Num indices */
1736
1737 /* Create indirect buffer command for above command sequence */
1738 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1739
1740 return cmds;
1741}
1742
/*
 * build_regrestore_cmds() - Build the IB that restores the shadowed
 * context registers on a context switch.
 *
 * Emits an HLSQ flush, a full UCHE invalidate, then a single
 * CP_LOAD_CONSTANT_CONTEXT packet covering every range in
 * context_register_ranges, sourced from the register shadow at
 * gpustate + REG_OFFSET.  The packet header is patched in after the
 * body is emitted because its dword count is not known up front.
 * Finally, per-register type0 writes are emitted for the global
 * registers; the gpuaddr of each value slot is recorded in
 * tmp_ctx.reg_values so the values can be patched later (presumably
 * by the save-side fixup IB -- the writer is outside this chunk).
 *
 * The resulting command stream is wrapped in drawctxt->reg_restore
 * via create_ib1() and tmp_ctx.cmd is advanced past it.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Invalidate the entire UCHE (no start/end address range given) */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	/*
	 * Remember where the CP_LOAD_CONSTANT_CONTEXT packet starts; the
	 * header is written after the body so the size can be computed.
	 */
	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	/* 8K-aligned base address of the register shadow area */
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* One (start, count) pair per context register range */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Patch the deferred packet header now that the size is known */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

	/* OR control bits into ord2: bit 24 = shadow writes enable */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * Global registers restored one at a time; record each value
	 * slot's gpuaddr so it can be patched with the saved value.
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1800
/*
 * build_constantrestore_cmds() - Build the IB that restores shader
 * constants and texture state from the context shadow.
 *
 * This IB is self-modifying: the CP_LOAD_STATE ord1/ord2 dwords for the
 * vertex and fragment constant loads are left as zero placeholders and
 * their gpuaddrs are recorded in drawctxt->constant_load_commands[] so
 * build_restore_fixup_cmds() can patch in the real sizes/offsets read
 * from SP_*_CTRL_REG1 / SP_FS_OBJ_OFFSET_REG at runtime.  The two
 * CP_COND_EXEC predicates (cond_execs[2]/[3]) skip the loads when the
 * corresponding constant length is zero.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/* Predicate slots for the VS/FS constant loads, patched at runtime */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable shadow writes (bit 24 clear in ord2) */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 -- patched by the fixup IB */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 -- patched by the fixup IB */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 -- patched by the fixup IB */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1965
/*
 * build_shader_restore_cmds() - Build the IB that reloads the saved
 * vertex and fragment shader programs from the context shadow.
 *
 * Like the constant-restore IB, this one is self-modifying: the
 * CP_LOAD_STATE ord1 dwords are zero placeholders whose gpuaddrs are
 * recorded in drawctxt->shader_load_commands[] so the fixup IB can
 * patch in the real shader lengths taken from SP_VS/FS_CTRL_REG0.
 * cond_execs[0]/[1] gate each load (skip when no shader was saved);
 * the predicate writers are outside this chunk.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 -- patched by the fixup IB */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 -- patched by the fixup IB */
	/* FS shadow lives in the second half of the shader shadow area */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2032
2033static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
2034 struct adreno_context *drawctxt)
2035{
2036 unsigned int *cmd = tmp_ctx.cmd;
2037 unsigned int *start = cmd;
2038
2039 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
2040 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
2041 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
2042 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
2043 = virt2gpu(cmd, &drawctxt->gpustate);
2044 *cmd++ = 0;
2045
2046 /* Create indirect buffer command for above command sequence */
2047 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
2048
2049 tmp_ctx.cmd = cmd;
2050}
2051
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
/*
 * Runs before the restore IBs; reads the live SP_* registers and writes
 * the derived CP_LOAD_STATE ord1/ord2 dwords (and the cond_exec
 * predicates) into the restore command buffers built above, either with
 * plain CP_REG_TO_MEM (CPU-sync build) or via the rmw_regtomem()
 * read-modify-write helper.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/*
	 * rmw_regtomem(cmd, reg, mask, shift, or-value, dest) -- presumably
	 * reads reg, masks/shifts the field, ORs in the constant bits and
	 * stores to dest; helper is defined outside this chunk (TODO confirm
	 * exact argument semantics against its definition).
	 */
	/* Save shader sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2124
/*
 * a3xx_create_gpustate_shadow() - Build all save/restore IBs for a
 * non-preamble context.
 *
 * Each build_* helper appends its command stream at tmp_ctx.cmd and
 * advances it, so the call order below determines the buffer layout
 * and must not be changed.  Always returns 0; the int return type
 * matches the caller's error-handling pattern.
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	/* Mark the context as having a register/constant state shadow */
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2141
/* create buffers for saving/restoring registers, constants, & GMEM */
/*
 * a3xx_create_gmem_shadow() - Allocate the GMEM shadow buffer and build
 * the GMEM save (gmem2sys) and restore (sys2gmem) command streams.
 *
 * Returns 0 on success or the error from kgsl_allocate(); on success
 * sets CTXT_FLAGS_GMEM_SHADOW.  The caller frees gpustate on failure.
 */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	int result;

	/* Size the shadow from the device's GMEM configuration */
	calc_gmemsize(&drawctxt->context_gmem_shadow, adreno_dev->gmem_size);
	tmp_ctx.gmem_base = adreno_dev->gmem_base;

	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
		drawctxt->pagetable, drawctxt->context_gmem_shadow.size);

	if (result)
		return result;

	/* Quad vertex/texcoord buffers used by the copy shaders */
	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
		&tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);

	/* Flush CPU writes so the GPU sees the generated commands */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;

	return 0;
}
2175
2176static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
2177 struct adreno_context *drawctxt)
2178{
2179 int ret;
2180
2181 /*
2182 * Allocate memory for the GPU state and the context commands.
2183 * Despite the name, this is much more then just storage for
2184 * the gpustate. This contains command space for gmem save
2185 * and texture and vertex buffer storage too
2186 */
2187
2188 ret = kgsl_allocate(&drawctxt->gpustate,
2189 drawctxt->pagetable, CONTEXT_SIZE);
2190
2191 if (ret)
2192 return ret;
2193
2194 kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
2195 tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;
2196
2197 if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
2198 ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
2199 if (ret)
2200 goto done;
2201
2202 drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
2203 }
2204
2205 if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
2206 ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);
2207
2208done:
2209 if (ret)
2210 kgsl_sharedmem_free(&drawctxt->gpustate);
2211
2212 return ret;
2213}
2214
/*
 * a3xx_drawctxt_save() - Queue ringbuffer commands that save the
 * outgoing context's state into its shadow buffers.
 *
 * Ordering matters: the self-modifying save IBs are fixed up first,
 * then registers/constants, then shaders, and GMEM last (the GMEM copy
 * changes the shader, per the comment below).  Sets
 * CTXT_FLAGS_SHADER_RESTORE / CTXT_FLAGS_GMEM_RESTORE so the restore
 * path knows what was saved.  A NULL context is a no-op.
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			   struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			"Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			/* Shaders were saved, so restore them next time */
			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2258
/*
 * a3xx_drawctxt_restore() - Queue ringbuffer commands that restore an
 * incoming context's state from its shadow buffers.
 *
 * With a NULL context only the default pagetable is set.  Otherwise the
 * current-context id is written to memstore, the pagetable is switched,
 * GMEM is restored first (it changes the shader, so it must run before
 * the shader restore), then registers, fixups, constants, shaders and
 * finally HLSQ_CONTROL_0.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
			      struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Publish the new current-context id in the global memstore slot */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[4] = context->id;
	adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE, cmds, 5);
	kgsl_mmu_setstate(&device->mmu, context->pagetable);

	/*
	 * Restore GMEM.  (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2314
2315static void a3xx_rb_init(struct adreno_device *adreno_dev,
2316 struct adreno_ringbuffer *rb)
2317{
2318 unsigned int *cmds, cmds_gpu;
2319 cmds = adreno_ringbuffer_allocspace(rb, 18);
2320 cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);
2321
2322 GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
2323 GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
2324 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2325 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2326 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2327 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
2328 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
2329 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
2330 GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
2331 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
2332 GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
2333 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
2334 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
2335 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2336 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2337 /* Protected mode control - turned off for A3XX */
2338 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2339 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2340 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2341
2342 adreno_ringbuffer_submit(rb);
2343}
2344
2345static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2346{
2347 struct kgsl_device *device = &adreno_dev->dev;
2348 const char *err = "";
2349
2350 switch (bit) {
2351 case A3XX_INT_RBBM_AHB_ERROR: {
2352 unsigned int reg;
2353
2354 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2355
2356 /*
2357 * Return the word address of the erroring register so that it
2358 * matches the register specification
2359 */
2360
2361 KGSL_DRV_CRIT(device,
2362 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2363 reg & (1 << 28) ? "WRITE" : "READ",
2364 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2365 (reg >> 24) & 0x3);
2366
2367 /* Clear the error */
2368 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2369 return;
2370 }
2371 case A3XX_INT_RBBM_REG_TIMEOUT:
2372 err = "RBBM: AHB register timeout";
2373 break;
2374 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2375 err = "RBBM: ME master split timeout";
2376 break;
2377 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2378 err = "RBBM: PFP master split timeout";
2379 break;
2380 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2381 err = "RBBM: ATB bus oveflow";
2382 break;
2383 case A3XX_INT_VFD_ERROR:
2384 err = "VFD: Out of bounds access";
2385 break;
2386 case A3XX_INT_CP_T0_PACKET_IN_IB:
2387 err = "ringbuffer TO packet in IB interrupt";
2388 break;
2389 case A3XX_INT_CP_OPCODE_ERROR:
2390 err = "ringbuffer opcode error interrupt";
2391 break;
2392 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2393 err = "ringbuffer reserved bit error interrupt";
2394 break;
2395 case A3XX_INT_CP_HW_FAULT:
2396 err = "ringbuffer hardware fault";
2397 break;
2398 case A3XX_INT_CP_REG_PROTECT_FAULT:
2399 err = "ringbuffer protected mode error interrupt";
2400 break;
2401 case A3XX_INT_CP_AHB_ERROR_HALT:
2402 err = "ringbuffer AHB error interrupt";
2403 break;
2404 case A3XX_INT_MISC_HANG_DETECT:
2405 err = "MISC: GPU hang detected";
2406 break;
2407 case A3XX_INT_UCHE_OOB_ACCESS:
2408 err = "UCHE: Out of bounds access";
2409 break;
2410 }
2411
2412 KGSL_DRV_CRIT(device, "%s\n", err);
2413 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2414}
2415
/*
 * a3xx_cp_callback() - Handle CP completion interrupts
 * (RB / IB1 / IB2 done).
 *
 * For an RB interrupt, disables timestamp-compare for the current
 * context (read from the global memstore slot) before waking waiters;
 * wmb() orders the memstore write against the wakeup.  All CP
 * interrupts wake the device wait queue, queue the timestamp-expired
 * work and fire the ts notifier chain.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (irq == A3XX_INT_CP_RB_INT) {
		unsigned int context_id;
		kgsl_sharedmem_readl(&device->memstore, &context_id,
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				current_context));
		/* Guard against a garbage/uninitialized memstore value */
		if (context_id < KGSL_MEMSTORE_MAX) {
			kgsl_sharedmem_writel(&device->memstore,
				KGSL_MEMSTORE_OFFSET(context_id,
					ts_cmp_enable), 0);
			wmb();
		}
		KGSL_CMD_WARN(device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(device->work_queue, &device->ts_expired_ws);

	atomic_notifier_call_chain(&device->ts_notifier_list,
				   device->id, NULL);
}
2442
/* Initializer for one a3xx_irq_funcs entry */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * RBBM_INT_0 bits enabled at runtime: AHB/ATB errors, the CP error
 * conditions, and the CP IB1/IB2/RB completion interrupts.  Bits with
 * handlers in a3xx_irq_funcs but absent here (e.g. REG_TIMEOUT, the MS
 * timeouts, VFD_ERROR) are not enabled -- NOTE(review): intent not
 * visible from this file; confirm against the interrupt setup code.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) |           \
	 (1 << A3XX_INT_CP_IB1_INT) |            \
	 (1 << A3XX_INT_CP_IB2_INT) |            \
	 (1 << A3XX_INT_CP_RB_INT) |             \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2458
/*
 * Interrupt dispatch table, indexed by RBBM_INT_0 status bit position.
 * A NULL func means the bit is ignored by the handler loop.
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2490
2491static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2492{
2493 struct kgsl_device *device = &adreno_dev->dev;
2494 irqreturn_t ret = IRQ_NONE;
2495 unsigned int status, tmp;
2496 int i;
2497
2498 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2499
2500 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2501 if (tmp & 1) {
2502 if (a3xx_irq_funcs[i].func != NULL) {
2503 a3xx_irq_funcs[i].func(adreno_dev, i);
2504 ret = IRQ_HANDLED;
2505 } else {
2506 KGSL_DRV_CRIT(device,
2507 "Unhandled interrupt bit %x\n", i);
2508 }
2509 }
2510
2511 tmp >>= 1;
2512 }
2513
Carter Cooperb769c912012-04-13 08:16:35 -06002514 trace_kgsl_a3xx_irq_status(device, status);
2515
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002516 if (status)
2517 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2518 status);
2519 return ret;
2520}
2521
2522static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2523{
2524 struct kgsl_device *device = &adreno_dev->dev;
2525
Wei Zou08a7e572012-06-03 22:05:46 -07002526 if (state)
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002527 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
Wei Zou08a7e572012-06-03 22:05:46 -07002528 else
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002529 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2530}
2531
2532static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
2533{
2534 struct kgsl_device *device = &adreno_dev->dev;
2535 unsigned int reg, val;
2536
2537 /* Freeze the counter */
2538 adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
2539 reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2540 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2541
2542 /* Read the value */
2543 adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
2544
2545 /* Reset the counter */
2546 reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
2547 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2548
2549 /* Re-enable the counter */
2550 reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
2551 reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2552 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2553
2554 return val;
2555}
2556
/*
 * One-time hardware bring-up for an A3XX core: soft-reset, VBIF
 * (bus interface) tuning, perf/power counter enables, AHB error
 * reporting and hang detection.  Statement order follows the
 * hardware programming sequence and should not be rearranged.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Reset the core */
	adreno_regwrite(device, A3XX_RBBM_SW_RESET_CMD,
		0x00000001);
	/* Allow the reset to settle before touching other registers */
	msleep(20);

	/* Set up 16 deep read/write request queues */

	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);

	/* Enable WR-REQ */
	adreno_regwrite(device, A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x000000FF);

	/* Set up round robin arbitration between both AXI ports */
	adreno_regwrite(device, A3XX_VBIF_ARB_CTL, 0x00000030);

	/* Set up AOOO */
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C);
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C);

	/* NOTE(review): APQ8064-only VBIF sort setup; magic values appear
	 * to come from the HW programming guide — confirm against the
	 * A3XX register spec before changing. */
	if (cpu_is_apq8064()) {
		/* Enable 1K sort */
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT, 0x000000FF);
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
	}
	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Tune the hystersis counters for SP and CP idle detection */
	adreno_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
	adreno_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits. This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
		(1 << 16) | 0xFFF);

}
2616
/* Defined in adreno_a3xx_snapshot.c */
void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
	int *remain, int hang);

/*
 * A3XX dispatch table handed to the generic adreno layer: per-core
 * register offsets plus the entry points for context management,
 * ringbuffer init, IRQ handling, busy statistics, startup and
 * snapshot capture.
 */
struct adreno_gpudev adreno_a3xx_gpudev = {
	.reg_rbbm_status = A3XX_RBBM_STATUS,
	.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
	.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,

	.ctxt_create = a3xx_drawctxt_create,
	.ctxt_save = a3xx_drawctxt_save,
	.ctxt_restore = a3xx_drawctxt_restore,
	/* no per-draw workaround hook on A3XX */
	.ctxt_draw_workaround = NULL,
	.rb_init = a3xx_rb_init,
	.irq_control = a3xx_irq_control,
	.irq_handler = a3xx_irq_handler,
	.busy_cycles = a3xx_busy_cycles,
	.start = a3xx_start,
	.snapshot = a3xx_snapshot,
};