blob: bb89067db48f90d983ec7d256ebf7ce8ce617db7 [file] [log] [blame]
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/delay.h>
Steve Mucklef132c6c2012-06-06 18:30:57 -070015#include <linux/sched.h>
Sudhakara Rao Tentu5746bde2012-03-15 12:16:32 +053016#include <mach/socinfo.h>
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070017
18#include "kgsl.h"
19#include "adreno.h"
20#include "kgsl_sharedmem.h"
21#include "kgsl_cffdump.h"
22#include "a3xx_reg.h"
Carter Cooperb769c912012-04-13 08:16:35 -060023#include "adreno_a3xx_trace.h"
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070024
/*
 * Set of registers to dump for A3XX on postmortem and snapshot.
 * Registers in pairs - first value is the start offset, second
 * is the stop offset (inclusive)
 */

const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303C, 0x303C, 0x305E, 0x305F,
};

/* Number of start/stop pairs in the dump list above */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
71
/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */

#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 * 8K - ALU Constant Shadow (8K aligned)
 * 4K - H/W Register Shadow (8K aligned)
 * 5K - Command and Vertex Buffers
 * 8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE (8*1024)	/* 8KB */
#define REG_SHADOW_SIZE (4*1024)	/* 4KB */
#define CMD_BUFFER_SIZE (5*1024)	/* 5KB */
#define TEX_SIZE_MEM_OBJECTS 896	/* bytes */
#define TEX_SIZE_MIPMAP 1936	/* bytes */
#define TEX_SIZE_SAMPLER_OBJ 256	/* bytes */
/* One VS + one FS copy of the three texture state sections */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2)	/* ~6KB */
#define SHADER_SHADOW_SIZE (8*1024)	/* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET ALU_SHADOW_SIZE
#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE)
/* Vertex shader texture state lives first, fragment shader state follows */
#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE (16*1024)

/* Offsets of the texture state sections within HLSQ shadow RAM */
#define HLSQ_SAMPLER_OFFSET 0x000
#define HLSQ_MEMOBJ_OFFSET 0x400
#define HLSQ_MIPMAP_OFFSET 0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)

/* Bit position of the dword count in a CP_REG_TO_MEM source descriptor */
#define REG_TO_MEM_LOOP_COUNT_SHIFT 18

/*
 * Pack the fields of a PC draw initiator dword.
 * NOTE(review): index_size is used unparenthesized in two sub-expressions
 * (& and >>); callers must pass a simple expression — confirm before
 * passing anything with lower-precedence operators.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	((index_size & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	((index_size >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
148
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains start and end of a range of registers.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
186
/* Global registers that need to be saved separately */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};

/* Number of individual registers in the list above (not pairs) */
#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
218
/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* Addresses in command buffer where registers are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base;	/* Base GPU address of GMEM */
} tmp_ctx;
227
228#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
229/*
230 * Function for executing dest = ( (reg & and) ROL rol ) | or
231 */
232static unsigned int *rmw_regtomem(unsigned int *cmd,
233 unsigned int reg, unsigned int and,
234 unsigned int rol, unsigned int or,
235 unsigned int dest)
236{
237 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
238 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
239 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
240 *cmd++ = 0x00000000; /* AND value */
241 *cmd++ = reg; /* OR address */
242
243 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
244 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
245 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
246 *cmd++ = and; /* AND value */
247 *cmd++ = or; /* OR value */
248
249 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
250 *cmd++ = A3XX_CP_SCRATCH_REG2;
251 *cmd++ = dest;
252
253 return cmd;
254}
255#endif
256
/*
 * build_regconstantsave_cmds() - Build the GPU command stream that saves
 * the current hardware register state and shader constants into the
 * context's state shadow (drawctxt->gpustate).
 *
 * The stream copies the context register ranges and the global register
 * list into shadow memory with CP_REG_TO_MEM, conditionally (CP_COND_EXEC)
 * saves VS/FS ALU constants, dumps VS/FS texture memory objects, mipmap
 * pointers and sampler objects out of HLSQ shadow RAM, and is finally
 * wrapped into an IB1 via create_ib1().
 *
 * Several dwords are written as 0 placeholders and recorded in
 * drawctxt->constant_save_commands[]; they are patched later ("From
 * fixup" comments below) once the real constant lengths are known.
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
	struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;
	unsigned int i;

	/* Reserve one dword for later fixup and remember where it lives */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	start = cmd;

	/* Drain the pipeline before reading any state */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;	/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	/*
	 * NOTE(review): the inner 'start' (first register of the range)
	 * shadows the outer 'start' command pointer; legal but confusing.
	 */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		/* Context registers start at 0x2000; place each range at its
		 * dword offset inside the 8K-aligned register shadow */
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants (only when cond_execs[2] is set) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants (only when cond_execs[3] is set) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0;	/* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	   From fixup:

	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   dst = base + offset
	   Because of the base alignment we can use
	   dst = base | offset
	 */
	*cmd++ = 0;	/* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS copy lives SSIZE higher) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
427
428/* Copy GMEM contents to system memory shadow. */
429static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
430 struct adreno_context *drawctxt,
431 struct gmem_shadow_t *shadow)
432{
433 unsigned int *cmds = tmp_ctx.cmd;
434 unsigned int *start = cmds;
435
Jordan Crousefb3012f2012-06-22 13:11:05 -0600436 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
437 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
438
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700439 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
440 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
441
442 /* RB_MODE_CONTROL */
443 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
444 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
445 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
446 /* RB_RENDER_CONTROL */
447 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
448 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
449
450 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
451 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
452 /* RB_COPY_CONTROL */
453 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
454 RB_CLEAR_MODE_RESOLVE) |
455 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
456 tmp_ctx.gmem_base >> 14);
457 /* RB_COPY_DEST_BASE */
458 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
459 shadow->gmemshadow.gpuaddr >> 5);
460 /* RB_COPY_DEST_PITCH */
461 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
462 (shadow->pitch * 4) / 32);
463 /* RB_COPY_DEST_INFO */
464 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
465 RB_TILINGMODE_LINEAR) |
466 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
467 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
468 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
469
470 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
471 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
472 /* GRAS_SC_CONTROL */
473 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
474
475 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
476 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
477 /* VFD_CONTROL_0 */
478 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
479 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
480 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
481 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
482 /* VFD_CONTROL_1 */
483 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
484 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
485 _SET(VFD_CTRLREG1_REGID4INST, 252);
486
487 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
488 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
489 /* VFD_FETCH_INSTR_0_0 */
490 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
491 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
492 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
493 /* VFD_FETCH_INSTR_1_0 */
494 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
495 shadow->quad_vertices.gpuaddr);
496
497 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
498 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
499 /* VFD_DECODE_INSTR_0 */
500 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
501 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
502 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700503 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
504 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
505
506 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
507 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
508 /* HLSQ_CONTROL_0_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700509 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700510 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700511 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700512 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700513 /* HLSQ_CONTROL_1_REG */
514 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700515 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700516 /* HLSQ_CONTROL_2_REG */
517 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
518 /* HLSQ_CONTROL_3_REG */
519 *cmds++ = 0x00000000;
520
521 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
522 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
523 /* HLSQ_VS_CONTROL_REG */
524 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
525 /* HLSQ_FS_CONTROL_REG */
526 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700527 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700528 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
529 /* HLSQ_CONST_VSPRESV_RANGE_REG */
530 *cmds++ = 0x00000000;
531 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
532 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
533 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
534
535 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
536 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
537 /* SP_FS_LENGTH_REG */
538 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
539
540 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
541 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
542 /* SP_SP_CTRL_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700543 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1) |
544 _SET(SP_SPCTRLREG_LOMODE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700545
546 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
547 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
548 /* SP_VS_CTRL_REG0 */
549 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
550 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
551 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700552 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700553 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
554 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
555 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
556 /* SP_VS_CTRL_REG1 */
557 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
558 /* SP_VS_PARAM_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700559 *cmds++ = _SET(SP_VSPARAMREG_PSIZEREGID, 252);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700560 /* SP_VS_OUT_REG_0 */
561 *cmds++ = 0x00000000;
562 /* SP_VS_OUT_REG_1 */
563 *cmds++ = 0x00000000;
564 /* SP_VS_OUT_REG_2 */
565 *cmds++ = 0x00000000;
566 /* SP_VS_OUT_REG_3 */
567 *cmds++ = 0x00000000;
568 /* SP_VS_OUT_REG_4 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_5 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_6 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_7 */
575 *cmds++ = 0x00000000;
576
577 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
578 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
579 /* SP_VS_VPC_DST_REG_0 */
580 *cmds++ = 0x00000000;
581 /* SP_VS_VPC_DST_REG_1 */
582 *cmds++ = 0x00000000;
583 /* SP_VS_VPC_DST_REG_2 */
584 *cmds++ = 0x00000000;
585 /* SP_VS_VPC_DST_REG_3 */
586 *cmds++ = 0x00000000;
587 /* SP_VS_OBJ_OFFSET_REG */
588 *cmds++ = 0x00000000;
589 /* SP_VS_OBJ_START_REG */
590 *cmds++ = 0x00000000;
591
592 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
593 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
594 /* SP_VS_LENGTH_REG */
595 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
596 /* SP_FS_CTRL_REG0 */
597 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
598 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
599 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700600 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700601 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700602 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700603 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
604 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
605 /* SP_FS_CTRL_REG1 */
606 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700607 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
608 /* SP_FS_OBJ_OFFSET_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700609 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
610 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 127);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700611 /* SP_FS_OBJ_START_REG */
612 *cmds++ = 0x00000000;
613
614 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
615 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
616 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
617 *cmds++ = 0x00000000;
618 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
619 *cmds++ = 0x00000000;
620
621 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
622 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
623 /* SP_FS_OUTPUT_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700624 *cmds++ = _SET(SP_IMAGEOUTPUTREG_DEPTHOUTMODE, SP_PIXEL_BASED);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700625
626 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
627 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
628 /* SP_FS_MRT_REG_0 */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700629 *cmds++ = _SET(SP_FSMRTREG_PRECISION, 1);
630
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700631 /* SP_FS_MRT_REG_1 */
632 *cmds++ = 0x00000000;
633 /* SP_FS_MRT_REG_2 */
634 *cmds++ = 0x00000000;
635 /* SP_FS_MRT_REG_3 */
636 *cmds++ = 0x00000000;
637
638 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
639 *cmds++ = CP_REG(A3XX_VPC_ATTR);
640 /* VPC_ATTR */
641 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
642 _SET(VPC_VPCATTR_LMSIZE, 1);
643 /* VPC_PACK */
644 *cmds++ = 0x00000000;
645 /* VPC_VARRYING_INTERUPT_MODE_0 */
646 *cmds++ = 0x00000000;
647 /* VPC_VARRYING_INTERUPT_MODE_1 */
648 *cmds++ = 0x00000000;
649 /* VPC_VARRYING_INTERUPT_MODE_2 */
650 *cmds++ = 0x00000000;
651 /* VPC_VARRYING_INTERUPT_MODE_3 */
652 *cmds++ = 0x00000000;
653 /* VPC_VARYING_PS_REPL_MODE_0 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARYING_PS_REPL_MODE_1 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARYING_PS_REPL_MODE_2 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARYING_PS_REPL_MODE_3 */
660 *cmds++ = 0x00000000;
661
662 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
663 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
664 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
665 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
666 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
667 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
668 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
669
670 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700671 *cmds++ = 0x00000000; *cmds++ = 0x13001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700672 /* end; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700673 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700674 /* nop; */
675 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
676 /* nop; */
677 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
678
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700679
680 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
681 *cmds++ = 0x00000000;
682
683 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
684 *cmds++ = 0x00000000;
685
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700686 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
687 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
688 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
689 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
690 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
691 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
692 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
693
694 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700695 *cmds++ = 0x00000000; *cmds++ = 0x30201b00;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700696 /* end; */
697 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
698 /* nop; */
699 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
700 /* nop; */
701 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
702
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700703
704
705 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
706 *cmds++ = 0x00000000;
707
708 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
709 *cmds++ = 0x00000000;
710
711
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700712 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
713 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
714 /* RB_MSAA_CONTROL */
715 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
716 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
717
718 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
719 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
720 /* RB_DEPTH_CONTROL */
721 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
722
723 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700724 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
725 /* RB_STENCIL_CONTROL */
726 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
727 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
728 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
729 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
730 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
731 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
732 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
733 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
734
735 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
736 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
737 /* GRAS_SU_MODE_CONTROL */
738 *cmds++ = 0x00000000;
739
740 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700741 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
742 /* RB_MRT_CONTROL0 */
743 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
744 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
745 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
746 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
747
748 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
749 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
750 /* RB_MRT_BLEND_CONTROL0 */
751 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
752 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
753 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
754 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
755 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
756 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
757 /* RB_MRT_CONTROL1 */
758 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
759 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
760 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
761
762 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
763 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
764 /* RB_MRT_BLEND_CONTROL1 */
765 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
766 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
767 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
768 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
769 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
770 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
771 /* RB_MRT_CONTROL2 */
772 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
773 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
774 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
775
776 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
777 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
778 /* RB_MRT_BLEND_CONTROL2 */
779 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
780 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
781 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
782 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
783 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
784 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
785 /* RB_MRT_CONTROL3 */
786 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
787 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
788 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
789
790 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
791 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
792 /* RB_MRT_BLEND_CONTROL3 */
793 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
794 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
795 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
796 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
797 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
798 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
799
800 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
801 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
802 /* VFD_INDEX_MIN */
803 *cmds++ = 0x00000000;
804 /* VFD_INDEX_MAX */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700805 *cmds++ = 0x155;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700806 /* VFD_INSTANCEID_OFFSET */
807 *cmds++ = 0x00000000;
808 /* VFD_INDEX_OFFSET */
809 *cmds++ = 0x00000000;
810
811 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
812 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
813 /* VFD_VS_THREADING_THRESHOLD */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700814 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_REGID_THRESHOLD, 15) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700815 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
816
817 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
818 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
819 /* TPL1_TP_VS_TEX_OFFSET */
820 *cmds++ = 0;
821
822 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
823 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
824 /* TPL1_TP_FS_TEX_OFFSET */
825 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
826 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
827 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
828
829 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
830 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
831 /* PC_PRIM_VTX_CNTL */
832 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
833 PC_DRAW_TRIANGLES) |
834 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
835 PC_DRAW_TRIANGLES) |
836 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
837
838 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
839 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
840 /* GRAS_SC_WINDOW_SCISSOR_TL */
841 *cmds++ = 0x00000000;
842 /* GRAS_SC_WINDOW_SCISSOR_BR */
843 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
844 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
845
846 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
847 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
848 /* GRAS_SC_SCREEN_SCISSOR_TL */
849 *cmds++ = 0x00000000;
850 /* GRAS_SC_SCREEN_SCISSOR_BR */
851 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
852 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
853
854 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
855 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
856 /* GRAS_CL_VPORT_XOFFSET */
857 *cmds++ = 0x00000000;
858 /* GRAS_CL_VPORT_XSCALE */
859 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
860 /* GRAS_CL_VPORT_YOFFSET */
861 *cmds++ = 0x00000000;
862 /* GRAS_CL_VPORT_YSCALE */
863 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
864
865 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
866 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
867 /* GRAS_CL_VPORT_ZOFFSET */
868 *cmds++ = 0x00000000;
869 /* GRAS_CL_VPORT_ZSCALE */
870 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
871
872 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
873 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
874 /* GRAS_CL_CLIP_CNTL */
875 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
876 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
877 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
878 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
879 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
880
881 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
882 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
883 /* GRAS_CL_GB_CLIP_ADJ */
884 *cmds++ = 0x00000000;
885
886 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
887 *cmds++ = 0x00000000;
888
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700889
890 /* oxili_generate_context_roll_packets */
891 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
892 *cmds++ = 0x00000400;
893
894 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
895 *cmds++ = 0x00000400;
896
897 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
898 *cmds++ = 0x00008000; /* SP_VS_MEM_SIZE_REG */
899
900 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
901 *cmds++ = 0x00008000; /* SP_FS_MEM_SIZE_REG */
902
903 /* Clear cache invalidate bit when re-loading the shader control regs */
904 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
905 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
906 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
907 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 1) |
908 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
909 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
910 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
911
912 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
913 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
914 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
915 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
916 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
917 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
918 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
919 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
920
921 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
922 *cmds++ = 0x00000000; /* SP_VS_MEM_SIZE_REG */
923
924 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
925 *cmds++ = 0x00000000; /* SP_FS_MEM_SIZE_REG */
926
927 /* end oxili_generate_context_roll_packets */
928
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700929 /*
930 * Resolve using two draw calls with a dummy register
931 * write in between. This is a HLM workaround
932 * that should be removed later.
933 */
934 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
935 *cmds++ = 0x00000000; /* Viz query info */
936 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
937 PC_DI_SRC_SEL_IMMEDIATE,
938 PC_DI_INDEX_SIZE_32_BIT,
939 PC_DI_IGNORE_VISIBILITY);
940 *cmds++ = 0x00000003; /* Num indices */
941 *cmds++ = 0x00000000; /* Index 0 */
942 *cmds++ = 0x00000001; /* Index 1 */
943 *cmds++ = 0x00000002; /* Index 2 */
944
945 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
946 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
947 *cmds++ = 0x00000000;
948
949 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
950 *cmds++ = 0x00000000; /* Viz query info */
951 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
952 PC_DI_SRC_SEL_IMMEDIATE,
953 PC_DI_INDEX_SIZE_32_BIT,
954 PC_DI_IGNORE_VISIBILITY);
955 *cmds++ = 0x00000003; /* Num indices */
956 *cmds++ = 0x00000002; /* Index 0 */
957 *cmds++ = 0x00000001; /* Index 1 */
958 *cmds++ = 0x00000003; /* Index 2 */
959
960 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
961 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
962 *cmds++ = 0x00000000;
963
964 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
965 *cmds++ = 0x00000000;
966
967 /* Create indirect buffer command for above command sequence */
968 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
969
970 return cmds;
971}
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700972static void build_shader_save_cmds(struct adreno_device *adreno_dev,
973 struct adreno_context *drawctxt)
974{
975 unsigned int *cmd = tmp_ctx.cmd;
976 unsigned int *start;
977
978 /* Reserve space for boolean values used for COND_EXEC packet */
979 drawctxt->cond_execs[0].hostptr = cmd;
980 drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
981 *cmd++ = 0;
982 drawctxt->cond_execs[1].hostptr = cmd;
983 drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
984 *cmd++ = 0;
985
986 drawctxt->shader_save_commands[0].hostptr = cmd;
987 drawctxt->shader_save_commands[0].gpuaddr =
988 virt2gpu(cmd, &drawctxt->gpustate);
989 *cmd++ = 0;
990 drawctxt->shader_save_commands[1].hostptr = cmd;
991 drawctxt->shader_save_commands[1].gpuaddr =
992 virt2gpu(cmd, &drawctxt->gpustate);
993 *cmd++ = 0;
994
995 start = cmd;
996
997 /* Save vertex shader */
998
999 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
1000 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
1001 *cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
1002 *cmd++ = 0x0000FFFF;
1003 *cmd++ = 3; /* EXEC_COUNT */
1004
1005 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1006 drawctxt->shader_save_commands[2].hostptr = cmd;
1007 drawctxt->shader_save_commands[2].gpuaddr =
1008 virt2gpu(cmd, &drawctxt->gpustate);
1009 /*
1010 From fixup:
1011
1012 dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
1013
1014 From regspec:
1015 SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
1016 If bit31 is 1, it means overflow
1017 or any long shader.
1018
1019 src = (HLSQ_SHADOW_BASE + 0x1000)/4
1020 */
1021 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
1022 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;
1023
1024 /* Save fragment shader */
1025 *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
1026 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
1027 *cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
1028 *cmd++ = 0x0000FFFF;
1029 *cmd++ = 3; /* EXEC_COUNT */
1030
1031 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
1032 drawctxt->shader_save_commands[3].hostptr = cmd;
1033 drawctxt->shader_save_commands[3].gpuaddr =
1034 virt2gpu(cmd, &drawctxt->gpustate);
1035 /*
1036 From fixup:
1037
1038 dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
1039
1040 From regspec:
1041 SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
1042 If bit31 is 1, it means overflow
1043 or any long shader.
1044
1045 fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
1046 From regspec:
1047
1048 SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
1049 First instruction of the whole shader will be stored from
1050 the offset in instruction cache, unit = 256bits, a cache line.
1051 It can start from 0 if no VS available.
1052
1053 src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
1054 */
1055 *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
1056 *cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
1057 + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;
1058
1059 /* Create indirect buffer command for above command sequence */
1060 create_ib1(drawctxt, drawctxt->shader_save, start, cmd);
1061
1062 tmp_ctx.cmd = cmd;
1063}
1064
1065/*
1066 * Make an IB to modify context save IBs with the correct shader instruction
1067 * and constant sizes and offsets.
1068 */
1069
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7; /* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Full UCHE invalidate: zero start address, zero end address,
	 * ENTIRE_CACHE selects the whole cache */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;    /* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0; /* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/*
	 * CPU-sync variant: dump the raw SP/HLSQ registers to the patch
	 * slots and let the host compute the final values.
	 * NOTE(review): the host-side patching code is not visible in this
	 * part of the file -- verify against the save path that consumes
	 * these slots.
	 */

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else
	/*
	 * GPU-side variant: use CP read-modify-write (via rmw_regtomem()
	 * and CP_REG_RMW on CP scratch registers) to compute the patched
	 * REG_TO_MEM headers and COND_EXEC flags entirely on the GPU,
	 * then store them into the save-IB patch slots.
	 */

	/*
	 * Patch the VS save header: extract SP_VS_CTRL_REG0.VS_LENGTH
	 * (mask 0x7f000000), shift it into the REG_TO_MEM loop-count field
	 * (the "11+..." shift converts 256-bit units to a dword count),
	 * OR in the HLSQ shadow source address, and write the result to
	 * shader_save_commands[2].
	 */
	/* Shader save */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/*
	 * The FS header additionally needs the FS object offset folded in,
	 * which rmw_regtomem() cannot express in one step, so it is built
	 * by hand in CP scratch registers.
	 */
	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
		A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000; /* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000; /* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000; /* AND value */
	*cmd++ = 0x00000000; /* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff; /* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */

	/* Store the assembled FS REG_TO_MEM header into its patch slot */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save: loop counts come from the VS/FS constant lengths
	 * in SP_*_CTRL_REG1 (mask 0x3ff) */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	/* Patch the FS constant destination address from the FS constant
	 * object offset (SP_FS_OBJ_OFFSET_REG bits 23:16) */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals: non-zero constant length
	 * enables the corresponding COND_EXEC in the constant-save IB */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	/* Bit 1 of SP_VS_CTRL_REG0 (shifted to bit 32, i.e. rotated into
	 * place) gates the VS shader-save COND_EXEC */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1220
1221/****************************************************************************/
1222/* Functions to build context restore IBs */
1223/****************************************************************************/
1224
1225static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1226 struct adreno_context *drawctxt,
1227 struct gmem_shadow_t *shadow)
1228{
1229 unsigned int *cmds = tmp_ctx.cmd;
1230 unsigned int *start = cmds;
1231
Jordan Crousefb3012f2012-06-22 13:11:05 -06001232 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
1233 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
1234
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001235 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1236 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1237 /* HLSQ_CONTROL_0_REG */
1238 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001239 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001240 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1241 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001242 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001243 /* HLSQ_CONTROL_1_REG */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001244 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
1245 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001246 /* HLSQ_CONTROL_2_REG */
1247 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1248 /* HLSQ_CONTROL3_REG */
1249 *cmds++ = 0x00000000;
1250
1251 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1252 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1253 /* RB_MRT_BUF_INFO0 */
1254 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1255 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1256 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1257 (shadow->gmem_pitch * 4 * 8) / 256);
1258 /* RB_MRT_BUF_BASE0 */
1259 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1260
1261 /* Texture samplers */
1262 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1263 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1264 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1265 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1266 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1267 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1268 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1269 *cmds++ = 0x00000240;
1270 *cmds++ = 0x00000000;
1271
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001272 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1273 *cmds++ = 0x00000000;
1274
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001275 /* Texture memobjs */
1276 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1277 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1278 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1279 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1280 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1281 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1282 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1283 *cmds++ = 0x4cc06880;
1284 *cmds++ = shadow->height | (shadow->width << 14);
1285 *cmds++ = (shadow->pitch*4*8) << 9;
1286 *cmds++ = 0x00000000;
1287
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001288 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1289 *cmds++ = 0x00000000;
1290
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001291 /* Mipmap bases */
1292 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1293 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1294 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1295 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1296 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1297 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1298 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1299 *cmds++ = shadow->gmemshadow.gpuaddr;
1300 *cmds++ = 0x00000000;
1301 *cmds++ = 0x00000000;
1302 *cmds++ = 0x00000000;
1303 *cmds++ = 0x00000000;
1304 *cmds++ = 0x00000000;
1305 *cmds++ = 0x00000000;
1306 *cmds++ = 0x00000000;
1307 *cmds++ = 0x00000000;
1308 *cmds++ = 0x00000000;
1309 *cmds++ = 0x00000000;
1310 *cmds++ = 0x00000000;
1311 *cmds++ = 0x00000000;
1312 *cmds++ = 0x00000000;
1313
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001314 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1315 *cmds++ = 0x00000000;
1316
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001317 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1318 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1319 /* HLSQ_VS_CONTROL_REG */
1320 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1321 /* HLSQ_FS_CONTROL_REG */
1322 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1323 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1324 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1325 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1326 *cmds++ = 0x00000000;
1327 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1328 *cmds++ = 0x00000000;
1329
1330 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1331 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1332 /* SP_FS_LENGTH_REG */
1333 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1334
1335 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1336 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1337 /* SP_VS_CTRL_REG0 */
1338 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1339 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1340 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1341 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1342 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1343 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1344 /* SP_VS_CTRL_REG1 */
1345 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1346 /* SP_VS_PARAM_REG */
1347 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1348 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1349 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1350 /* SP_VS_OUT_REG0 */
1351 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1352 /* SP_VS_OUT_REG1 */
1353 *cmds++ = 0x00000000;
1354 /* SP_VS_OUT_REG2 */
1355 *cmds++ = 0x00000000;
1356 /* SP_VS_OUT_REG3 */
1357 *cmds++ = 0x00000000;
1358 /* SP_VS_OUT_REG4 */
1359 *cmds++ = 0x00000000;
1360 /* SP_VS_OUT_REG5 */
1361 *cmds++ = 0x00000000;
1362 /* SP_VS_OUT_REG6 */
1363 *cmds++ = 0x00000000;
1364 /* SP_VS_OUT_REG7 */
1365 *cmds++ = 0x00000000;
1366
1367 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1368 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1369 /* SP_VS_VPC_DST_REG0 */
1370 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1371 /* SP_VS_VPC_DST_REG1 */
1372 *cmds++ = 0x00000000;
1373 /* SP_VS_VPC_DST_REG2 */
1374 *cmds++ = 0x00000000;
1375 /* SP_VS_VPC_DST_REG3 */
1376 *cmds++ = 0x00000000;
1377 /* SP_VS_OBJ_OFFSET_REG */
1378 *cmds++ = 0x00000000;
1379 /* SP_VS_OBJ_START_REG */
1380 *cmds++ = 0x00000000;
1381
1382 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1383 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1384 /* SP_VS_LENGTH_REG */
1385 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1386 /* SP_FS_CTRL_REG0 */
1387 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1388 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1389 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001390 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
1391 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001392 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1393 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001394 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001395 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1396 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1397 /* SP_FS_CTRL_REG1 */
1398 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1399 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1400 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1401 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001402 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001403 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 126);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001404 /* SP_FS_OBJ_START_REG */
1405 *cmds++ = 0x00000000;
1406
1407 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1408 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1409 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1410 *cmds++ = 0x00000000;
1411 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1412 *cmds++ = 0x00000000;
1413
1414 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1415 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1416 /* SP_FS_OUT_REG */
1417 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1418
Jordan Crousea7ec4212012-02-04 10:23:52 -07001419 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001420 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1421 /* SP_FS_MRT_REG0 */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001422 *cmds++ = _SET(SP_FSMRTREG_PRECISION, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001423 /* SP_FS_MRT_REG1 */
1424 *cmds++ = 0;
1425 /* SP_FS_MRT_REG2 */
1426 *cmds++ = 0;
1427 /* SP_FS_MRT_REG3 */
1428 *cmds++ = 0;
1429
1430 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1431 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1432 /* VPC_ATTR */
1433 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1434 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1435 _SET(VPC_VPCATTR_LMSIZE, 1);
1436 /* VPC_PACK */
1437 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1438 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1439 /* VPC_VARYING_INTERP_MODE_0 */
1440 *cmds++ = 0x00000000;
1441 /* VPC_VARYING_INTERP_MODE1 */
1442 *cmds++ = 0x00000000;
1443 /* VPC_VARYING_INTERP_MODE2 */
1444 *cmds++ = 0x00000000;
1445 /* VPC_VARYING_IINTERP_MODE3 */
1446 *cmds++ = 0x00000000;
1447 /* VPC_VARRYING_PS_REPL_MODE_0 */
1448 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1449 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1450 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1451 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1452 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1453 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1454 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1455 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1456 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1457 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1458 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1459 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1460 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1461 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1462 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1463 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1464 /* VPC_VARRYING_PS_REPL_MODE_1 */
1465 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1466 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1467 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1468 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1469 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1470 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1471 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1472 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1473 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1474 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1475 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1476 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1477 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1478 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1479 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1480 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1481 /* VPC_VARRYING_PS_REPL_MODE_2 */
1482 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1483 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1484 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1485 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1486 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1487 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1488 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1489 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1490 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1491 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1492 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1493 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1494 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1495 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1496 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1497 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1498 /* VPC_VARRYING_PS_REPL_MODE_3 */
1499 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1500 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1501 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1502 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1503 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1504 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1505 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1506 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1507 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1508 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1509 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1510 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1511 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1512 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1513 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1514 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1515
Jordan Crousea7ec4212012-02-04 10:23:52 -07001516 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001517 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1518 /* SP_SP_CTRL_REG */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001519 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1) |
1520 _SET(SP_SPCTRLREG_LOMODE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001521
1522 /* Load vertex shader */
1523 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1524 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1525 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1526 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1527 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1528 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1529 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1530 /* (sy)end; */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001531 *cmds++ = 0x00000000; *cmds++ = 0x13001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001532 /* nop; */
1533 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1534 /* nop; */
1535 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1536 /* nop; */
1537 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1538
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001539 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1540 *cmds++ = 0x00000000;
1541
1542 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1543 *cmds++ = 0x00000000;
1544
1545
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001546 /* Load fragment shader */
1547 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1548 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1549 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1550 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1551 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1552 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1553 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1554 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001555 *cmds++ = 0x00002000; *cmds++ = 0x57309902;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001556 /* (rpt5)nop; */
1557 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1558 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1559 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1560 /* (sy)mov.f32f32 r1.x, r0.x; */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001561 *cmds++ = 0x00000000; *cmds++ = 0x30040b00;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001562 /* mov.f32f32 r1.y, r0.y; */
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001563 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001564 /* mov.f32f32 r1.z, r0.z; */
1565 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1566 /* mov.f32f32 r1.w, r0.w; */
1567 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1568 /* end; */
1569 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1570
1571 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
1572 *cmds++ = 0x00000000;
1573
1574 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
1575 *cmds++ = 0x00000000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001576
1577 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1578 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1579 /* VFD_CONTROL_0 */
1580 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1581 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1582 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1583 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1584 /* VFD_CONTROL_1 */
1585 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1586 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1587 _SET(VFD_CTRLREG1_REGID4INST, 252);
1588
1589 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1590 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1591 /* VFD_FETCH_INSTR_0_0 */
1592 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1593 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1594 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1595 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1596 /* VFD_FETCH_INSTR_1_0 */
1597 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1598 shadow->quad_vertices_restore.gpuaddr);
1599 /* VFD_FETCH_INSTR_0_1 */
1600 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1601 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1602 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1603 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1604 /* VFD_FETCH_INSTR_1_1 */
1605 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1606 shadow->quad_vertices_restore.gpuaddr + 16);
1607
1608 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1609 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1610 /* VFD_DECODE_INSTR_0 */
1611 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1612 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1613 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1614 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1615 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1616 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1617 /* VFD_DECODE_INSTR_1 */
1618 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1619 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1620 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1621 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1622 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1623 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1624
1625 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1626 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1627 /* RB_DEPTH_CONTROL */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001628 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_LESS);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001629
1630 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1631 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1632 /* RB_STENCIL_CONTROL */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001633 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001634 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1635 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1636 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001637 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001638 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1639 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1640 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1641
1642 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1643 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1644 /* RB_MODE_CONTROL */
1645 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1646 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1647
1648 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1649 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1650 /* RB_RENDER_CONTROL */
1651 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1652 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1653
1654 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1655 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1656 /* RB_MSAA_CONTROL */
1657 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1658 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1659
1660 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1661 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1662 /* RB_MRT_CONTROL0 */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001663 *cmds++ = _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1664 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001665 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1666
1667 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1668 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1669 /* RB_MRT_BLENDCONTROL0 */
1670 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1671 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1672 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1673 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1674 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1675 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1676 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1677 /* RB_MRT_CONTROL1 */
1678 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001679 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1680 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001681 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1682
1683 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1684 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1685 /* RB_MRT_BLENDCONTROL1 */
1686 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1687 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1688 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1689 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1690 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1691 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1692 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1693 /* RB_MRT_CONTROL2 */
1694 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001695 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1696 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001697 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1698
1699 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1700 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1701 /* RB_MRT_BLENDCONTROL2 */
1702 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1703 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1704 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1705 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1706 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1707 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1708 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1709 /* RB_MRT_CONTROL3 */
1710 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001711 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1712 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001713 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1714
1715 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1716 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1717 /* RB_MRT_BLENDCONTROL3 */
1718 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1719 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1720 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1721 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1722 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1723 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1724 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1725
1726 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1727 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1728 /* VFD_INDEX_MIN */
1729 *cmds++ = 0x00000000;
1730 /* VFD_INDEX_MAX */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001731 *cmds++ = 340;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001732 /* VFD_INDEX_OFFSET */
1733 *cmds++ = 0x00000000;
1734 /* TPL1_TP_VS_TEX_OFFSET */
1735 *cmds++ = 0x00000000;
1736
1737 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1738 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1739 /* VFD_VS_THREADING_THRESHOLD */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001740 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_REGID_THRESHOLD, 15) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001741 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1742
1743 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1744 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1745 /* TPL1_TP_VS_TEX_OFFSET */
1746 *cmds++ = 0x00000000;
1747
1748 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1749 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1750 /* TPL1_TP_FS_TEX_OFFSET */
1751 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1752 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1753 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1754
1755 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1756 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1757 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001758 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1759 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1760 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001761
1762 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1763 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1764 /* GRAS_SU_MODE_CONTROL */
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001765 *cmds++ = _SET(GRAS_SU_CTRLMODE_LINEHALFWIDTH, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001766
1767 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1768 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1769 /* GRAS_SC_WINDOW_SCISSOR_TL */
1770 *cmds++ = 0x00000000;
1771 /* GRAS_SC_WINDOW_SCISSOR_BR */
1772 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1773 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1774
1775 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1776 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1777 /* GRAS_SC_SCREEN_SCISSOR_TL */
1778 *cmds++ = 0x00000000;
1779 /* GRAS_SC_SCREEN_SCISSOR_BR */
1780 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1781 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1782
1783 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1784 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1785 /* GRAS_CL_VPORT_XOFFSET */
1786 *cmds++ = 0x00000000;
1787 /* GRAS_CL_VPORT_XSCALE */
1788 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1789 /* GRAS_CL_VPORT_YOFFSET */
1790 *cmds++ = 0x00000000;
1791 /* GRAS_CL_VPORT_YSCALE */
1792 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1793
1794 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1795 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1796 /* GRAS_CL_VPORT_ZOFFSET */
1797 *cmds++ = 0x00000000;
1798 /* GRAS_CL_VPORT_ZSCALE */
1799 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1800
1801 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1802 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1803 /* GRAS_CL_CLIP_CNTL */
1804 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1805
1806 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1807 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1808 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1809 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1810
1811 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1812 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1813 /* PC_PRIM_VTX_CONTROL */
1814 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1815 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1816 PC_DRAW_TRIANGLES) |
1817 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1818 PC_DRAW_TRIANGLES) |
1819 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1820
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001821
1822 /* oxili_generate_context_roll_packets */
1823 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
1824 *cmds++ = 0x00000400;
1825
1826 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
1827 *cmds++ = 0x00000400;
1828
1829 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
1830 *cmds++ = 0x00008000; /* SP_VS_MEM_SIZE_REG */
1831
1832 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
1833 *cmds++ = 0x00008000; /* SP_FS_MEM_SIZE_REG */
1834
1835 /* Clear cache invalidate bit when re-loading the shader control regs */
1836 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
1837 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1838 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1839 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1840 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1841 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1842
1843 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
1844 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1845 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1846 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
1847 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 1) |
1848 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1849 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1850 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
1851 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1852
1853 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
1854 *cmds++ = 0x00000000; /* SP_VS_MEM_SIZE_REG */
1855
1856 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
1857 *cmds++ = 0x00000000; /* SP_FS_MEM_SIZE_REG */
1858
1859 /* end oxili_generate_context_roll_packets */
1860
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001861 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1862 *cmds++ = 0x00000000; /* Viz query info */
1863 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1864 PC_DI_SRC_SEL_AUTO_INDEX,
1865 PC_DI_INDEX_SIZE_16_BIT,
1866 PC_DI_IGNORE_VISIBILITY);
1867 *cmds++ = 0x00000002; /* Num indices */
1868
1869 /* Create indirect buffer command for above command sequence */
1870 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1871
1872 return cmds;
1873}
1874
Rajeev Kulkarnic46499f2012-07-10 16:02:46 -07001875
/*
 * build_regrestore_cmds() - build the IB that restores shadowed context
 * registers on a context switch.
 *
 * Emits an HLSQ flush + idle, a full UCHE cache invalidate, then a
 * CP_LOAD_CONSTANT_CONTEXT packet covering every range listed in
 * context_register_ranges[], followed by one type0 write per entry in
 * global_registers[].  The GPU address of each global-register value slot
 * is recorded in tmp_ctx.reg_values[] so the save path can fill it in.
 *
 * Note the CP_LOAD_CONSTANT_CONTEXT header cannot be written up front: its
 * payload length is only known after the register ranges are emitted, so a
 * slot is reserved at lcc_start[0] and patched afterwards.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???);
	 * the length is not known until the ranges below are emitted */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* context_register_ranges[] holds start/end pairs, hence the /2 */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Patch the deferred packet header with the now-known length */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * One type0 write per global register; the GPU address of each value
	 * word is remembered so it can be filled in with the saved value.
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1933
/*
 * build_constantrestore_cmds() - build the IB that restores shader (ALU)
 * constants and texture state from the context shadow.
 *
 * The CP_LOAD_STATE "ord1"/"ord2" words for the VS/FS constant loads are
 * written as 0 placeholders here; their GPU addresses are recorded in
 * drawctxt->constant_load_commands[] so the fixup IBs can patch in the real
 * sizes/offsets (derived from SP_VS_CTRL_REG1 / SP_FS_CTRL_REG1 /
 * SP_FS_OBJ_OFFSET_REG — see the inline notes below).  Each constant load
 * is wrapped in a CP_COND_EXEC keyed on cond_execs[2]/[3], which are also
 * filled in elsewhere.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/* Reserve the two conditional-execution flag words (VS/FS) */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable register shadowing for the HLSQ write below */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants (only when cond_execs[2] matches) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
		&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 - patched by the fixup IB */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants (only when cond_execs[3] matches) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 - patched by the fixup IB */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 - patched by the fixup IB */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		& 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		& 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		& 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2098
/*
 * build_shader_restore_cmds() - build the IB that reloads the saved vertex
 * and fragment shader instructions from the context shadow.
 *
 * Each CP_LOAD_STATE "ord1" word is emitted as a 0 placeholder; its GPU
 * address is recorded in drawctxt->shader_load_commands[] so the restore
 * fixup IB can patch in the real unit count taken from
 * SP_VS_CTRL_REG0 / SP_FS_CTRL_REG0 (see the inline notes).  Each load is
 * guarded by a CP_COND_EXEC on cond_execs[0]/[1] so it only executes when
 * the corresponding shader was actually saved.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 - patched by the fixup IB */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 - patched by the fixup IB */
	/* FS instructions live in the second half of the shader shadow */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2165
2166static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
2167 struct adreno_context *drawctxt)
2168{
2169 unsigned int *cmd = tmp_ctx.cmd;
2170 unsigned int *start = cmd;
2171
2172 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
2173 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
2174 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
2175 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
2176 = virt2gpu(cmd, &drawctxt->gpustate);
2177 *cmd++ = 0;
2178
2179 /* Create indirect buffer command for above command sequence */
2180 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
2181
2182 tmp_ctx.cmd = cmd;
2183}
2184
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
/*
 * Runs before the restore IBs: reads the live SP_*_CTRL/OFFSET registers and
 * writes the derived CP_LOAD_STATE words into the placeholder slots that
 * build_shader_restore_cmds()/build_constantrestore_cmds() recorded.
 * rmw_regtomem() presumably emits a read-modify-write of the register into
 * the given GPU address using the mask/shift/or arguments — confirm against
 * its definition.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes: extract VS/FS_LENGTH and build the ord1 word
	 * (stateblock 4 = VS, 6 = FS; mode 4 = indirect) */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes: extract VS/FSCONSTLENGTH into the ord1 word */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals: nonzero constant length
	 * arms the corresponding CP_COND_EXEC flag */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset (ord2 = base | offset | 1) */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2257
/*
 * a3xx_create_gpustate_shadow() - build all save/restore IBs for a context
 * that does not use preambles.
 *
 * Each build_* helper writes its commands at tmp_ctx.cmd and advances it,
 * so the call order below determines the layout of the command space inside
 * the gpustate allocation.  Always returns 0.
 */
static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
				       struct adreno_context *drawctxt)
{
	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;

	build_regrestore_cmds(adreno_dev, drawctxt);
	build_constantrestore_cmds(adreno_dev, drawctxt);
	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
	build_regconstantsave_cmds(adreno_dev, drawctxt);
	build_shader_save_cmds(adreno_dev, drawctxt);
	build_shader_restore_cmds(adreno_dev, drawctxt);
	build_restore_fixup_cmds(adreno_dev, drawctxt);
	build_save_fixup_cmds(adreno_dev, drawctxt);

	return 0;
}
2274
/* create buffers for saving/restoring registers, constants, & GMEM */
/*
 * Allocates the GMEM shadow for the context, then builds the quad vertex
 * buffer and the gmem2sys/sys2gmem copy IBs inside it.  Returns 0 on
 * success or the kgsl_allocate() error code on failure.
 */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	int result;

	/* Size the shadow from the device's GMEM aperture */
	calc_gmemsize(&drawctxt->context_gmem_shadow, adreno_dev->gmem_size);
	tmp_ctx.gmem_base = adreno_dev->gmem_base;

	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
		drawctxt->pagetable, drawctxt->context_gmem_shadow.size);

	if (result)
		return result;

	/* Quad vertices used by the GMEM copy draws */
	build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
		&tmp_ctx.cmd);

	/* Do we need to idle? */
	/* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */

	/* IBs that copy GMEM out to system memory and back in */
	tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);
	tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
		&drawctxt->context_gmem_shadow);

	/* Flush the CPU-built commands out of the cache for the GPU */
	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
			    KGSL_CACHE_OP_FLUSH);

	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;

	return 0;
}
2308
2309static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
2310 struct adreno_context *drawctxt)
2311{
2312 int ret;
2313
2314 /*
2315 * Allocate memory for the GPU state and the context commands.
2316 * Despite the name, this is much more then just storage for
2317 * the gpustate. This contains command space for gmem save
2318 * and texture and vertex buffer storage too
2319 */
2320
2321 ret = kgsl_allocate(&drawctxt->gpustate,
2322 drawctxt->pagetable, CONTEXT_SIZE);
2323
2324 if (ret)
2325 return ret;
2326
2327 kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
2328 tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;
2329
2330 if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
2331 ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
2332 if (ret)
2333 goto done;
2334
2335 drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
2336 }
2337
2338 if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
2339 ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);
2340
2341done:
2342 if (ret)
2343 kgsl_sharedmem_free(&drawctxt->gpustate);
2344
2345 return ret;
2346}
2347
2348static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
2349 struct adreno_context *context)
2350{
2351 struct kgsl_device *device = &adreno_dev->dev;
2352
2353 if (context == NULL)
2354 return;
2355
2356 if (context->flags & CTXT_FLAGS_GPU_HANG)
2357 KGSL_CTXT_WARN(device,
2358 "Current active context has caused gpu hang\n");
2359
2360 if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
2361 /* Fixup self modifying IBs for save operations */
Shubhraprakash Dasb2abc452012-06-08 16:33:03 -06002362 adreno_ringbuffer_issuecmds(device, context,
2363 KGSL_CMD_FLAGS_NONE, context->save_fixup, 3);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002364
2365 /* save registers and constants. */
Shubhraprakash Dasb2abc452012-06-08 16:33:03 -06002366 adreno_ringbuffer_issuecmds(device, context,
2367 KGSL_CMD_FLAGS_NONE,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002368 context->regconstant_save, 3);
2369
2370 if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
2371 /* Save shader instructions */
Shubhraprakash Dasb2abc452012-06-08 16:33:03 -06002372 adreno_ringbuffer_issuecmds(device, context,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002373 KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);
2374
2375 context->flags |= CTXT_FLAGS_SHADER_RESTORE;
2376 }
2377 }
2378
2379 if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
2380 (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
2381 /*
2382 * Save GMEM (note: changes shader. shader must
2383 * already be saved.)
2384 */
2385
Shubhraprakash Dasb2abc452012-06-08 16:33:03 -06002386 adreno_ringbuffer_issuecmds(device, context,
2387 KGSL_CMD_FLAGS_PMODE,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002388 context->context_gmem_shadow.
2389 gmem_save, 3);
2390 context->flags |= CTXT_FLAGS_GMEM_RESTORE;
2391 }
2392}
2393
/*
 * Restore (switch in) an A3XX draw context: set the context's pagetable,
 * publish the incoming context id to memstore, then replay the saved
 * GMEM / register / constant / shader state on the ringbuffer.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
	struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		/*
		 * NOTE(review): this dereferences adreno_dev->drawctxt_active;
		 * presumably a context is always active when this path is
		 * taken - confirm drawctxt_active cannot be NULL here.
		 */
		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
			adreno_dev->drawctxt_active->id);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Write the incoming context id into memstore's current_context */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[4] = context->id;
	adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
		cmds, 5);
	kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id);

	/*
	 * Restore GMEM. (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_restore, 3);
		/* One-shot: clear the flag once GMEM has been replayed */
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Restore shadowed registers */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE, context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		/* Restore shadowed shader constants */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, context,
				KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2456
2457static void a3xx_rb_init(struct adreno_device *adreno_dev,
2458 struct adreno_ringbuffer *rb)
2459{
2460 unsigned int *cmds, cmds_gpu;
2461 cmds = adreno_ringbuffer_allocspace(rb, 18);
2462 cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);
2463
2464 GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
2465 GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
2466 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2467 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2468 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2469 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
2470 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
2471 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
2472 GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
2473 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
2474 GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
2475 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
2476 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
2477 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2478 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2479 /* Protected mode control - turned off for A3XX */
2480 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2481 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2482 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2483
2484 adreno_ringbuffer_submit(rb);
2485}
2486
2487static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2488{
2489 struct kgsl_device *device = &adreno_dev->dev;
2490 const char *err = "";
2491
2492 switch (bit) {
2493 case A3XX_INT_RBBM_AHB_ERROR: {
2494 unsigned int reg;
2495
2496 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2497
2498 /*
2499 * Return the word address of the erroring register so that it
2500 * matches the register specification
2501 */
2502
2503 KGSL_DRV_CRIT(device,
2504 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2505 reg & (1 << 28) ? "WRITE" : "READ",
2506 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2507 (reg >> 24) & 0x3);
2508
2509 /* Clear the error */
2510 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2511 return;
2512 }
2513 case A3XX_INT_RBBM_REG_TIMEOUT:
2514 err = "RBBM: AHB register timeout";
2515 break;
2516 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2517 err = "RBBM: ME master split timeout";
2518 break;
2519 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2520 err = "RBBM: PFP master split timeout";
2521 break;
2522 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2523 err = "RBBM: ATB bus oveflow";
2524 break;
2525 case A3XX_INT_VFD_ERROR:
2526 err = "VFD: Out of bounds access";
2527 break;
2528 case A3XX_INT_CP_T0_PACKET_IN_IB:
2529 err = "ringbuffer TO packet in IB interrupt";
2530 break;
2531 case A3XX_INT_CP_OPCODE_ERROR:
2532 err = "ringbuffer opcode error interrupt";
2533 break;
2534 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2535 err = "ringbuffer reserved bit error interrupt";
2536 break;
2537 case A3XX_INT_CP_HW_FAULT:
2538 err = "ringbuffer hardware fault";
2539 break;
2540 case A3XX_INT_CP_REG_PROTECT_FAULT:
2541 err = "ringbuffer protected mode error interrupt";
2542 break;
2543 case A3XX_INT_CP_AHB_ERROR_HALT:
2544 err = "ringbuffer AHB error interrupt";
2545 break;
2546 case A3XX_INT_MISC_HANG_DETECT:
2547 err = "MISC: GPU hang detected";
2548 break;
2549 case A3XX_INT_UCHE_OOB_ACCESS:
2550 err = "UCHE: Out of bounds access";
2551 break;
2552 }
2553
2554 KGSL_DRV_CRIT(device, "%s\n", err);
2555 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2556}
2557
2558static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
2559{
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002560 struct kgsl_device *device = &adreno_dev->dev;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002561
2562 if (irq == A3XX_INT_CP_RB_INT) {
Carter Cooper7e7f02e2012-02-15 09:36:31 -07002563 unsigned int context_id;
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002564 kgsl_sharedmem_readl(&device->memstore, &context_id,
Carter Cooper7e7f02e2012-02-15 09:36:31 -07002565 KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
2566 current_context));
2567 if (context_id < KGSL_MEMSTORE_MAX) {
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002568 kgsl_sharedmem_writel(&device->memstore,
Carter Cooper7e7f02e2012-02-15 09:36:31 -07002569 KGSL_MEMSTORE_OFFSET(context_id,
2570 ts_cmp_enable), 0);
2571 wmb();
2572 }
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002573 KGSL_CMD_WARN(device, "ringbuffer rb interrupt\n");
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002574 }
2575
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002576 wake_up_interruptible_all(&device->wait_queue);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002577
2578 /* Schedule work to free mem and issue ibs */
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002579 queue_work(device->work_queue, &device->ts_expired_ws);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002580
Carter Cooperd7b1aba2012-06-05 11:59:23 -06002581 atomic_notifier_call_chain(&device->ts_notifier_list,
2582 device->id, NULL);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002583}
2584
/* Build an entry for the a3xx_irq_funcs dispatch table */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupt bits enabled in A3XX_RBBM_INT_0_MASK: the AHB/ATB bus
 * errors, the CP error conditions and the CP IB1/IB2/RB completion
 * interrupts.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) | \
	(1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	(1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \
	(1 << A3XX_INT_CP_OPCODE_ERROR) | \
	(1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	(1 << A3XX_INT_CP_HW_FAULT) | \
	(1 << A3XX_INT_CP_IB1_INT) | \
	(1 << A3XX_INT_CP_IB2_INT) | \
	(1 << A3XX_INT_CP_RB_INT) | \
	(1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \
	(1 << A3XX_INT_CP_AHB_ERROR_HALT) | \
	(1 << A3XX_INT_UCHE_OOB_ACCESS))
2600
/*
 * Per-bit dispatch table for A3XX_RBBM_INT_0_STATUS. The array index is
 * the interrupt bit number; NULL entries are reported as unhandled by
 * a3xx_irq_handler().
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	/* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),	/* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	/* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	/* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	/* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),	/* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2632
2633static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2634{
2635 struct kgsl_device *device = &adreno_dev->dev;
2636 irqreturn_t ret = IRQ_NONE;
2637 unsigned int status, tmp;
2638 int i;
2639
2640 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2641
2642 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2643 if (tmp & 1) {
2644 if (a3xx_irq_funcs[i].func != NULL) {
2645 a3xx_irq_funcs[i].func(adreno_dev, i);
2646 ret = IRQ_HANDLED;
2647 } else {
2648 KGSL_DRV_CRIT(device,
2649 "Unhandled interrupt bit %x\n", i);
2650 }
2651 }
2652
2653 tmp >>= 1;
2654 }
2655
Carter Cooperb769c912012-04-13 08:16:35 -06002656 trace_kgsl_a3xx_irq_status(device, status);
2657
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002658 if (status)
2659 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2660 status);
2661 return ret;
2662}
2663
2664static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2665{
2666 struct kgsl_device *device = &adreno_dev->dev;
2667
Wei Zou08a7e572012-06-03 22:05:46 -07002668 if (state)
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002669 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
Wei Zou08a7e572012-06-03 22:05:46 -07002670 else
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002671 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2672}
2673
2674static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
2675{
2676 struct kgsl_device *device = &adreno_dev->dev;
2677 unsigned int reg, val;
2678
2679 /* Freeze the counter */
2680 adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
2681 reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2682 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2683
2684 /* Read the value */
2685 adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
2686
2687 /* Reset the counter */
2688 reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
2689 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2690
2691 /* Re-enable the counter */
2692 reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
2693 reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
2694 adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
2695
2696 return val;
2697}
2698
/*
 * One-time A3XX hardware bring-up: program the VBIF bus interface,
 * enable error reporting and hang detection, and turn on clock gating
 * and the power counters. The register writes below are order-sensitive
 * hardware programming; do not reorder.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Set up 16 deep read/write request queues */

	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);

	/* Enable WR-REQ */
	adreno_regwrite(device, A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x000000FF);

	/* Set up round robin arbitration between both AXI ports */
	adreno_regwrite(device, A3XX_VBIF_ARB_CTL, 0x00000030);

	/* Set up AOOO */
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C);
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C);

	/* APQ8064-only VBIF tuning */
	if (cpu_is_apq8064()) {
		/* Enable 1K sort */
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT, 0x000000FF);
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
	}
	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Tune the hystersis counters for SP and CP idle detection */
	adreno_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
	adreno_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits. This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
		(1 << 16) | 0xFFF);

	/* Enable Clock gating */
	adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
		A3XX_RBBM_CLOCK_CTL_DEFAULT);

}
2757
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002758/* Defined in adreno_a3xx_snapshot.c */
2759void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2760 int *remain, int hang);
2761
/* A3XX GPU-family operations table used by the core adreno driver */
struct adreno_gpudev adreno_a3xx_gpudev = {
	/* A3XX-specific register offsets used by generic code */
	.reg_rbbm_status = A3XX_RBBM_STATUS,
	.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
	.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,

	/* Draw context management */
	.ctxt_create = a3xx_drawctxt_create,
	.ctxt_save = a3xx_drawctxt_save,
	.ctxt_restore = a3xx_drawctxt_restore,
	/* No draw workaround hook is provided for this family */
	.ctxt_draw_workaround = NULL,
	/* Ringbuffer, interrupt and power/debug hooks */
	.rb_init = a3xx_rb_init,
	.irq_control = a3xx_irq_control,
	.irq_handler = a3xx_irq_handler,
	.busy_cycles = a3xx_busy_cycles,
	.start = a3xx_start,
	.snapshot = a3xx_snapshot,
};