blob: 152fc76b3605efec7c57d0121d42e4a4bbdd898f [file] [log] [blame]
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/delay.h>
Steve Mucklef132c6c2012-06-06 18:30:57 -070015#include <linux/sched.h>
Sudhakara Rao Tentu5746bde2012-03-15 12:16:32 +053016#include <mach/socinfo.h>
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070017
18#include "kgsl.h"
19#include "adreno.h"
20#include "kgsl_sharedmem.h"
21#include "kgsl_cffdump.h"
22#include "a3xx_reg.h"
Carter Cooperb769c912012-04-13 08:16:35 -060023#include "adreno_a3xx_trace.h"
Jordan Crouseb4d31bd2012-02-01 22:11:12 -070024
Jordan Crouse0c2761a2012-02-01 22:11:12 -070025/*
26 * Set of registers to dump for A3XX on postmortem and snapshot.
27 * Registers in pairs - first value is the start offset, second
28 * is the stop offset (inclusive)
29 */
30
/* 142 (start, stop) register-offset pairs; both ends inclusive */
const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303C, 0x303C, 0x305E, 0x305F,
};
69
/* Number of (start, stop) register pairs in a3xx_registers */
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
71
/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
 * functions.
 */

#define _SET(_shift, _val) ((_val) << (_shift))

/*
 ****************************************************************************
 *
 * Context state shadow structure:
 *
 * +---------------------+------------+-------------+---------------------+---+
 * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
 * +---------------------+------------+-------------+---------------------+---+
 *
 * 8K - ALU Constant Shadow (8K aligned)
 * 4K - H/W Register Shadow (8K aligned)
 * 5K - Command and Vertex Buffers
 * 8K - Shader Instruction Shadow
 * ~6K - Texture Constant Shadow
 *
 *
 ***************************************************************************
 */

/* Sizes of all sections in state shadow memory */
#define ALU_SHADOW_SIZE (8*1024) /* 8KB */
#define REG_SHADOW_SIZE (4*1024) /* 4KB */
#define CMD_BUFFER_SIZE (5*1024) /* 5KB */
#define TEX_SIZE_MEM_OBJECTS 896 /* bytes */
#define TEX_SIZE_MIPMAP 1936 /* bytes */
#define TEX_SIZE_SAMPLER_OBJ 256 /* bytes */
/* Two copies (VS + FS) of the three texture-state sections */
#define TEX_SHADOW_SIZE \
	((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
	TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */
#define SHADER_SHADOW_SIZE (8*1024) /* 8KB */

/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
	(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
	CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
	TEX_SHADOW_SIZE)

/* Offsets to different sections in context shadow memory */
#define REG_OFFSET ALU_SHADOW_SIZE
#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
#define TEX_OFFSET (SHADER_OFFSET+SHADER_SHADOW_SIZE)
#define VS_TEX_OFFSET_MEM_OBJECTS TEX_OFFSET
#define VS_TEX_OFFSET_MIPMAP (VS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define VS_TEX_OFFSET_SAMPLER_OBJ (VS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
#define FS_TEX_OFFSET_MEM_OBJECTS \
	(VS_TEX_OFFSET_SAMPLER_OBJ+TEX_SIZE_SAMPLER_OBJ)
#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)

/* The offset for fragment shader data in HLSQ context */
#define SSIZE (16*1024)

#define HLSQ_SAMPLER_OFFSET 0x000
#define HLSQ_MEMOBJ_OFFSET 0x400
#define HLSQ_MIPMAP_OFFSET 0x800

/* Use shadow RAM */
#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)

#define REG_TO_MEM_LOOP_COUNT_SHIFT 18

/*
 * NOTE(review): index_size is expanded unparenthesized in the & and >>
 * terms below — callers must pass a simple expression, not e.g. a+b.
 */
#define BUILD_PC_DRAW_INITIATOR(prim_type, source_select, index_size, \
	vis_cull_mode) \
	(((prim_type) << PC_DRAW_INITIATOR_PRIM_TYPE) | \
	((source_select) << PC_DRAW_INITIATOR_SOURCE_SELECT) | \
	((index_size & 1) << PC_DRAW_INITIATOR_INDEX_SIZE) | \
	((index_size >> 1) << PC_DRAW_INITIATOR_SMALL_INDEX) | \
	((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
	(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
148
/*
 * List of context registers (starting from dword offset 0x2000).
 * Each line contains start and end of a range of registers
 * (both ends inclusive); written into the register shadow by
 * build_regconstantsave_cmds() when shadow writes are disabled.
 */
static const unsigned int context_register_ranges[] = {
	A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
	A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
	A3XX_GRAS_CL_VPORT_XOFFSET, A3XX_GRAS_CL_VPORT_ZSCALE,
	A3XX_GRAS_SU_POINT_MINMAX, A3XX_GRAS_SU_POINT_SIZE,
	A3XX_GRAS_SU_POLY_OFFSET_SCALE, A3XX_GRAS_SU_POLY_OFFSET_OFFSET,
	A3XX_GRAS_SU_MODE_CONTROL, A3XX_GRAS_SU_MODE_CONTROL,
	A3XX_GRAS_SC_CONTROL, A3XX_GRAS_SC_CONTROL,
	A3XX_GRAS_SC_SCREEN_SCISSOR_TL, A3XX_GRAS_SC_SCREEN_SCISSOR_BR,
	A3XX_GRAS_SC_WINDOW_SCISSOR_TL, A3XX_GRAS_SC_WINDOW_SCISSOR_BR,
	A3XX_RB_MODE_CONTROL, A3XX_RB_MRT_BLEND_CONTROL3,
	A3XX_RB_BLEND_RED, A3XX_RB_COPY_DEST_INFO,
	A3XX_RB_DEPTH_CONTROL, A3XX_RB_DEPTH_CONTROL,
	A3XX_PC_VSTREAM_CONTROL, A3XX_PC_VSTREAM_CONTROL,
	A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, A3XX_PC_VERTEX_REUSE_BLOCK_CNTL,
	A3XX_PC_PRIM_VTX_CNTL, A3XX_PC_RESTART_INDEX,
	A3XX_HLSQ_CONTROL_0_REG, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG,
	A3XX_HLSQ_CL_NDRANGE_0_REG, A3XX_HLSQ_CL_NDRANGE_0_REG,
	A3XX_HLSQ_CL_NDRANGE_2_REG, A3XX_HLSQ_CL_CONTROL_1_REG,
	A3XX_HLSQ_CL_KERNEL_CONST_REG, A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG,
	A3XX_HLSQ_CL_WG_OFFSET_REG, A3XX_HLSQ_CL_WG_OFFSET_REG,
	A3XX_VFD_CONTROL_0, A3XX_VFD_VS_THREADING_THRESHOLD,
	A3XX_SP_SP_CTRL_REG, A3XX_SP_SP_CTRL_REG,
	A3XX_SP_VS_CTRL_REG0, A3XX_SP_VS_OUT_REG_7,
	A3XX_SP_VS_VPC_DST_REG_0, A3XX_SP_VS_PVT_MEM_SIZE_REG,
	A3XX_SP_VS_LENGTH_REG, A3XX_SP_FS_PVT_MEM_SIZE_REG,
	A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, A3XX_SP_FS_FLAT_SHAD_MODE_REG_1,
	A3XX_SP_FS_OUTPUT_REG, A3XX_SP_FS_OUTPUT_REG,
	A3XX_SP_FS_MRT_REG_0, A3XX_SP_FS_IMAGE_OUTPUT_REG_3,
	A3XX_SP_FS_LENGTH_REG, A3XX_SP_FS_LENGTH_REG,
	A3XX_TPL1_TP_VS_TEX_OFFSET, A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
	A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
186
/*
 * Global registers that need to be saved separately (one entry per
 * register, not ranges); saved one-by-one via CP_REG_TO_MEM into the
 * per-register slots recorded in tmp_ctx.reg_values[].
 */
static const unsigned int global_registers[] = {
	A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
	A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
	A3XX_GRAS_CL_USER_PLANE_X1, A3XX_GRAS_CL_USER_PLANE_Y1,
	A3XX_GRAS_CL_USER_PLANE_Z1, A3XX_GRAS_CL_USER_PLANE_W1,
	A3XX_GRAS_CL_USER_PLANE_X2, A3XX_GRAS_CL_USER_PLANE_Y2,
	A3XX_GRAS_CL_USER_PLANE_Z2, A3XX_GRAS_CL_USER_PLANE_W2,
	A3XX_GRAS_CL_USER_PLANE_X3, A3XX_GRAS_CL_USER_PLANE_Y3,
	A3XX_GRAS_CL_USER_PLANE_Z3, A3XX_GRAS_CL_USER_PLANE_W3,
	A3XX_GRAS_CL_USER_PLANE_X4, A3XX_GRAS_CL_USER_PLANE_Y4,
	A3XX_GRAS_CL_USER_PLANE_Z4, A3XX_GRAS_CL_USER_PLANE_W4,
	A3XX_GRAS_CL_USER_PLANE_X5, A3XX_GRAS_CL_USER_PLANE_Y5,
	A3XX_GRAS_CL_USER_PLANE_Z5, A3XX_GRAS_CL_USER_PLANE_W5,
	A3XX_VSC_BIN_SIZE,
	A3XX_VSC_PIPE_CONFIG_0, A3XX_VSC_PIPE_CONFIG_1,
	A3XX_VSC_PIPE_CONFIG_2, A3XX_VSC_PIPE_CONFIG_3,
	A3XX_VSC_PIPE_CONFIG_4, A3XX_VSC_PIPE_CONFIG_5,
	A3XX_VSC_PIPE_CONFIG_6, A3XX_VSC_PIPE_CONFIG_7,
	A3XX_VSC_PIPE_DATA_ADDRESS_0, A3XX_VSC_PIPE_DATA_ADDRESS_1,
	A3XX_VSC_PIPE_DATA_ADDRESS_2, A3XX_VSC_PIPE_DATA_ADDRESS_3,
	A3XX_VSC_PIPE_DATA_ADDRESS_4, A3XX_VSC_PIPE_DATA_ADDRESS_5,
	A3XX_VSC_PIPE_DATA_ADDRESS_6, A3XX_VSC_PIPE_DATA_ADDRESS_7,
	A3XX_VSC_PIPE_DATA_LENGTH_0, A3XX_VSC_PIPE_DATA_LENGTH_1,
	A3XX_VSC_PIPE_DATA_LENGTH_2, A3XX_VSC_PIPE_DATA_LENGTH_3,
	A3XX_VSC_PIPE_DATA_LENGTH_4, A3XX_VSC_PIPE_DATA_LENGTH_5,
	A3XX_VSC_PIPE_DATA_LENGTH_6, A3XX_VSC_PIPE_DATA_LENGTH_7,
	A3XX_VSC_SIZE_ADDRESS
};
216
#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)

/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
	unsigned int *cmd; /* Next available dword in C&V buffer */

	/* Addresses in command buffer where registers are saved */
	uint32_t reg_values[GLOBAL_REGISTER_COUNT];
	uint32_t gmem_base; /* Base GPU address of GMEM */
} tmp_ctx;
227
228#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
229/*
230 * Function for executing dest = ( (reg & and) ROL rol ) | or
231 */
232static unsigned int *rmw_regtomem(unsigned int *cmd,
233 unsigned int reg, unsigned int and,
234 unsigned int rol, unsigned int or,
235 unsigned int dest)
236{
237 /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
238 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
239 *cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
240 *cmd++ = 0x00000000; /* AND value */
241 *cmd++ = reg; /* OR address */
242
243 /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
244 *cmd++ = cp_type3_packet(CP_REG_RMW, 3);
245 *cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
246 *cmd++ = and; /* AND value */
247 *cmd++ = or; /* OR value */
248
249 *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
250 *cmd++ = A3XX_CP_SCRATCH_REG2;
251 *cmd++ = dest;
252
253 return cmd;
254}
255#endif
256
/*
 * build_regconstantsave_cmds() - Build the command sequence that saves
 * HW context registers, shader (ALU) constants and texture state into
 * the per-context shadow memory, then wrap it in an IB1.
 *
 * Writes commands at tmp_ctx.cmd and advances it; records fixup
 * locations (patched later with sizes read from SP_*_CTRL_REG1 etc.)
 * in drawctxt->constant_save_commands[].  adreno_dev is not used here.
 */
static void build_regconstantsave_cmds(struct adreno_device *adreno_dev,
	struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;
	unsigned int i;

	/* Reserve one dword before the IB body for a fixup value */
	drawctxt->constant_save_commands[0].hostptr = cmd;
	drawctxt->constant_save_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	cmd++;

	start = cmd;

	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/*
	 * Context registers are already shadowed; just need to
	 * disable shadowing to prevent corruption.
	 */

	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;		/* count = 0 */

#else
	/*
	 * Make sure the HW context has the correct register values before
	 * reading them.
	 */

	/* Write context registers into shadow */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		unsigned int start = context_register_ranges[i * 2];
		unsigned int end = context_register_ranges[i * 2 + 1];
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = ((end - start + 1) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
			start;
		/* Shadow slot: 8K-aligned base + dword offset from 0x2000 */
		*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET)
			& 0xFFFFE000) + (start - 0x2000) * 4;
	}
#endif

	/* Need to handle some of the global registers separately */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmd++ = global_registers[i];
		*cmd++ = tmp_ctx.reg_values[i];
	}

	/* Save vertex shader constants (only if cond_execs[2] is set) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[1].hostptr = cmd;
	drawctxt->constant_save_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000) / 4

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* ALU constant shadow base */
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;

	/* Save fragment shader constants (only if cond_execs[3] is set) */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->constant_save_commands[2].hostptr = cmd;
	drawctxt->constant_save_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
	   src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
	 */
	*cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */

	/*
	   From fixup:

	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   dst = base + offset
	   Because of the base alignment we can use
	   dst = base | offset
	 */
	*cmd++ = 0; /* dst */

	/* Save VS texture memory objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save VS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save VS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Save FS texture memory objects (FS state lives SSIZE further up) */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MEMOBJ_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;

	/* Save FS texture mipmap pointers */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_MIPMAP_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;

	/* Save FS texture sampler objects */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ =
		((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
		((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET + SSIZE) / 4);
	*cmd++ =
		(drawctxt->gpustate.gpuaddr +
		FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
427
428/* Copy GMEM contents to system memory shadow. */
429static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
430 struct adreno_context *drawctxt,
431 struct gmem_shadow_t *shadow)
432{
433 unsigned int *cmds = tmp_ctx.cmd;
434 unsigned int *start = cmds;
435
Jordan Crousefb3012f2012-06-22 13:11:05 -0600436 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
437 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
438
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700439 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
440 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
441
442 /* RB_MODE_CONTROL */
443 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
444 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
445 _SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
446 /* RB_RENDER_CONTROL */
447 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
448 _SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
449
450 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
451 *cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
452 /* RB_COPY_CONTROL */
453 *cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
454 RB_CLEAR_MODE_RESOLVE) |
455 _SET(RB_COPYCONTROL_COPY_GMEM_BASE,
456 tmp_ctx.gmem_base >> 14);
457 /* RB_COPY_DEST_BASE */
458 *cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
459 shadow->gmemshadow.gpuaddr >> 5);
460 /* RB_COPY_DEST_PITCH */
461 *cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
462 (shadow->pitch * 4) / 32);
463 /* RB_COPY_DEST_INFO */
464 *cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
465 RB_TILINGMODE_LINEAR) |
466 _SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
467 _SET(RB_COPYDESTINFO_COPY_COMPONENT_ENABLE, 0X0F) |
468 _SET(RB_COPYDESTINFO_COPY_DEST_ENDIAN, RB_ENDIAN_NONE);
469
470 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
471 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
472 /* GRAS_SC_CONTROL */
473 *cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
474
475 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
476 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
477 /* VFD_CONTROL_0 */
478 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
479 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
480 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
481 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
482 /* VFD_CONTROL_1 */
483 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
484 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
485 _SET(VFD_CTRLREG1_REGID4INST, 252);
486
487 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
488 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
489 /* VFD_FETCH_INSTR_0_0 */
490 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
491 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
492 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
493 /* VFD_FETCH_INSTR_1_0 */
494 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
495 shadow->quad_vertices.gpuaddr);
496
497 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
498 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
499 /* VFD_DECODE_INSTR_0 */
500 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
501 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
502 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700503 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
504 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
505
506 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
507 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
508 /* HLSQ_CONTROL_0_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700509 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700510 _SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700511 _SET(HLSQ_CTRL0REG_RESERVED2, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700512 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700513 /* HLSQ_CONTROL_1_REG */
514 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700515 _SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700516 /* HLSQ_CONTROL_2_REG */
517 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
518 /* HLSQ_CONTROL_3_REG */
519 *cmds++ = 0x00000000;
520
521 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
522 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
523 /* HLSQ_VS_CONTROL_REG */
524 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
525 /* HLSQ_FS_CONTROL_REG */
526 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700527 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700528 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
529 /* HLSQ_CONST_VSPRESV_RANGE_REG */
530 *cmds++ = 0x00000000;
531 /* HLSQ_CONST_FSPRESV_RANGE_REQ */
532 *cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
533 _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
534
535 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
536 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
537 /* SP_FS_LENGTH_REG */
538 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
539
540 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
541 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
542 /* SP_SP_CTRL_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700543 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1) |
544 _SET(SP_SPCTRLREG_LOMODE, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700545
546 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
547 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
548 /* SP_VS_CTRL_REG0 */
549 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
550 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
551 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700552 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700553 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
554 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
555 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
556 /* SP_VS_CTRL_REG1 */
557 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
558 /* SP_VS_PARAM_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700559 *cmds++ = _SET(SP_VSPARAMREG_PSIZEREGID, 252);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700560 /* SP_VS_OUT_REG_0 */
561 *cmds++ = 0x00000000;
562 /* SP_VS_OUT_REG_1 */
563 *cmds++ = 0x00000000;
564 /* SP_VS_OUT_REG_2 */
565 *cmds++ = 0x00000000;
566 /* SP_VS_OUT_REG_3 */
567 *cmds++ = 0x00000000;
568 /* SP_VS_OUT_REG_4 */
569 *cmds++ = 0x00000000;
570 /* SP_VS_OUT_REG_5 */
571 *cmds++ = 0x00000000;
572 /* SP_VS_OUT_REG_6 */
573 *cmds++ = 0x00000000;
574 /* SP_VS_OUT_REG_7 */
575 *cmds++ = 0x00000000;
576
577 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
578 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
579 /* SP_VS_VPC_DST_REG_0 */
580 *cmds++ = 0x00000000;
581 /* SP_VS_VPC_DST_REG_1 */
582 *cmds++ = 0x00000000;
583 /* SP_VS_VPC_DST_REG_2 */
584 *cmds++ = 0x00000000;
585 /* SP_VS_VPC_DST_REG_3 */
586 *cmds++ = 0x00000000;
587 /* SP_VS_OBJ_OFFSET_REG */
588 *cmds++ = 0x00000000;
589 /* SP_VS_OBJ_START_REG */
590 *cmds++ = 0x00000000;
591
592 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
593 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
594 /* SP_VS_LENGTH_REG */
595 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
596 /* SP_FS_CTRL_REG0 */
597 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
598 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
599 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700600 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700601 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700602 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700603 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
604 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
605 /* SP_FS_CTRL_REG1 */
606 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700607 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
608 /* SP_FS_OBJ_OFFSET_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700609 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
610 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 127);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700611 /* SP_FS_OBJ_START_REG */
612 *cmds++ = 0x00000000;
613
614 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
615 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
616 /* SP_FS_FLAT_SHAD_MODE_REG_0 */
617 *cmds++ = 0x00000000;
618 /* SP_FS_FLAT_SHAD_MODE_REG_1 */
619 *cmds++ = 0x00000000;
620
621 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
622 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
623 /* SP_FS_OUTPUT_REG */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700624 *cmds++ = _SET(SP_IMAGEOUTPUTREG_DEPTHOUTMODE, SP_PIXEL_BASED);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700625
626 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
627 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
628 /* SP_FS_MRT_REG_0 */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700629 *cmds++ = _SET(SP_FSMRTREG_PRECISION, 1);
630
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700631 /* SP_FS_MRT_REG_1 */
632 *cmds++ = 0x00000000;
633 /* SP_FS_MRT_REG_2 */
634 *cmds++ = 0x00000000;
635 /* SP_FS_MRT_REG_3 */
636 *cmds++ = 0x00000000;
637
638 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
639 *cmds++ = CP_REG(A3XX_VPC_ATTR);
640 /* VPC_ATTR */
641 *cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
642 _SET(VPC_VPCATTR_LMSIZE, 1);
643 /* VPC_PACK */
644 *cmds++ = 0x00000000;
645 /* VPC_VARRYING_INTERUPT_MODE_0 */
646 *cmds++ = 0x00000000;
647 /* VPC_VARRYING_INTERUPT_MODE_1 */
648 *cmds++ = 0x00000000;
649 /* VPC_VARRYING_INTERUPT_MODE_2 */
650 *cmds++ = 0x00000000;
651 /* VPC_VARRYING_INTERUPT_MODE_3 */
652 *cmds++ = 0x00000000;
653 /* VPC_VARYING_PS_REPL_MODE_0 */
654 *cmds++ = 0x00000000;
655 /* VPC_VARYING_PS_REPL_MODE_1 */
656 *cmds++ = 0x00000000;
657 /* VPC_VARYING_PS_REPL_MODE_2 */
658 *cmds++ = 0x00000000;
659 /* VPC_VARYING_PS_REPL_MODE_3 */
660 *cmds++ = 0x00000000;
661
662 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
663 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
664 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
665 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
666 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
667 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
668 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
669
670 /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700671 *cmds++ = 0x00000000; *cmds++ = 0x13001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700672 /* end; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700673 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700674 /* nop; */
675 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
676 /* nop; */
677 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
678
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700679
680 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
681 *cmds++ = 0x00000000;
682
683 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
684 *cmds++ = 0x00000000;
685
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700686 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
687 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
688 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
689 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
690 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
691 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
692 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
693
694 /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700695 *cmds++ = 0x00000000; *cmds++ = 0x30201b00;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700696 /* end; */
697 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
698 /* nop; */
699 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
700 /* nop; */
701 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
702
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700703
704
705 *cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
706 *cmds++ = 0x00000000;
707
708 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
709 *cmds++ = 0x00000000;
710
711
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700712 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
713 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
714 /* RB_MSAA_CONTROL */
715 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
716 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
717
718 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
719 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
720 /* RB_DEPTH_CONTROL */
721 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
722
723 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700724 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
725 /* RB_STENCIL_CONTROL */
726 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
727 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
728 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
729 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
730 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
731 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
732 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
733 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
734
735 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
736 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
737 /* GRAS_SU_MODE_CONTROL */
738 *cmds++ = 0x00000000;
739
740 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700741 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
742 /* RB_MRT_CONTROL0 */
743 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
744 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
745 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
746 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
747
748 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
749 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
750 /* RB_MRT_BLEND_CONTROL0 */
751 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
752 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
753 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
754 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
755 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
756 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
757 /* RB_MRT_CONTROL1 */
758 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
759 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
760 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
761
762 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
763 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
764 /* RB_MRT_BLEND_CONTROL1 */
765 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
766 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
767 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
768 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
769 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
770 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
771 /* RB_MRT_CONTROL2 */
772 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
773 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
774 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
775
776 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
777 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
778 /* RB_MRT_BLEND_CONTROL2 */
779 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
780 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
781 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
782 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
783 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
784 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
785 /* RB_MRT_CONTROL3 */
786 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
787 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
788 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
789
790 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
791 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
792 /* RB_MRT_BLEND_CONTROL3 */
793 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
794 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
795 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
796 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
797 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
798 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
799
800 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
801 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
802 /* VFD_INDEX_MIN */
803 *cmds++ = 0x00000000;
804 /* VFD_INDEX_MAX */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700805 *cmds++ = 0x155;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700806 /* VFD_INSTANCEID_OFFSET */
807 *cmds++ = 0x00000000;
808 /* VFD_INDEX_OFFSET */
809 *cmds++ = 0x00000000;
810
811 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
812 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
813 /* VFD_VS_THREADING_THRESHOLD */
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700814 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_REGID_THRESHOLD, 15) |
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700815 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
816
817 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
818 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
819 /* TPL1_TP_VS_TEX_OFFSET */
820 *cmds++ = 0;
821
822 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
823 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
824 /* TPL1_TP_FS_TEX_OFFSET */
825 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
826 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
827 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
828
829 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
830 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
831 /* PC_PRIM_VTX_CNTL */
832 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
833 PC_DRAW_TRIANGLES) |
834 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
835 PC_DRAW_TRIANGLES) |
836 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
837
838 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
839 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
840 /* GRAS_SC_WINDOW_SCISSOR_TL */
841 *cmds++ = 0x00000000;
842 /* GRAS_SC_WINDOW_SCISSOR_BR */
843 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
844 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
845
846 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
847 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
848 /* GRAS_SC_SCREEN_SCISSOR_TL */
849 *cmds++ = 0x00000000;
850 /* GRAS_SC_SCREEN_SCISSOR_BR */
851 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
852 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
853
854 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
855 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
856 /* GRAS_CL_VPORT_XOFFSET */
857 *cmds++ = 0x00000000;
858 /* GRAS_CL_VPORT_XSCALE */
859 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
860 /* GRAS_CL_VPORT_YOFFSET */
861 *cmds++ = 0x00000000;
862 /* GRAS_CL_VPORT_YSCALE */
863 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
864
865 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
866 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
867 /* GRAS_CL_VPORT_ZOFFSET */
868 *cmds++ = 0x00000000;
869 /* GRAS_CL_VPORT_ZSCALE */
870 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
871
872 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
873 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
874 /* GRAS_CL_CLIP_CNTL */
875 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
876 _SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
877 _SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
878 _SET(GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE, 1) |
879 _SET(GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE, 1);
880
881 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
882 *cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
883 /* GRAS_CL_GB_CLIP_ADJ */
884 *cmds++ = 0x00000000;
885
886 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
887 *cmds++ = 0x00000000;
888
Rajeev Kulkarni06a2c722012-07-06 16:47:16 -0700889
890 /* oxili_generate_context_roll_packets */
891 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
892 *cmds++ = 0x00000400;
893
894 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
895 *cmds++ = 0x00000400;
896
897 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
898 *cmds++ = 0x00008000; /* SP_VS_MEM_SIZE_REG */
899
900 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
901 *cmds++ = 0x00008000; /* SP_FS_MEM_SIZE_REG */
902
903 /* Clear cache invalidate bit when re-loading the shader control regs */
904 *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
905 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
906 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
907 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 1) |
908 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
909 _SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
910 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
911
912 *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
913 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
914 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
915 _SET(SP_FSCTRLREG0_FSHALFREGFOOTPRINT, 1) |
916 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
917 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
918 _SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
919 _SET(SP_FSCTRLREG0_FSLENGTH, 1);
920
921 *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
922 *cmds++ = 0x00000000; /* SP_VS_MEM_SIZE_REG */
923
924 *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
925 *cmds++ = 0x00000000; /* SP_FS_MEM_SIZE_REG */
926
927 /* end oxili_generate_context_roll_packets */
928
Jordan Crouseb4d31bd2012-02-01 22:11:12 -0700929 /*
930 * Resolve using two draw calls with a dummy register
931 * write in between. This is a HLM workaround
932 * that should be removed later.
933 */
934 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
935 *cmds++ = 0x00000000; /* Viz query info */
936 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
937 PC_DI_SRC_SEL_IMMEDIATE,
938 PC_DI_INDEX_SIZE_32_BIT,
939 PC_DI_IGNORE_VISIBILITY);
940 *cmds++ = 0x00000003; /* Num indices */
941 *cmds++ = 0x00000000; /* Index 0 */
942 *cmds++ = 0x00000001; /* Index 1 */
943 *cmds++ = 0x00000002; /* Index 2 */
944
945 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
946 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
947 *cmds++ = 0x00000000;
948
949 *cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
950 *cmds++ = 0x00000000; /* Viz query info */
951 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
952 PC_DI_SRC_SEL_IMMEDIATE,
953 PC_DI_INDEX_SIZE_32_BIT,
954 PC_DI_IGNORE_VISIBILITY);
955 *cmds++ = 0x00000003; /* Num indices */
956 *cmds++ = 0x00000002; /* Index 0 */
957 *cmds++ = 0x00000001; /* Index 1 */
958 *cmds++ = 0x00000003; /* Index 2 */
959
960 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
961 *cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
962 *cmds++ = 0x00000000;
963
964 *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
965 *cmds++ = 0x00000000;
966
967 /* Create indirect buffer command for above command sequence */
968 create_ib1(drawctxt, shadow->gmem_save, start, cmds);
969
970 return cmds;
971}
/*
 * build_shader_save_cmds() - Build the IB that saves the active vertex and
 * fragment shader instructions from the on-chip HLSQ shadow memory into the
 * context's gpustate buffer at context-switch time.
 *
 * Two kinds of placeholders are reserved in the gpustate buffer before the
 * IB proper starts (i.e. before 'start'):
 *   - cond_execs[0]/[1]: boolean words tested by the CP_COND_EXEC packets so
 *     each shader is only saved when its save is actually required.
 *   - shader_save_commands[0..3]: words whose final values are not known at
 *     build time.  In particular [2] and [3] are the REG_TO_MEM
 *     (count << shift) | src words, patched at runtime by the save-fixup IB
 *     (see build_save_fixup_cmds) from SP_VS_CTRL_REG0/SP_FS_CTRL_REG0,
 *     because the shader lengths are only known then.  They are written as
 *     zero here.
 */
static void build_shader_save_cmds(struct adreno_device *adreno_dev,
				   struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start;

	/* Reserve space for boolean values used for COND_EXEC packet */
	drawctxt->cond_execs[0].hostptr = cmd;
	drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[1].hostptr = cmd;
	drawctxt->cond_execs[1].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* Reserve two more runtime-patched words (values filled by fixup) */
	drawctxt->shader_save_commands[0].hostptr = cmd;
	drawctxt->shader_save_commands[0].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->shader_save_commands[1].hostptr = cmd;
	drawctxt->shader_save_commands[1].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

	/* The IB itself begins here; everything above is data, not commands */
	start = cmd;

	/* Save vertex shader */

	/* Only execute the next 3 dwords if cond_execs[0] is set */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->shader_save_commands[2].hostptr = cmd;
	drawctxt->shader_save_commands[2].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   src = (HLSQ_SHADOW_BASE + 0x1000)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: VS half of the shader shadow area in gpustate */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Save fragment shader */

	/* Only execute the next 3 dwords if cond_execs[1] is set */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 0x0000FFFF;
	*cmd++ = 3; /* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	drawctxt->shader_save_commands[3].hostptr = cmd;
	drawctxt->shader_save_commands[3].gpuaddr =
	    virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
	   From regspec:

	   SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
	   First instruction of the whole shader will be stored from
	   the offset in instruction cache, unit = 256bits, a cache line.
	   It can start from 0 if no VS available.

	   src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
	 */
	*cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
	/* Destination: FS half of the shader shadow area in gpustate */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
1064
/*
 * Make an IB to modify context save IBs with the correct shader instruction
 * and constant sizes and offsets.
 */

/*
 * build_save_fixup_cmds() - Build the "fixup" IB that runs before the
 * context-save IBs.  At build time the shader/constant lengths and offsets
 * of the context being saved are unknown, so the save IBs contain zeroed
 * placeholder words (see build_shader_save_cmds).  This IB reads the live
 * SP_*_CTRL and SP_FS_OBJ_OFFSET registers on the GPU and patches those
 * placeholder words in memory with the correct values.
 *
 * Two strategies are compiled in:
 *   - GSL_CONTEXT_SWITCH_CPU_SYNC: dump raw register values to memory with
 *     CP_REG_TO_MEM; the CPU presumably finishes the fixup (NOTE(review):
 *     the consumer of these raw dumps is outside this file — confirm).
 *   - default (#else): do the whole fixup on the GPU using CP_REG_RMW
 *     shift/mask arithmetic through CP scratch registers, via the
 *     rmw_regtomem() helper.
 */
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* Full UCHE invalidate so register reads see up-to-date state */
	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
	    UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
	    UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
	    0;			/* No end addr for full invalidate */

	/* Make sure registers are flushed */
	*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC

	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Save shader offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;

	/* Save FS constant offset */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;


	/* Save VS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr;

	/* Save FS instruction store mode */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else

	/*
	 * Shader save: patch the VS REG_TO_MEM word from
	 * SP_VS_CTRL_REG0.VS_LENGTH (bits [30:24]; mask 0x7f000000) so that
	 * dwords = VS_LENGTH * 8 lands in the loop-count field, OR'd with
	 * the HLSQ shadow source address.
	 */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   11+REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x1000) / 4,
			   drawctxt->shader_save_commands[2].gpuaddr);

	/*
	 * The FS patch needs two source registers (length and offset), so it
	 * is composed by hand through the CP scratch registers instead of
	 * using rmw_regtomem().
	 */
	/* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;	/* bit30: OR operand is a register */
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_CTRL_REG0;	/* OR address */
	/* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
	   | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
	    A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0x7f000000;	/* AND value */
	*cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4;	/* OR value */

	/*
	 * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
	 * SP_FS_OBJ_OFFSET_REG
	 */

	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0x00000000;	/* AND value */
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;	/* OR address */
	/*
	 * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
	 * 0x00000000
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = A3XX_CP_SCRATCH_REG3;
	*cmd++ = 0xfe000000;	/* AND value */
	*cmd++ = 0x00000000;	/* OR value */
	/*
	 * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
	 */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
	*cmd++ = 0xffffffff;	/* AND value */
	*cmd++ = A3XX_CP_SCRATCH_REG3;	/* OR address */

	/* Write the composed (count|src) word into the FS save placeholder */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_CP_SCRATCH_REG2;
	*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;

	/* Constant save: patch counts from SP_*_CTRL_REG1 const-length fields */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000) / 4,
			   drawctxt->constant_save_commands[1].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
			   (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4,
			   drawctxt->constant_save_commands[2].gpuaddr);

	/* FS constant destination offset, relative to the gpustate buffer */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, drawctxt->gpustate.gpuaddr & 0xfffffe00,
			   drawctxt->constant_save_commands[2].gpuaddr
			   + sizeof(unsigned int));

	/* Modify constant save conditionals */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save VS instruction store mode */

	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[0].gpuaddr);

	/* Save FS instruction store mode */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
			   31, 0, drawctxt->cond_execs[1].gpuaddr);

#endif

	/* Create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->save_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
1220
1221/****************************************************************************/
1222/* Functions to build context restore IBs */
1223/****************************************************************************/
1224
1225static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
1226 struct adreno_context *drawctxt,
1227 struct gmem_shadow_t *shadow)
1228{
1229 unsigned int *cmds = tmp_ctx.cmd;
1230 unsigned int *start = cmds;
1231
Jordan Crousefb3012f2012-06-22 13:11:05 -06001232 *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
1233 *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
1234
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001235 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1236 *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
1237 /* HLSQ_CONTROL_0_REG */
1238 *cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
1239 _SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
1240 _SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
1241 _SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1) |
1242 _SET(HLSQ_CTRL0REG_TPFULLUPDATE, 1);
1243 /* HLSQ_CONTROL_1_REG */
1244 *cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS);
1245 /* HLSQ_CONTROL_2_REG */
1246 *cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
1247 /* HLSQ_CONTROL3_REG */
1248 *cmds++ = 0x00000000;
1249
1250 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1251 *cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
1252 /* RB_MRT_BUF_INFO0 */
1253 *cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
1254 _SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
1255 _SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
1256 (shadow->gmem_pitch * 4 * 8) / 256);
1257 /* RB_MRT_BUF_BASE0 */
1258 *cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
1259
1260 /* Texture samplers */
1261 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
1262 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1263 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1264 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1265 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1266 *cmds++ = (HLSQ_TP_TEX_SAMPLERS << CP_LOADSTATE_STATETYPE_SHIFT)
1267 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1268 *cmds++ = 0x00000240;
1269 *cmds++ = 0x00000000;
1270
1271 /* Texture memobjs */
1272 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
1273 *cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1274 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1275 | (HLSQ_BLOCK_ID_TP_TEX << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1276 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1277 *cmds++ = (HLSQ_TP_TEX_MEMOBJ << CP_LOADSTATE_STATETYPE_SHIFT)
1278 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1279 *cmds++ = 0x4cc06880;
1280 *cmds++ = shadow->height | (shadow->width << 14);
1281 *cmds++ = (shadow->pitch*4*8) << 9;
1282 *cmds++ = 0x00000000;
1283
1284 /* Mipmap bases */
1285 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
1286 *cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1287 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1288 | (HLSQ_BLOCK_ID_TP_MIPMAP << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1289 | (14 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1290 *cmds++ = (HLSQ_TP_MIPMAP_BASE << CP_LOADSTATE_STATETYPE_SHIFT)
1291 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1292 *cmds++ = shadow->gmemshadow.gpuaddr;
1293 *cmds++ = 0x00000000;
1294 *cmds++ = 0x00000000;
1295 *cmds++ = 0x00000000;
1296 *cmds++ = 0x00000000;
1297 *cmds++ = 0x00000000;
1298 *cmds++ = 0x00000000;
1299 *cmds++ = 0x00000000;
1300 *cmds++ = 0x00000000;
1301 *cmds++ = 0x00000000;
1302 *cmds++ = 0x00000000;
1303 *cmds++ = 0x00000000;
1304 *cmds++ = 0x00000000;
1305 *cmds++ = 0x00000000;
1306
1307 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1308 *cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
1309 /* HLSQ_VS_CONTROL_REG */
1310 *cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
1311 /* HLSQ_FS_CONTROL_REG */
1312 *cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
1313 _SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
1314 _SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
1315 /* HLSQ_CONST_VSPRESV_RANGE_REG */
1316 *cmds++ = 0x00000000;
1317 /* HLSQ_CONST_FSPRESV_RANGE_REG */
1318 *cmds++ = 0x00000000;
1319
1320 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1321 *cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
1322 /* SP_FS_LENGTH_REG */
1323 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
1324
1325 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
1326 *cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
1327 /* SP_VS_CTRL_REG0 */
1328 *cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
1329 _SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1330 _SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
1331 _SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
1332 _SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
1333 _SET(SP_VSCTRLREG0_VSLENGTH, 1);
1334 /* SP_VS_CTRL_REG1 */
1335 *cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
1336 /* SP_VS_PARAM_REG */
1337 *cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
1338 _SET(SP_VSPARAMREG_PSIZEREGID, 252) |
1339 _SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
1340 /* SP_VS_OUT_REG0 */
1341 *cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
1342 /* SP_VS_OUT_REG1 */
1343 *cmds++ = 0x00000000;
1344 /* SP_VS_OUT_REG2 */
1345 *cmds++ = 0x00000000;
1346 /* SP_VS_OUT_REG3 */
1347 *cmds++ = 0x00000000;
1348 /* SP_VS_OUT_REG4 */
1349 *cmds++ = 0x00000000;
1350 /* SP_VS_OUT_REG5 */
1351 *cmds++ = 0x00000000;
1352 /* SP_VS_OUT_REG6 */
1353 *cmds++ = 0x00000000;
1354 /* SP_VS_OUT_REG7 */
1355 *cmds++ = 0x00000000;
1356
1357 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
1358 *cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
1359 /* SP_VS_VPC_DST_REG0 */
1360 *cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
1361 /* SP_VS_VPC_DST_REG1 */
1362 *cmds++ = 0x00000000;
1363 /* SP_VS_VPC_DST_REG2 */
1364 *cmds++ = 0x00000000;
1365 /* SP_VS_VPC_DST_REG3 */
1366 *cmds++ = 0x00000000;
1367 /* SP_VS_OBJ_OFFSET_REG */
1368 *cmds++ = 0x00000000;
1369 /* SP_VS_OBJ_START_REG */
1370 *cmds++ = 0x00000000;
1371
1372 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
1373 *cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
1374 /* SP_VS_LENGTH_REG */
1375 *cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
1376 /* SP_FS_CTRL_REG0 */
1377 *cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
1378 _SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
1379 _SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
1380 _SET(SP_FSCTRLREG0_FSFULLREGFOOTPRINT, 2) |
1381 _SET(SP_FSCTRLREG0_FSINOUTREGOVERLAP, 1) |
1382 _SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
1383 _SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
1384 _SET(SP_FSCTRLREG0_FSLENGTH, 2);
1385 /* SP_FS_CTRL_REG1 */
1386 *cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
1387 _SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
1388 _SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
1389 /* SP_FS_OBJ_OFFSET_REG */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001390 *cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
1391 _SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 1);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001392 /* SP_FS_OBJ_START_REG */
1393 *cmds++ = 0x00000000;
1394
1395 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1396 *cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
1397 /* SP_FS_FLAT_SHAD_MODE_REG0 */
1398 *cmds++ = 0x00000000;
1399 /* SP_FS_FLAT_SHAD_MODE_REG1 */
1400 *cmds++ = 0x00000000;
1401
1402 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1403 *cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
1404 /* SP_FS_OUT_REG */
1405 *cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
1406
Jordan Crousea7ec4212012-02-04 10:23:52 -07001407 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001408 *cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
1409 /* SP_FS_MRT_REG0 */
1410 *cmds++ = _SET(SP_FSMRTREG_REGID, 4);
1411 /* SP_FS_MRT_REG1 */
1412 *cmds++ = 0;
1413 /* SP_FS_MRT_REG2 */
1414 *cmds++ = 0;
1415 /* SP_FS_MRT_REG3 */
1416 *cmds++ = 0;
1417
1418 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
1419 *cmds++ = CP_REG(A3XX_VPC_ATTR);
1420 /* VPC_ATTR */
1421 *cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
1422 _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
1423 _SET(VPC_VPCATTR_LMSIZE, 1);
1424 /* VPC_PACK */
1425 *cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
1426 _SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
1427 /* VPC_VARYING_INTERP_MODE_0 */
1428 *cmds++ = 0x00000000;
1429 /* VPC_VARYING_INTERP_MODE1 */
1430 *cmds++ = 0x00000000;
1431 /* VPC_VARYING_INTERP_MODE2 */
1432 *cmds++ = 0x00000000;
1433 /* VPC_VARYING_IINTERP_MODE3 */
1434 *cmds++ = 0x00000000;
1435 /* VPC_VARRYING_PS_REPL_MODE_0 */
1436 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1437 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1438 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1439 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1440 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1441 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1442 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1443 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1444 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1445 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1446 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1447 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1448 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1449 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1450 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1451 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1452 /* VPC_VARRYING_PS_REPL_MODE_1 */
1453 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1454 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1455 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1456 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1457 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1458 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1459 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1460 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1461 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1462 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1463 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1464 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1465 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1466 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1467 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1468 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1469 /* VPC_VARRYING_PS_REPL_MODE_2 */
1470 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1471 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1472 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1473 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1474 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1475 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1476 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1477 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1478 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1479 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1480 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1481 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1482 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1483 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1484 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1485 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1486 /* VPC_VARRYING_PS_REPL_MODE_3 */
1487 *cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
1488 _SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
1489 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
1490 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0B, 2) |
1491 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0C, 1) |
1492 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0D, 2) |
1493 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0E, 1) |
1494 _SET(VPC_VPCVARPSREPLMODE_COMPONENT0F, 2) |
1495 _SET(VPC_VPCVARPSREPLMODE_COMPONENT10, 1) |
1496 _SET(VPC_VPCVARPSREPLMODE_COMPONENT11, 2) |
1497 _SET(VPC_VPCVARPSREPLMODE_COMPONENT12, 1) |
1498 _SET(VPC_VPCVARPSREPLMODE_COMPONENT13, 2) |
1499 _SET(VPC_VPCVARPSREPLMODE_COMPONENT14, 1) |
1500 _SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
1501 _SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
1502 _SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
1503
Jordan Crousea7ec4212012-02-04 10:23:52 -07001504 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001505 *cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
1506 /* SP_SP_CTRL_REG */
1507 *cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1);
1508
1509 /* Load vertex shader */
1510 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
1511 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1512 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1513 | (HLSQ_BLOCK_ID_SP_VS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1514 | (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1515 *cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1516 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1517 /* (sy)end; */
1518 *cmds++ = 0x00000000; *cmds++ = 0x13000000;
1519 /* nop; */
1520 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1521 /* nop; */
1522 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1523 /* nop; */
1524 *cmds++ = 0x00000000; *cmds++ = 0x00000000;
1525
1526 /* Load fragment shader */
1527 *cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
1528 *cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
1529 | (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
1530 | (HLSQ_BLOCK_ID_SP_FS << CP_LOADSTATE_STATEBLOCKID_SHIFT)
1531 | (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
1532 *cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
1533 | (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
1534 /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
1535 *cmds++ = 0x00002000; *cmds++ = 0x57368902;
1536 /* (rpt5)nop; */
1537 *cmds++ = 0x00000000; *cmds++ = 0x00000500;
1538 /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
1539 *cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
1540 /* (sy)mov.f32f32 r1.x, r0.x; */
1541 *cmds++ = 0x00000000; *cmds++ = 0x30044004;
1542 /* mov.f32f32 r1.y, r0.y; */
1543 *cmds++ = 0x00000001; *cmds++ = 0x20044005;
1544 /* mov.f32f32 r1.z, r0.z; */
1545 *cmds++ = 0x00000002; *cmds++ = 0x20044006;
1546 /* mov.f32f32 r1.w, r0.w; */
1547 *cmds++ = 0x00000003; *cmds++ = 0x20044007;
1548 /* end; */
1549 *cmds++ = 0x00000000; *cmds++ = 0x03000000;
1550
1551 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1552 *cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
1553 /* VFD_CONTROL_0 */
1554 *cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
1555 _SET(VFD_CTRLREG0_PACKETSIZE, 2) |
1556 _SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
1557 _SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
1558 /* VFD_CONTROL_1 */
1559 *cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
1560 _SET(VFD_CTRLREG1_REGID4VTX, 252) |
1561 _SET(VFD_CTRLREG1_REGID4INST, 252);
1562
1563 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1564 *cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
1565 /* VFD_FETCH_INSTR_0_0 */
1566 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
1567 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
1568 _SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
1569 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1570 /* VFD_FETCH_INSTR_1_0 */
1571 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1572 shadow->quad_vertices_restore.gpuaddr);
1573 /* VFD_FETCH_INSTR_0_1 */
1574 *cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
1575 _SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
1576 _SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
1577 _SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
1578 /* VFD_FETCH_INSTR_1_1 */
1579 *cmds++ = _SET(VFD_BASEADDR_BASEADDR,
1580 shadow->quad_vertices_restore.gpuaddr + 16);
1581
1582 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1583 *cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
1584 /* VFD_DECODE_INSTR_0 */
1585 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1586 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1587 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
1588 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
1589 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
1590 _SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
1591 /* VFD_DECODE_INSTR_1 */
1592 *cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
1593 _SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
1594 _SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
1595 _SET(VFD_DECODEINSTRUCTIONS_REGID, 4) |
1596 _SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 12) |
1597 _SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1);
1598
1599 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1600 *cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
1601 /* RB_DEPTH_CONTROL */
1602 *cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
1603
1604 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1605 *cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
1606 /* RB_STENCIL_CONTROL */
1607 *cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
1608 _SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
1609 _SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
1610 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL, RB_STENCIL_KEEP) |
1611 _SET(RB_STENCILCONTROL_STENCIL_FUNC_BF, RB_REF_NEVER) |
1612 _SET(RB_STENCILCONTROL_STENCIL_FAIL_BF, RB_STENCIL_KEEP) |
1613 _SET(RB_STENCILCONTROL_STENCIL_ZPASS_BF, RB_STENCIL_KEEP) |
1614 _SET(RB_STENCILCONTROL_STENCIL_ZFAIL_BF, RB_STENCIL_KEEP);
1615
1616 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1617 *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
1618 /* RB_MODE_CONTROL */
1619 *cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
1620 _SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
1621
1622 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1623 *cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
1624 /* RB_RENDER_CONTROL */
1625 *cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
1626 _SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
1627
1628 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1629 *cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
1630 /* RB_MSAA_CONTROL */
1631 *cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
1632 _SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
1633
1634 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1635 *cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
1636 /* RB_MRT_CONTROL0 */
1637 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1638 _SET(RB_MRTCONTROL_ROP_CODE, 12) |
1639 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
1640 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1641
1642 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1643 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
1644 /* RB_MRT_BLENDCONTROL0 */
1645 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1646 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1647 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1648 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1649 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1650 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1651 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1652 /* RB_MRT_CONTROL1 */
1653 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1654 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1655 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1656
1657 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1658 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
1659 /* RB_MRT_BLENDCONTROL1 */
1660 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1661 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1662 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1663 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1664 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1665 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1666 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1667 /* RB_MRT_CONTROL2 */
1668 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1669 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1670 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1671
1672 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1673 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
1674 /* RB_MRT_BLENDCONTROL2 */
1675 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1676 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1677 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1678 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1679 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1680 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1681 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1682 /* RB_MRT_CONTROL3 */
1683 *cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
1684 _SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
1685 _SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
1686
1687 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1688 *cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
1689 /* RB_MRT_BLENDCONTROL3 */
1690 *cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
1691 _SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1692 _SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
1693 _SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
1694 _SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
1695 _SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
1696 _SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
1697
1698 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1699 *cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
1700 /* VFD_INDEX_MIN */
1701 *cmds++ = 0x00000000;
1702 /* VFD_INDEX_MAX */
1703 *cmds++ = 0xFFFFFFFF;
1704 /* VFD_INDEX_OFFSET */
1705 *cmds++ = 0x00000000;
1706 /* TPL1_TP_VS_TEX_OFFSET */
1707 *cmds++ = 0x00000000;
1708
1709 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1710 *cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
1711 /* VFD_VS_THREADING_THRESHOLD */
1712 *cmds++ = _SET(VFD_THREADINGTHRESHOLD_RESERVED6, 12) |
1713 _SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
1714
1715 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1716 *cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
1717 /* TPL1_TP_VS_TEX_OFFSET */
1718 *cmds++ = 0x00000000;
1719
1720 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1721 *cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
1722 /* TPL1_TP_FS_TEX_OFFSET */
1723 *cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
1724 _SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
1725 _SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
1726
1727 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1728 *cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
1729 /* GRAS_SC_CONTROL */
Tarun Karra7e8e1cf2012-02-06 18:23:19 -08001730 /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
1731 *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
1732 *cmds++ = 0x04001000;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07001733
1734 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1735 *cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
1736 /* GRAS_SU_MODE_CONTROL */
1737 *cmds++ = 0x00000000;
1738
1739 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1740 *cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
1741 /* GRAS_SC_WINDOW_SCISSOR_TL */
1742 *cmds++ = 0x00000000;
1743 /* GRAS_SC_WINDOW_SCISSOR_BR */
1744 *cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
1745 _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
1746
1747 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1748 *cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
1749 /* GRAS_SC_SCREEN_SCISSOR_TL */
1750 *cmds++ = 0x00000000;
1751 /* GRAS_SC_SCREEN_SCISSOR_BR */
1752 *cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
1753 _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
1754
1755 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
1756 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
1757 /* GRAS_CL_VPORT_XOFFSET */
1758 *cmds++ = 0x00000000;
1759 /* GRAS_CL_VPORT_XSCALE */
1760 *cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
1761 /* GRAS_CL_VPORT_YOFFSET */
1762 *cmds++ = 0x00000000;
1763 /* GRAS_CL_VPORT_YSCALE */
1764 *cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
1765
1766 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
1767 *cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
1768 /* GRAS_CL_VPORT_ZOFFSET */
1769 *cmds++ = 0x00000000;
1770 /* GRAS_CL_VPORT_ZSCALE */
1771 *cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
1772
1773 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1774 *cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
1775 /* GRAS_CL_CLIP_CNTL */
1776 *cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
1777
1778 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1779 *cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
1780 /* SP_FS_IMAGE_OUTPUT_REG_0 */
1781 *cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
1782
1783 *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
1784 *cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
1785 /* PC_PRIM_VTX_CONTROL */
1786 *cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
1787 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
1788 PC_DRAW_TRIANGLES) |
1789 _SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
1790 PC_DRAW_TRIANGLES) |
1791 _SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
1792
1793 *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
1794 *cmds++ = 0x00000000; /* Viz query info */
1795 *cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
1796 PC_DI_SRC_SEL_AUTO_INDEX,
1797 PC_DI_INDEX_SIZE_16_BIT,
1798 PC_DI_IGNORE_VISIBILITY);
1799 *cmds++ = 0x00000002; /* Num indices */
1800
1801 /* Create indirect buffer command for above command sequence */
1802 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
1803
1804 return cmds;
1805}
1806
/*
 * Build the IB1 that restores the shadowed context registers for a draw
 * context.  The bulk of the work is a single CP_LOAD_CONSTANT_CONTEXT
 * packet whose header and control bits are patched in after the register
 * ranges have been emitted (the packet length is not known up front).
 * The non-context "global" registers are then reloaded one at a time with
 * type-0 writes whose value slots are filled in later via
 * tmp_ctx.reg_values[].
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
			   struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;
	unsigned int *lcc_start;

	int i;

	/* Flush HLSQ lazy updates */
	*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
	*cmd++ = 0x7;		/* HLSQ_FLUSH */
	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	*cmd++ = 0x00000000;	/* No start addr for full invalidate */
	*cmd++ = (unsigned int)
		UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
		UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
		0;		/* No end addr for full invalidate */

	lcc_start = cmd;

	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Emit one range descriptor per start/stop pair in the table */
	for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
		cmd = reg_range(cmd, context_register_ranges[i * 2],
				context_register_ranges[i * 2 + 1]);
	}

	/* Now that the packet length is known, patch the deferred header */
	lcc_start[0] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT,
				       (cmd - lcc_start) - 1);

	/* OR the shadowing control bits into the first range word */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	lcc_start[2] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	lcc_start[2] |= (1 << 24) | (4 << 16);
#endif

	/*
	 * Reload each global register with a type-0 write.  The value word
	 * is left zero here; its GPU address is recorded so it can be
	 * filled in later (presumably by the save-side fixups — the writer
	 * of tmp_ctx.reg_values[] is outside this view).
	 */
	for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
		*cmd++ = cp_type0_packet(global_registers[i], 1);
		tmp_ctx.reg_values[i] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
1864
/*
 * Build the IB1 that restores shader constants and texture state from the
 * context shadow.  The vertex/fragment constant loads are wrapped in
 * CP_COND_EXEC packets gated on cond_execs[2]/[3], and their CP_LOAD_STATE
 * "ord1"/"ord2" words are left zero here — constant_load_commands[0..2]
 * record their GPU addresses so build_restore_fixup_cmds() can patch in
 * the real sizes/offsets read from the SP control registers at save time.
 */
static void build_constantrestore_cmds(struct adreno_device *adreno_dev,
				 struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;
	unsigned int mode = 4;	/* Indirect mode */
	unsigned int stateblock;
	unsigned int numunits;
	unsigned int statetype;

	/* Conditional-execution flag words, patched by the restore fixups */
	drawctxt->cond_execs[2].hostptr = cmd;
	drawctxt->cond_execs[2].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;
	drawctxt->cond_execs[3].hostptr = cmd;
	drawctxt->cond_execs[3].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0;

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Temporarily disable register shadowing for the HLSQ write below */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;
	*cmd++ = 0x0;
#endif
	/* HLSQ full update */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
	*cmd++ = 0x68000240;	/* A3XX_HLSQ_CONTROL_0_REG */

#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Re-enable shadowing */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = (4 << 16) | (1 << 24);
	*cmd++ = 0x0;
#endif

	/* Load vertex shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[0].hostptr = cmd;
	drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
						&drawctxt->gpustate);

	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex constants)
	   numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;

	/* Load fragment shader constants */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
	*cmd++ = 0x0000ffff;
	*cmd++ = 3;		/* EXEC_COUNT */
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->constant_load_commands[1].hostptr = cmd;
	drawctxt->constant_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment constants)
	   numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)

	   From register spec:
	   SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
	 */

	*cmd++ = 0;		/* ord1 */
	drawctxt->constant_load_commands[2].hostptr = cmd;
	drawctxt->constant_load_commands[2].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:
	   base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
	   offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET

	   From register spec:
	   SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
	   start offset in on chip RAM,
	   128bit aligned

	   ord2 = base + offset | 1
	   Because of the base alignment we can use
	   ord2 = base | offset | 1
	 */
	*cmd++ = 0;		/* ord2 */

	/* Restore VS texture memory objects */
	stateblock = 0;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture mipmap addresses */
	stateblock = 1;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore VS texture sampler objects */
	stateblock = 0;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture memory objects */
	stateblock = 2;
	statetype = 1;
	numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture mipmap addresses */
	stateblock = 3;
	statetype = 1;
	numunits = TEX_SIZE_MIPMAP / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
		  & 0xfffffffc) | statetype;

	/* Restore FS texture sampler objects */
	stateblock = 2;
	statetype = 0;
	numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	*cmd++ = (numunits << 22) | (stateblock << 19) | (mode << 16);
	*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_SAMPLER_OBJ)
		  & 0xfffffffc) | statetype;

	create_ib1(drawctxt, drawctxt->constant_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2029
/*
 * Build the IB1 that restores vertex and fragment shader instructions
 * from the shader shadow (base + SHADER_OFFSET; the FS half starts at
 * SHADER_SHADOW_SIZE / 2).  Each load is gated by a CP_COND_EXEC on
 * cond_execs[0]/[1], and the CP_LOAD_STATE "ord1" word is left zero —
 * shader_load_commands[0]/[1] record its GPU address so the restore
 * fixup IB can patch in the real shader length from SP_*S_CTRL_REG0.
 */
static void build_shader_restore_cmds(struct adreno_device *adreno_dev,
				      struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

	/* Vertex shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[0].hostptr = cmd;
	drawctxt->shader_load_commands[0].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 4 (Vertex shader)
	   numunits = SP_VS_CTRL_REG0.VS_LENGTH

	   From regspec:
	   SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;

	/* Fragment shader */
	*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
	*cmd++ = 1;
	*cmd++ = 3;		/* EXEC_COUNT */

	*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
	drawctxt->shader_load_commands[1].hostptr = cmd;
	drawctxt->shader_load_commands[1].gpuaddr =
		virt2gpu(cmd, &drawctxt->gpustate);
	/*
	   From fixup:

	   mode = 4 (indirect)
	   stateblock = 6 (Fragment shader)
	   numunits = SP_FS_CTRL_REG0.FS_LENGTH

	   From regspec:
	   SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
	   If bit31 is 1, it means overflow
	   or any long shader.

	   ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
	 */
	*cmd++ = 0;		/*ord1 */
	*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
		  + (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;

	create_ib1(drawctxt, drawctxt->shader_restore, start, cmd);
	tmp_ctx.cmd = cmd;
}
2096
2097static void build_hlsqcontrol_restore_cmds(struct adreno_device *adreno_dev,
2098 struct adreno_context *drawctxt)
2099{
2100 unsigned int *cmd = tmp_ctx.cmd;
2101 unsigned int *start = cmd;
2102
2103 *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
2104 *cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
2105 drawctxt->hlsqcontrol_restore_commands[0].hostptr = cmd;
2106 drawctxt->hlsqcontrol_restore_commands[0].gpuaddr
2107 = virt2gpu(cmd, &drawctxt->gpustate);
2108 *cmd++ = 0;
2109
2110 /* Create indirect buffer command for above command sequence */
2111 create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
2112
2113 tmp_ctx.cmd = cmd;
2114}
2115
/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
/*
 * The restore IBs built above contain zeroed CP_LOAD_STATE ord1/ord2
 * words and cond-exec flags.  This fixup IB reads the live SP control
 * registers and writes the derived values into those slots (via
 * rmw_regtomem: read register, mask, shift, OR, store to memory) so the
 * restore IBs reload exactly what the hardware currently holds.
 */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
				     struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *start = cmd;

#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
	/* Save shader sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG0;
	*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;

	/* Save constant sizes */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_VS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_CTRL_REG1;
	*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;

	/* Save constant offsets */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
	*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
	/* Save shader sizes */
	/* VS_LENGTH lives in bits [31:24]; rotate into the numunits field
	 * and OR in stateblock/mode for the CP_LOAD_STATE ord1 word */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
			   30, (4 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x7f000000,
			   30, (6 << 19) | (4 << 16),
			   drawctxt->shader_load_commands[1].gpuaddr);

	/* Save constant sizes */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   23, (4 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[0].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   23, (6 << 19) | (4 << 16),
			   drawctxt->constant_load_commands[1].gpuaddr);

	/* Modify constant restore conditionals */
	/* Nonzero const length makes the COND_EXEC flag word nonzero,
	 * enabling the corresponding constant reload */
	cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[2].gpuaddr);

	cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
			   0, 0, drawctxt->cond_execs[3].gpuaddr);

	/* Save fragment constant shadow offset */
	cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
			   18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
			   drawctxt->constant_load_commands[2].gpuaddr);
#endif

	/* Use mask value to avoid flushing HLSQ which would cause the HW to
	   discard all the shader data */

	cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
			   0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);

	create_ib1(drawctxt, drawctxt->restore_fixup, start, cmd);

	tmp_ctx.cmd = cmd;
}
2188
2189static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
2190 struct adreno_context *drawctxt)
2191{
2192 drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
2193
2194 build_regrestore_cmds(adreno_dev, drawctxt);
2195 build_constantrestore_cmds(adreno_dev, drawctxt);
2196 build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
2197 build_regconstantsave_cmds(adreno_dev, drawctxt);
2198 build_shader_save_cmds(adreno_dev, drawctxt);
2199 build_shader_restore_cmds(adreno_dev, drawctxt);
2200 build_restore_fixup_cmds(adreno_dev, drawctxt);
2201 build_save_fixup_cmds(adreno_dev, drawctxt);
2202
2203 return 0;
2204}
2205
2206/* create buffers for saving/restoring registers, constants, & GMEM */
2207static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
2208 struct adreno_context *drawctxt)
2209{
Jordan Crousea7ec4212012-02-04 10:23:52 -07002210 int result;
2211
Jordan Crouse7501d452012-04-19 08:58:44 -06002212 calc_gmemsize(&drawctxt->context_gmem_shadow, adreno_dev->gmem_size);
2213 tmp_ctx.gmem_base = adreno_dev->gmem_base;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002214
Jordan Crousea7ec4212012-02-04 10:23:52 -07002215 result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
2216 drawctxt->pagetable, drawctxt->context_gmem_shadow.size);
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002217
Jordan Crousea7ec4212012-02-04 10:23:52 -07002218 if (result)
2219 return result;
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002220
2221 build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
2222 &tmp_ctx.cmd);
2223
2224 /* Dow we need to idle? */
2225 /* adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT); */
2226
2227 tmp_ctx.cmd = build_gmem2sys_cmds(adreno_dev, drawctxt,
2228 &drawctxt->context_gmem_shadow);
2229 tmp_ctx.cmd = build_sys2gmem_cmds(adreno_dev, drawctxt,
2230 &drawctxt->context_gmem_shadow);
2231
2232 kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
2233 KGSL_CACHE_OP_FLUSH);
2234
Jordan Crousea7ec4212012-02-04 10:23:52 -07002235 drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
2236
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002237 return 0;
2238}
2239
/*
 * Allocate and populate the per-context shadow buffers.
 *
 * Returns 0 on success or a negative errno from kgsl_allocate(); on any
 * failure after the gpustate allocation succeeds, the allocation is
 * freed before returning.
 */
static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	int ret;

	/*
	 * Allocate memory for the GPU state and the context commands.
	 * Despite the name, this is much more then just storage for
	 * the gpustate. This contains command space for gmem save
	 * and texture and vertex buffer storage too
	 */

	ret = kgsl_allocate(&drawctxt->gpustate,
			    drawctxt->pagetable, CONTEXT_SIZE);

	if (ret)
		return ret;

	kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
	/* Command building starts at CMD_OFFSET within the allocation */
	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;

	/* Preamble contexts restore themselves; only legacy contexts
	 * need the full register/constant/shader shadow */
	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
		if (ret)
			goto done;

		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
	}

	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);

done:
	if (ret)
		kgsl_sharedmem_free(&drawctxt->gpustate);

	return ret;
}
2278
/*
 * Submit the save-side IBs for the outgoing context: fixups first (they
 * patch sizes/offsets into the save IBs), then registers/constants,
 * then shaders, then GMEM.  The submission order is a hardware contract
 * — the GMEM save reprograms the shader, so shaders must be saved first.
 */
static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
			       struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (context == NULL)
		return;

	if (context->flags & CTXT_FLAGS_GPU_HANG)
		KGSL_CTXT_WARN(device,
			       "Current active context has caused gpu hang\n");

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Fixup self modifying IBs for save operations */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE, context->save_fixup, 3);

		/* save registers and constants. */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->regconstant_save, 3);

		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
			/* Save shader instructions */
			adreno_ringbuffer_issuecmds(device, context,
				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);

			/* Mark that a matching restore is now required */
			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
		}
	}

	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
		/*
		 * Save GMEM (note: changes shader. shader must
		 * already be saved.)
		 */

		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_save, 3);
		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
	}
}
2324
/*
 * Switch the GPU to the incoming context: record the context id in the
 * memstore, switch the pagetable, then replay the restore IBs (GMEM,
 * registers, fixups, constants, shaders, HLSQ control) built at context
 * creation.  A NULL context just resets to the default pagetable.
 */
static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
				  struct adreno_context *context)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int cmds[5];

	if (context == NULL) {
		/* No context - set the default pagetable and thats it */
		/* NOTE(review): this dereferences adreno_dev->drawctxt_active
		 * for the id — appears to assume a context was previously
		 * active; confirm it cannot be NULL on this path */
		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
				  adreno_dev->drawctxt_active->id);
		return;
	}

	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);

	/* Write the new context id into the memstore so the CPU can see
	 * which context is current */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[4] = context->id;
	adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
				    cmds, 5);
	kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id);

	/*
	 * Restore GMEM.  (note: changes shader.
	 * Shader must not already be restored.)
	 */

	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_PMODE,
			context->context_gmem_shadow.
			gmem_restore, 3);
		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
	}

	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE, context->reg_restore, 3);

		/* Fixup self modifying IBs for restore operations */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->restore_fixup, 3);

		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->constant_restore, 3);

		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
			adreno_ringbuffer_issuecmds(device, context,
				KGSL_CMD_FLAGS_NONE,
				context->shader_restore, 3);

		/* Restore HLSQ_CONTROL_0 register */
		adreno_ringbuffer_issuecmds(device, context,
			KGSL_CMD_FLAGS_NONE,
			context->hlsqcontrol_restore, 3);
	}
}
2387
2388static void a3xx_rb_init(struct adreno_device *adreno_dev,
2389 struct adreno_ringbuffer *rb)
2390{
2391 unsigned int *cmds, cmds_gpu;
2392 cmds = adreno_ringbuffer_allocspace(rb, 18);
2393 cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);
2394
2395 GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
2396 GSL_RB_WRITE(cmds, cmds_gpu, 0x000003f7);
2397 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2398 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2399 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2400 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000080);
2401 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000100);
2402 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000180);
2403 GSL_RB_WRITE(cmds, cmds_gpu, 0x00006600);
2404 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000150);
2405 GSL_RB_WRITE(cmds, cmds_gpu, 0x0000014e);
2406 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000154);
2407 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
2408 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2409 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2410 /* Protected mode control - turned off for A3XX */
2411 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2412 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2413 GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
2414
2415 adreno_ringbuffer_submit(rb);
2416}
2417
2418static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
2419{
2420 struct kgsl_device *device = &adreno_dev->dev;
2421 const char *err = "";
2422
2423 switch (bit) {
2424 case A3XX_INT_RBBM_AHB_ERROR: {
2425 unsigned int reg;
2426
2427 adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
2428
2429 /*
2430 * Return the word address of the erroring register so that it
2431 * matches the register specification
2432 */
2433
2434 KGSL_DRV_CRIT(device,
2435 "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
2436 reg & (1 << 28) ? "WRITE" : "READ",
2437 (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
2438 (reg >> 24) & 0x3);
2439
2440 /* Clear the error */
2441 adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
2442 return;
2443 }
2444 case A3XX_INT_RBBM_REG_TIMEOUT:
2445 err = "RBBM: AHB register timeout";
2446 break;
2447 case A3XX_INT_RBBM_ME_MS_TIMEOUT:
2448 err = "RBBM: ME master split timeout";
2449 break;
2450 case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
2451 err = "RBBM: PFP master split timeout";
2452 break;
2453 case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
2454 err = "RBBM: ATB bus oveflow";
2455 break;
2456 case A3XX_INT_VFD_ERROR:
2457 err = "VFD: Out of bounds access";
2458 break;
2459 case A3XX_INT_CP_T0_PACKET_IN_IB:
2460 err = "ringbuffer TO packet in IB interrupt";
2461 break;
2462 case A3XX_INT_CP_OPCODE_ERROR:
2463 err = "ringbuffer opcode error interrupt";
2464 break;
2465 case A3XX_INT_CP_RESERVED_BIT_ERROR:
2466 err = "ringbuffer reserved bit error interrupt";
2467 break;
2468 case A3XX_INT_CP_HW_FAULT:
2469 err = "ringbuffer hardware fault";
2470 break;
2471 case A3XX_INT_CP_REG_PROTECT_FAULT:
2472 err = "ringbuffer protected mode error interrupt";
2473 break;
2474 case A3XX_INT_CP_AHB_ERROR_HALT:
2475 err = "ringbuffer AHB error interrupt";
2476 break;
2477 case A3XX_INT_MISC_HANG_DETECT:
2478 err = "MISC: GPU hang detected";
2479 break;
2480 case A3XX_INT_UCHE_OOB_ACCESS:
2481 err = "UCHE: Out of bounds access";
2482 break;
2483 }
2484
2485 KGSL_DRV_CRIT(device, "%s\n", err);
2486 kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
2487}
2488
/*
 * a3xx_cp_callback() - Handle a CP (command processor) interrupt
 * @adreno_dev: Pointer to the adreno device
 * @irq: The interrupt bit that fired (RB, IB1 or IB2 per a3xx_irq_funcs)
 *
 * For a ringbuffer interrupt, disables further timestamp-compare
 * interrupts for the current context.  In all cases wakes up waiters
 * and schedules the timestamp-expired work.
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct kgsl_device *device = &adreno_dev->dev;

	if (irq == A3XX_INT_CP_RB_INT) {
		unsigned int context_id;
		/* Look up which context is current in shared memstore */
		kgsl_sharedmem_readl(&device->memstore, &context_id,
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				current_context));
		if (context_id < KGSL_MEMSTORE_MAX) {
			/*
			 * Clear ts_cmp_enable so the CP stops raising RB
			 * interrupts for this context until re-armed
			 */
			kgsl_sharedmem_writel(&device->memstore,
				KGSL_MEMSTORE_OFFSET(context_id,
					ts_cmp_enable), 0);
			/* Ensure the write lands before waking waiters */
			wmb();
		}
		KGSL_CMD_WARN(device, "ringbuffer rb interrupt\n");
	}

	wake_up_interruptible_all(&device->wait_queue);

	/* Schedule work to free mem and issue ibs */
	queue_work(device->work_queue, &device->ts_expired_ws);

	atomic_notifier_call_chain(&device->ts_notifier_list,
				   device->id, NULL);
}
2515
/* Helper to populate one entry of the a3xx_irq_funcs dispatch table */
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

/*
 * Interrupt bits enabled in A3XX_RBBM_INT_0_MASK when IRQs are on.
 * Bits left out here (e.g. RBBM_GPU_IDLE, the *_DONE_TS timestamps)
 * are intentionally not serviced by this driver.
 */
#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_AHB_ERROR) | \
	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \
	 (1 << A3XX_INT_CP_OPCODE_ERROR) | \
	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) | \
	 (1 << A3XX_INT_CP_IB1_INT) | \
	 (1 << A3XX_INT_CP_IB2_INT) | \
	 (1 << A3XX_INT_CP_RB_INT) | \
	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
2531
/*
 * Per-bit interrupt dispatch table, indexed by bit position in
 * A3XX_RBBM_INT_0_STATUS.  NULL entries are bits with no handler;
 * if such a bit fires it is logged by a3xx_irq_handler.
 */
static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(NULL),	       /* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	       /* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	       /* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	       /* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};
2563
2564static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
2565{
2566 struct kgsl_device *device = &adreno_dev->dev;
2567 irqreturn_t ret = IRQ_NONE;
2568 unsigned int status, tmp;
2569 int i;
2570
2571 adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
2572
2573 for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
2574 if (tmp & 1) {
2575 if (a3xx_irq_funcs[i].func != NULL) {
2576 a3xx_irq_funcs[i].func(adreno_dev, i);
2577 ret = IRQ_HANDLED;
2578 } else {
2579 KGSL_DRV_CRIT(device,
2580 "Unhandled interrupt bit %x\n", i);
2581 }
2582 }
2583
2584 tmp >>= 1;
2585 }
2586
Carter Cooperb769c912012-04-13 08:16:35 -06002587 trace_kgsl_a3xx_irq_status(device, status);
2588
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002589 if (status)
2590 adreno_regwrite(&adreno_dev->dev, A3XX_RBBM_INT_CLEAR_CMD,
2591 status);
2592 return ret;
2593}
2594
2595static void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
2596{
2597 struct kgsl_device *device = &adreno_dev->dev;
2598
Wei Zou08a7e572012-06-03 22:05:46 -07002599 if (state)
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002600 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, A3XX_INT_MASK);
Wei Zou08a7e572012-06-03 22:05:46 -07002601 else
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002602 adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
2603}
2604
/*
 * a3xx_busy_cycles() - Sample and restart the GPU busy-cycle counter
 * @adreno_dev: Pointer to the adreno device
 *
 * Freezes power counter 1, reads the accumulated busy-cycle count,
 * then resets and re-enables the counter so the next call returns the
 * cycles accumulated since this one.  The freeze/read/reset/re-enable
 * register sequence is order-critical.
 *
 * Return: busy cycles counted since the previous call.
 */
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	unsigned int reg, val;

	/* Freeze the counter */
	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
	reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Read the value */
	adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);

	/* Reset the counter */
	reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	/* Re-enable the counter */
	reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
	reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);

	return val;
}
2629
/*
 * a3xx_start() - One-time A3XX hardware initialization
 * @adreno_dev: Pointer to the adreno device
 *
 * Programs the VBIF bus interface, performance counters, error
 * reporting, hang detection and clock gating.  Register values are
 * hardware tuning constants; the write sequence should not be
 * reordered.
 */
static void a3xx_start(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Set up 16 deep read/write request queues */

	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
	adreno_regwrite(device, A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);

	/* Enable WR-REQ */
	adreno_regwrite(device, A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x000000FF);

	/* Set up round robin arbitration between both AXI ports */
	adreno_regwrite(device, A3XX_VBIF_ARB_CTL, 0x00000030);

	/* Set up AOOO */
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C);
	adreno_regwrite(device, A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C);

	/* ABIT sort is only configured on APQ8064 targets */
	if (cpu_is_apq8064()) {
		/* Enable 1K sort */
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT, 0x000000FF);
		adreno_regwrite(device, A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
	}
	/* Make all blocks contribute to the GPU BUSY perf counter */
	adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Tune the hystersis counters for SP and CP idle detection */
	adreno_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
	adreno_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits. This lets us get
	   useful information on failure */

	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);

	/* Turn on the power counters */
	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang */

	adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 16) | 0xFFF);

	/* Enable Clock gating */
	adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
		A3XX_RBBM_CLOCK_CTL_DEFAULT);

}
2688
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002689/* Defined in adreno_a3xx_snapshot.c */
2690void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
2691 int *remain, int hang);
2692
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002693struct adreno_gpudev adreno_a3xx_gpudev = {
2694 .reg_rbbm_status = A3XX_RBBM_STATUS,
2695 .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
2696 .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
2697
2698 .ctxt_create = a3xx_drawctxt_create,
2699 .ctxt_save = a3xx_drawctxt_save,
2700 .ctxt_restore = a3xx_drawctxt_restore,
Shubhraprakash Das4624b552012-06-01 14:08:03 -06002701 .ctxt_draw_workaround = NULL,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002702 .rb_init = a3xx_rb_init,
2703 .irq_control = a3xx_irq_control,
2704 .irq_handler = a3xx_irq_handler,
2705 .busy_cycles = a3xx_busy_cycles,
2706 .start = a3xx_start,
Jordan Crouse0c2761a2012-02-01 22:11:12 -07002707 .snapshot = a3xx_snapshot,
Jordan Crouseb4d31bd2012-02-01 22:11:12 -07002708};