blob: 3aa601c155797b29359327e3e160fc1e7f9546f5 [file] [log] [blame]
/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include "kgsl.h"
15#include "kgsl_sharedmem.h"
16#include "kgsl_cffdump.h"
17#include "adreno.h"
18
19/*
20 *
21 * Memory Map for Register, Constant & Instruction Shadow, and Command Buffers
22 * (34.5KB)
23 *
24 * +---------------------+------------+-------------+---+---------------------+
25 * | ALU Constant Shadow | Reg Shadow | C&V Buffers |Tex| Shader Instr Shadow |
26 * +---------------------+------------+-------------+---+---------------------+
27 * ________________________________/ \____________________
28 * / |
29 * +--------------+-----------+------+-----------+------------------------+
30 * | Restore Regs | Save Regs | Quad | Gmem Save | Gmem Restore | unused |
31 * +--------------+-----------+------+-----------+------------------------+
32 *
33 * 8K - ALU Constant Shadow (8K aligned)
34 * 4K - H/W Register Shadow (8K aligned)
35 * 4K - Command and Vertex Buffers
36 * - Indirect command buffer : Const/Reg restore
37 * - includes Loop & Bool const shadows
38 * - Indirect command buffer : Const/Reg save
39 * - Quad vertices & texture coordinates
40 * - Indirect command buffer : Gmem save
41 * - Indirect command buffer : Gmem restore
42 * - Unused (padding to 8KB boundary)
43 * <1K - Texture Constant Shadow (768 bytes) (8K aligned)
44 * 18K - Shader Instruction Shadow
45 * - 6K vertex (32 byte aligned)
46 * - 6K pixel (32 byte aligned)
47 * - 6K shared (32 byte aligned)
48 *
49 * Note: Reading constants into a shadow, one at a time using REG_TO_MEM, takes
50 * 3 DWORDS per DWORD transfered, plus 1 DWORD for the shadow, for a total of
51 * 16 bytes per constant. If the texture constants were transfered this way,
52 * the Command & Vertex Buffers section would extend past the 16K boundary.
53 * By moving the texture constant shadow area to start at 16KB boundary, we
54 * only require approximately 40 bytes more memory, but are able to use the
55 * LOAD_CONSTANT_CONTEXT shadowing feature for the textures, speeding up
56 * context switching.
57 *
58 * [Using LOAD_CONSTANT_CONTEXT shadowing feature for the Loop and/or Bool
59 * constants would require an additional 8KB each, for alignment.]
60 *
61 */
62
/* Constants */

#define ALU_CONSTANTS	2048	/* DWORDS */
#define NUM_REGISTERS	1024	/* DWORDS */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
#define CMD_BUFFER_LEN	9216	/* DWORDS */
#else
#define CMD_BUFFER_LEN	3072	/* DWORDS */
#endif
#define TEX_CONSTANTS		(32*6)	/* DWORDS */
#define BOOL_CONSTANTS		8	/* DWORDS */
#define LOOP_CONSTANTS		56	/* DWORDS */
#define SHADER_INSTRUCT_LOG2	9U	/* 2^n == SHADER_INSTRUCTIONS */

#if defined(PM4_IM_STORE)
/* 96-bit instructions */
#define SHADER_INSTRUCT		(1<<SHADER_INSTRUCT_LOG2)
#else
#define SHADER_INSTRUCT		0
#endif

/* LOAD_CONSTANT_CONTEXT shadow size */
#define LCC_SHADOW_SIZE		0x2000	/* 8KB */

/* Sizes of the individual shadow regions; see the memory-map diagram at
 * the top of this file for how they are laid out.
 */
#define ALU_SHADOW_SIZE		LCC_SHADOW_SIZE	/* 8KB */
#define REG_SHADOW_SIZE		0x1000	/* 4KB */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
#define CMD_BUFFER_SIZE		0x9000	/* 36KB */
#else
#define CMD_BUFFER_SIZE		0x3000	/* 12KB */
#endif
#define TEX_SHADOW_SIZE		(TEX_CONSTANTS*4)	/* 768 bytes */
#define SHADER_SHADOW_SIZE	(SHADER_INSTRUCT*12)	/* 6KB */

/* Byte offsets of each shadow region inside the per-context gpustate
 * buffer.  SHADER_OFFSET is rounded up to a 32-byte boundary (the shader
 * instruction shadows are 32-byte aligned per the map above).
 */
#define REG_OFFSET		LCC_SHADOW_SIZE
#define CMD_OFFSET		(REG_OFFSET + REG_SHADOW_SIZE)
#define TEX_OFFSET		(CMD_OFFSET + CMD_BUFFER_SIZE)
#define SHADER_OFFSET		((TEX_OFFSET + TEX_SHADOW_SIZE + 32) & ~31)

/* Total per-context shadow allocation: three shader shadows (vertex,
 * pixel, shared -- see the 18K section in the map above).
 */
#define CONTEXT_SIZE		(SHADER_OFFSET + 3 * SHADER_SHADOW_SIZE)
103
/* A scratchpad used to build commands during context create.  This is a
 * single static instance: context creation is assumed to be serialized
 * by the caller -- NOTE(review): confirm no concurrent context creates.
 */

static struct tmp_ctx {
	unsigned int *start;	/* Command & Vertex buffer start */
	unsigned int *cmd;	/* Next available dword in C&V buffer */

	/* GPU addresses of buffers, needed when creating IB1 command
	 * buffers that read/write the constant shadows.
	 */
	uint32_t bool_shadow;	/* bool constants */
	uint32_t loop_shadow;	/* loop constants */

#if defined(PM4_IM_STORE)
	uint32_t shader_shared;	/* shared shader instruction shadow */
	uint32_t shader_vertex;	/* vertex shader instruction shadow */
	uint32_t shader_pixel;	/* pixel shader instruction shadow */
#endif

	/* Addresses in command buffer where separately handled registers
	 * are saved (slot 0 = SQ_GPR_MANAGEMENT, slot 1 = TP0_CHICKEN,
	 * slots 2+ = the Leia VSC range on a22x -- see build_regsave_cmds)
	 */
	uint32_t reg_values[33];
	uint32_t chicken_restore;	/* gpu addr of saved TP0_CHICKEN value */

	uint32_t gmem_base;	/* Base gpu address of GMEM */

} tmp_ctx;
129
130/* context save (gmem -> sys) */
131
/* pre-compiled vertex shader program for the gmem -> sysmem (save) blit,
 * presumably generated offline from the GLSL below -- do not hand-edit.
 *
 * attribute vec4 P;
 * void main(void)
 * {
 *	gl_Position = P;
 * }
 */
#define GMEM2SYS_VTX_PGM_LEN	0x12

/* Raw SQ instruction dwords loaded via program_shader() */
static unsigned int gmem2sys_vtx_pgm[GMEM2SYS_VTX_PGM_LEN] = {
	0x00011003, 0x00001000, 0xc2000000,
	0x00001004, 0x00001000, 0xc4000000,
	0x00001005, 0x00002000, 0x00000000,
	0x1cb81000, 0x00398a88, 0x00000003,
	0x140f803e, 0x00000000, 0xe2010100,
	0x14000000, 0x00000000, 0xe2000000
};
150
/* pre-compiled fragment shader program for the gmem -> sysmem (save) blit,
 * presumably generated offline from the GLSL below -- do not hand-edit.
 *
 * precision highp float;
 * uniform vec4 clear_color;
 * void main(void)
 * {
 *	gl_FragColor = clear_color;
 * }
 */

#define GMEM2SYS_FRAG_PGM_LEN	0x0c

/* Raw SQ instruction dwords loaded via program_shader() */
static unsigned int gmem2sys_frag_pgm[GMEM2SYS_FRAG_PGM_LEN] = {
	0x00000000, 0x1002c400, 0x10000000,
	0x00001003, 0x00002000, 0x00000000,
	0x140f8000, 0x00000000, 0x22000000,
	0x14000000, 0x00000000, 0xe2000000
};
169
/* context restore (sys -> gmem) */
/* pre-compiled vertex shader program for the sysmem -> gmem (restore)
 * blit, presumably generated offline from the GLSL below -- do not
 * hand-edit.
 *
 * attribute vec4 position;
 * attribute vec4 texcoord;
 * varying vec4 texcoord0;
 * void main()
 * {
 *	gl_Position = position;
 *	texcoord0 = texcoord;
 * }
 */

#define SYS2GMEM_VTX_PGM_LEN	0x18

/* Raw SQ instruction dwords loaded via program_shader() */
static unsigned int sys2gmem_vtx_pgm[SYS2GMEM_VTX_PGM_LEN] = {
	0x00052003, 0x00001000, 0xc2000000, 0x00001005,
	0x00001000, 0xc4000000, 0x00001006, 0x10071000,
	0x20000000, 0x18981000, 0x0039ba88, 0x00000003,
	0x12982000, 0x40257b08, 0x00000002, 0x140f803e,
	0x00000000, 0xe2010100, 0x140f8000, 0x00000000,
	0xe2020200, 0x14000000, 0x00000000, 0xe2000000
};
193
/* pre-compiled fragment shader program for the sysmem -> gmem (restore)
 * blit, presumably generated offline from the GLSL below -- do not
 * hand-edit.
 *
 * precision mediump float;
 * uniform sampler2D tex0;
 * varying vec4 texcoord0;
 * void main()
 * {
 *	gl_FragColor = texture2D(tex0, texcoord0.xy);
 * }
 */

#define SYS2GMEM_FRAG_PGM_LEN	0x0f

/* Raw SQ instruction dwords loaded via program_shader() */
static unsigned int sys2gmem_frag_pgm[SYS2GMEM_FRAG_PGM_LEN] = {
	0x00011002, 0x00001000, 0xc4000000, 0x00001003,
	0x10041000, 0x20000000, 0x10000001, 0x1ffff688,
	0x00000002, 0x140f8000, 0x00000000, 0xe2000000,
	0x14000000, 0x00000000, 0xe2000000
};
213
/* shader texture constants (sysmem -> gmem) */
#define SYS2GMEM_TEX_CONST_LEN	6

/* Template for the 6-dword texture fetch constant used by the restore
 * blit.  Fields marked "TBD" (pitch, base address, width/height) are
 * OR-ed in at runtime by build_sys2gmem_cmds() after this template is
 * memcpy'd into the command stream.
 */
static unsigned int sys2gmem_tex_const[SYS2GMEM_TEX_CONST_LEN] = {
	/* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
	 * RFMode=ZeroClamp-1, Dim=1:2d
	 */
	0x00000002,		/* Pitch = TBD */

	/* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
	 * NearestClamp=1:OGL Mode
	 */
	0x00000800,		/* Address[31:12] = TBD */

	/* Width, Height, EndianSwap=0:None */
	0,			/* Width & Height = TBD */

	/* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
	 * Mip=2:BaseMap
	 */
	0 << 1 | 1 << 4 | 2 << 7 | 3 << 10 | 2 << 23,

	/* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
	 * Dim3d=0
	 */
	0,

	/* BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0,
	 * Dim=1:2d, MipPacking=0
	 */
	1 << 9			/* Mip Address[31:12] = TBD */
};
246
#define NUM_COLOR_FORMATS 13

/* Maps a COLORX_* color-buffer format index (shadow->format) to the
 * corresponding surface format; indexed in build_sys2gmem_cmds() to
 * patch the texture fetch constant.
 */
static enum SURFACEFORMAT surface_format_table[NUM_COLOR_FORMATS] = {
	FMT_4_4_4_4,		/* COLORX_4_4_4_4 */
	FMT_1_5_5_5,		/* COLORX_1_5_5_5 */
	FMT_5_6_5,		/* COLORX_5_6_5 */
	FMT_8,			/* COLORX_8 */
	FMT_8_8,		/* COLORX_8_8 */
	FMT_8_8_8_8,		/* COLORX_8_8_8_8 */
	FMT_8_8_8_8,		/* COLORX_S8_8_8_8 */
	FMT_16_FLOAT,		/* COLORX_16_FLOAT */
	FMT_16_16_FLOAT,	/* COLORX_16_16_FLOAT */
	FMT_16_16_16_16_FLOAT,	/* COLORX_16_16_16_16_FLOAT */
	FMT_32_FLOAT,		/* COLORX_32_FLOAT */
	FMT_32_32_FLOAT,	/* COLORX_32_32_FLOAT */
	FMT_32_32_32_32_FLOAT,	/* COLORX_32_32_32_32_FLOAT */
};
264
/* Bytes per pixel for each COLORX_* format; must stay parallel with
 * surface_format_table above.  Used to convert a byte offset into a
 * pixel offset in build_gmem2sys_cmds().
 */
static unsigned int format2bytesperpixel[NUM_COLOR_FORMATS] = {
	2,			/* COLORX_4_4_4_4 */
	2,			/* COLORX_1_5_5_5 */
	2,			/* COLORX_5_6_5 */
	1,			/* COLORX_8 */
	2,			/* COLORX_8_8 8*/
	4,			/* COLORX_8_8_8_8 */
	4,			/* COLORX_S8_8_8_8 */
	2,			/* COLORX_16_FLOAT */
	4,			/* COLORX_16_16_FLOAT */
	8,			/* COLORX_16_16_16_16_FLOAT */
	4,			/* COLORX_32_FLOAT */
	8,			/* COLORX_32_32_FLOAT */
	16,			/* COLORX_32_32_32_32_FLOAT */
};
280
/* shader linkage info: constant-store address used for the quad vertex
 * fetch constants in build_gmem2sys_cmds() (presumably slot 11 at 6
 * dwords per slot, plus 3 -- TODO confirm against SQ constant layout).
 */
#define SHADER_CONST_ADDR	(11 * 6 + 3)
283
Jordan Crousea78c9172011-07-11 13:14:09 -0600284
285static unsigned int *program_shader(unsigned int *cmds, int vtxfrag,
286 unsigned int *shader_pgm, int dwords)
287{
288 /* load the patched vertex shader stream */
289 *cmds++ = pm4_type3_packet(PM4_IM_LOAD_IMMEDIATE, 2 + dwords);
290 /* 0=vertex shader, 1=fragment shader */
291 *cmds++ = vtxfrag;
292 /* instruction start & size (in 32-bit words) */
293 *cmds++ = ((0 << 16) | dwords);
294
295 memcpy(cmds, shader_pgm, dwords << 2);
296 cmds += dwords;
297
298 return cmds;
299}
300
301static unsigned int *reg_to_mem(unsigned int *cmds, uint32_t dst,
302 uint32_t src, int dwords)
303{
304 while (dwords-- > 0) {
305 *cmds++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
306 *cmds++ = src++;
307 *cmds++ = dst;
308 dst += 4;
309 }
310
311 return cmds;
312}
313
314#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
315
316static void build_reg_to_mem_range(unsigned int start, unsigned int end,
317 unsigned int **cmd,
318 struct adreno_context *drawctxt)
319{
320 unsigned int i = start;
321
322 for (i = start; i <= end; i++) {
323 *(*cmd)++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
324 *(*cmd)++ = i;
325 *(*cmd)++ =
326 ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) +
327 (i - 0x2000) * 4;
328 }
329}
330
331#endif
332
333/* chicken restore */
334static unsigned int *build_chicken_restore_cmds(
335 struct adreno_context *drawctxt)
336{
337 unsigned int *start = tmp_ctx.cmd;
338 unsigned int *cmds = start;
339
340 *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
341 *cmds++ = 0;
342
343 *cmds++ = pm4_type0_packet(REG_TP0_CHICKEN, 1);
344 tmp_ctx.chicken_restore = virt2gpu(cmds, &drawctxt->gpustate);
345 *cmds++ = 0x00000000;
346
347 /* create indirect buffer command for above command sequence */
348 create_ib1(drawctxt, drawctxt->chicken_restore, start, cmds);
349
350 return cmds;
351}
352
353/****************************************************************************/
354/* context save */
355/****************************************************************************/
356
/* Pairs of (first, last) register addresses to shadow for A20x parts;
 * build_regsave_cmds() consumes this table two entries at a time and
 * calls build_reg_to_mem_range() for each inclusive range.
 */
static const unsigned int register_ranges_a20x[] = {
	REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO,
	REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR,
	REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR,
	REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET,
	REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1,
	REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST,
	REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK,
	REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK,
	REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET,
	REG_VGT_MAX_VTX_INDX, REG_RB_FOG_COLOR,
	REG_RB_DEPTHCONTROL, REG_RB_MODECONTROL,
	REG_PA_SU_POINT_SIZE, REG_PA_SC_LINE_STIPPLE,
	REG_PA_SC_VIZ_QUERY, REG_PA_SC_VIZ_QUERY,
	REG_VGT_VERTEX_REUSE_BLOCK_CNTL, REG_RB_DEPTH_CLEAR
};
373
/* Pairs of (first, last) register addresses to shadow for A22x parts;
 * same consumption pattern as register_ranges_a20x above, with extra
 * Leia-specific ranges at the end.
 */
static const unsigned int register_ranges_a22x[] = {
	REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO,
	REG_COHER_DEST_BASE_0, REG_PA_SC_SCREEN_SCISSOR_BR,
	REG_PA_SC_WINDOW_OFFSET, REG_PA_SC_WINDOW_SCISSOR_BR,
	REG_RB_STENCILREFMASK_BF, REG_PA_CL_VPORT_ZOFFSET,
	REG_SQ_PROGRAM_CNTL, REG_SQ_WRAPPING_1,
	REG_PA_SC_LINE_CNTL, REG_SQ_PS_CONST,
	REG_PA_SC_AA_MASK, REG_PA_SC_AA_MASK,
	REG_RB_SAMPLE_COUNT_CTL, REG_RB_COLOR_DEST_MASK,
	REG_PA_SU_POLY_OFFSET_FRONT_SCALE, REG_PA_SU_POLY_OFFSET_BACK_OFFSET,
	/* all the below registers are specific to Leia */
	REG_LEIA_PC_MAX_VTX_INDX, REG_LEIA_PC_INDX_OFFSET,
	REG_RB_COLOR_MASK, REG_RB_FOG_COLOR,
	REG_RB_DEPTHCONTROL, REG_RB_COLORCONTROL,
	REG_PA_CL_CLIP_CNTL, REG_PA_CL_VTE_CNTL,
	REG_RB_MODECONTROL, REG_RB_SAMPLE_POS,
	REG_PA_SU_POINT_SIZE, REG_PA_SU_LINE_CNTL,
	REG_LEIA_PC_VERTEX_REUSE_BLOCK_CNTL,
	REG_LEIA_PC_VERTEX_REUSE_BLOCK_CNTL,
	REG_RB_COPY_CONTROL, REG_RB_DEPTH_CLEAR
};
395
396
/* Build the register-save IB1: saves h/w regs, ALU constants, texture
 * constants, boolean and loop constants into the context's shadow
 * memory.  Requires tmp_ctx.bool_shadow / tmp_ctx.loop_shadow to be set.
 * Two strategies, chosen at build time:
 *  - CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES: explicitly copy registers
 *    and constants out with REG_TO_MEM packets;
 *  - otherwise: the shadows are already kept current by the HW, so only
 *    shadowing is disabled (count=0 LOAD_CONSTANT_CONTEXT) to prevent
 *    corruption while the context is switched out.
 * Advances tmp_ctx.cmd past the generated commands.
 */
static void build_regsave_cmds(struct adreno_device *adreno_dev,
			       struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;

	*cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Make sure the HW context has the correct register values
	 * before reading them. */
	*cmd++ = pm4_type3_packet(PM4_CONTEXT_UPDATE, 1);
	*cmd++ = 0;

	{
		unsigned int i = 0;
		unsigned int reg_array_size = 0;
		const unsigned int *ptr_register_ranges;

		/* Based on chip id choose the register ranges */
		if (adreno_is_a22x(adreno_dev)) {
			ptr_register_ranges = register_ranges_a22x;
			reg_array_size = ARRAY_SIZE(register_ranges_a22x);
		} else {
			ptr_register_ranges = register_ranges_a20x;
			reg_array_size = ARRAY_SIZE(register_ranges_a20x);
		}

		/* Write HW registers into shadow; the table holds
		 * (first, last) pairs, hence the /2 stride. */
		for (i = 0; i < (reg_array_size/2) ; i++) {
			build_reg_to_mem_range(ptr_register_ranges[i*2],
					ptr_register_ranges[i*2+1],
					&cmd, drawctxt);
		}
	}

	/* Copy ALU constants (shadow lives at offset 0 of gpustate) */
	cmd =
	    reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr) & 0xFFFFE000,
		       REG_SQ_CONSTANT_0, ALU_CONSTANTS);

	/* Copy Tex constants */
	cmd =
	    reg_to_mem(cmd,
		       (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000,
		       REG_SQ_FETCH_0, TEX_CONSTANTS);
#else

	/* Insert a wait for idle packet before reading the registers.
	 * This is to fix a hang/reset seen during stress testing. In this
	 * hang, CP encountered a timeout reading SQ's boolean constant
	 * register. There is logic in the HW that blocks reading of this
	 * register when the SQ block is not idle, which we believe is
	 * contributing to the hang.*/
	*cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* H/w registers are already shadowed; just need to disable shadowing
	 * to prevent corruption.
	 */
	*cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
	*cmd++ = 4 << 16;	/* regs, start=0 */
	*cmd++ = 0x0;		/* count = 0 */

	/* ALU constants are already shadowed; just need to disable shadowing
	 * to prevent corruption.
	 */
	*cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000;
	*cmd++ = 0 << 16;	/* ALU, start=0 */
	*cmd++ = 0x0;		/* count = 0 */

	/* Tex constants are already shadowed; just need to disable shadowing
	 * to prevent corruption.
	 */
	*cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000;
	*cmd++ = 1 << 16;	/* Tex, start=0 */
	*cmd++ = 0x0;		/* count = 0 */
#endif

	/* Need to handle some of the registers separately */
	*cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
	*cmd++ = REG_SQ_GPR_MANAGEMENT;
	*cmd++ = tmp_ctx.reg_values[0];

	*cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
	*cmd++ = REG_TP0_CHICKEN;
	*cmd++ = tmp_ctx.reg_values[1];

	/* On a22x also save the Leia VSC register block, one register per
	 * reg_values slot starting at index 2 */
	if (adreno_is_a22x(adreno_dev)) {
		unsigned int i;
		unsigned int j = 2;
		for (i = REG_LEIA_VSC_BIN_SIZE; i <=
				REG_LEIA_VSC_PIPE_DATA_LENGTH_7; i++) {
			*cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
			*cmd++ = i;
			*cmd++ = tmp_ctx.reg_values[j];
			j++;
		}
	}

	/* Copy Boolean constants */
	cmd = reg_to_mem(cmd, tmp_ctx.bool_shadow, REG_SQ_CF_BOOLEANS,
			 BOOL_CONSTANTS);

	/* Copy Loop constants */
	cmd = reg_to_mem(cmd, tmp_ctx.loop_shadow,
			 REG_SQ_CF_LOOP, LOOP_CONSTANTS);

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->reg_save, start, cmd);

	tmp_ctx.cmd = cmd;
}
518
/* copy colour, depth, & stencil buffers from graphics memory to system
 * memory (the gmem "save" half of a context switch).  Builds a complete
 * PM4 stream into shadow->gmem_save_commands that sets up a full-screen
 * quad draw with the precompiled gmem2sys shaders, then resolves
 * (EDRAM copy) into the gmemshadow buffer.  Wrapped in an IB1 via
 * create_ib1() at the end; returns the advanced write position.
 */
static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
					 struct adreno_context *drawctxt,
					 struct gmem_shadow_t *shadow)
{
	unsigned int *cmds = shadow->gmem_save_commands;
	unsigned int *start = cmds;
	/* Calculate the new offset based on the adjusted base: the copy
	 * destination base must be 4K aligned, so the sub-page remainder
	 * is expressed as a pixel offset instead. */
	unsigned int bytesperpixel = format2bytesperpixel[shadow->format];
	unsigned int addr = shadow->gmemshadow.gpuaddr;
	unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel;

	/* Store TP0_CHICKEN register into the restore-patch slot recorded
	 * by build_chicken_restore_cmds() */
	*cmds++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
	*cmds++ = REG_TP0_CHICKEN;

	*cmds++ = tmp_ctx.chicken_restore;

	*cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
	*cmds++ = 0;

	/* Set TP0_CHICKEN to zero */
	*cmds++ = pm4_type0_packet(REG_TP0_CHICKEN, 1);
	*cmds++ = 0x00000000;

	/* Set PA_SC_AA_CONFIG to 0 */
	*cmds++ = pm4_type0_packet(REG_PA_SC_AA_CONFIG, 1);
	*cmds++ = 0x00000000;

	/* program shader */

	/* load shader vtx constants ... 5 dwords */
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 4);
	*cmds++ = (0x1 << 16) | SHADER_CONST_ADDR;
	*cmds++ = 0;
	/* valid(?) vtx constant flag & addr */
	*cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
	/* limit = 12 dwords */
	*cmds++ = 0x00000030;

	/* Invalidate L2 cache to make sure vertices are updated */
	*cmds++ = pm4_type0_packet(REG_TC_CNTL_STATUS, 1);
	*cmds++ = 0x1;

	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 4);
	*cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
	*cmds++ = 0x00ffffff;	/* REG_VGT_MAX_VTX_INDX */
	*cmds++ = 0x0;		/* REG_VGT_MIN_VTX_INDX */
	*cmds++ = 0x00000000;	/* REG_VGT_INDX_OFFSET */

	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_MASK);
	*cmds++ = 0x0000ffff;	/* REG_PA_SC_AA_MASK */

	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLORCONTROL);
	*cmds++ = 0x00000c20;

	/* Repartition shaders */
	*cmds++ = pm4_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	*cmds++ = 0x180;

	/* Invalidate Vertex & Pixel instruction code address and sizes */
	*cmds++ = pm4_type3_packet(PM4_INVALIDATE_STATE, 1);
	*cmds++ = 0x00003F00;

	*cmds++ = pm4_type3_packet(PM4_SET_SHADER_BASES, 1);
	*cmds++ = (0x80000000) | 0x180;

	/* load the patched vertex shader stream */
	cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN);

	/* Load the patched fragment shader stream */
	cmds =
	    program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN);

	/* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 0x10018001;
	else
		*cmds++ = 0x10010001;
	*cmds++ = 0x00000008;

	/* resolve */

	/* PA_CL_VTE_CNTL */
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
	/* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */
	*cmds++ = 0x00000b00;

	/* program surface info */
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_RB_SURFACE_INFO);
	*cmds++ = shadow->gmem_pitch;	/* pitch, MSAA = 1 */

	/* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0,
	 * Base=gmem_base
	 */
	/* gmem base assumed 4K aligned. */
	BUG_ON(tmp_ctx.gmem_base & 0xFFF);
	*cmds++ =
	    (shadow->
	     format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base;

	/* disable Z */
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_DEPTHCONTROL);
	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 0x08;
	else
		*cmds++ = 0;

	/* set REG_PA_SU_SC_MODE_CNTL
	 * Front_ptype = draw triangles
	 * Back_ptype = draw triangles
	 * Provoking vertex = last
	 */
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
	*cmds++ = 0x00080240;

	/* Use maximum scissor values -- quad vertices already have the
	 * correct bounds */
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
	*cmds++ = (0 << 16) | 0;
	*cmds++ = (0x1fff << 16) | (0x1fff);
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL);
	*cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0);
	*cmds++ = (0x1fff << 16) | (0x1fff);

	/* load the viewport so that z scale = clear depth and
	 * z offset = 0.0f
	 */
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
	*cmds++ = 0xbf800000;	/* -1.0f */
	*cmds++ = 0x0;

	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_MASK);
	*cmds++ = 0x0000000f;	/* R = G = B = 1:enabled */

	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
	*cmds++ = 0xffffffff;

	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_WRAPPING_0);
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;

	/* load the stencil ref value
	 * $AAM - do this later
	 */

	/* load the COPY state */
	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 6);
	*cmds++ = CP_REG(REG_RB_COPY_CONTROL);
	*cmds++ = 0;		/* RB_COPY_CONTROL */
	*cmds++ = addr & 0xfffff000;	/* RB_COPY_DEST_BASE */
	*cmds++ = shadow->pitch >> 5;	/* RB_COPY_DEST_PITCH */

	/* Endian=none, Linear, Format=RGBA8888,Swap=0,!Dither,
	 * MaskWrite:R=G=B=A=1
	 */
	*cmds++ = 0x0003c008 |
	    (shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT);
	/* Make sure we stay in offsetx field. */
	BUG_ON(offset & 0xfffff000);
	*cmds++ = offset;

	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_MODECONTROL);
	*cmds++ = 0x6;		/* EDRAM copy */

	*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL);
	*cmds++ = 0x00010000;

	if (adreno_is_a22x(adreno_dev)) {
		*cmds++ = pm4_type3_packet(PM4_SET_DRAW_INIT_FLAGS, 1);
		*cmds++ = 0;

		*cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
		*cmds++ = CP_REG(REG_LEIA_RB_LRZ_VSC_CONTROL);
		*cmds++ = 0x0000000;

		*cmds++ = pm4_type3_packet(PM4_DRAW_INDX, 3);
		*cmds++ = 0;	/* viz query info. */
		/* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/
		*cmds++ = 0x00004088;
		*cmds++ = 3;	/* NumIndices=3 */
	} else {
		/* queue the draw packet */
		*cmds++ = pm4_type3_packet(PM4_DRAW_INDX, 2);
		*cmds++ = 0;	/* viz query info. */
		/* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */
		*cmds++ = 0x00030088;
	}

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, shadow->gmem_save, start, cmds);

	return cmds;
}
729
730/* context restore */
731
732/*copy colour, depth, & stencil buffers from system memory to graphics memory*/
733static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
734 struct adreno_context *drawctxt,
735 struct gmem_shadow_t *shadow)
736{
737 unsigned int *cmds = shadow->gmem_restore_commands;
738 unsigned int *start = cmds;
739
740 /* Store TP0_CHICKEN register */
741 *cmds++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
742 *cmds++ = REG_TP0_CHICKEN;
743 *cmds++ = tmp_ctx.chicken_restore;
744
745 *cmds++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
746 *cmds++ = 0;
747
748 /* Set TP0_CHICKEN to zero */
749 *cmds++ = pm4_type0_packet(REG_TP0_CHICKEN, 1);
750 *cmds++ = 0x00000000;
751
752 /* Set PA_SC_AA_CONFIG to 0 */
753 *cmds++ = pm4_type0_packet(REG_PA_SC_AA_CONFIG, 1);
754 *cmds++ = 0x00000000;
755 /* shader constants */
756
757 /* vertex buffer constants */
758 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 7);
759
760 *cmds++ = (0x1 << 16) | (9 * 6);
761 /* valid(?) vtx constant flag & addr */
762 *cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
763 /* limit = 12 dwords */
764 *cmds++ = 0x00000030;
765 /* valid(?) vtx constant flag & addr */
766 *cmds++ = shadow->quad_texcoords.gpuaddr | 0x3;
767 /* limit = 8 dwords */
768 *cmds++ = 0x00000020;
769 *cmds++ = 0;
770 *cmds++ = 0;
771
772 /* Invalidate L2 cache to make sure vertices are updated */
773 *cmds++ = pm4_type0_packet(REG_TC_CNTL_STATUS, 1);
774 *cmds++ = 0x1;
775
776 cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN);
777
Tarun Karra16346b02011-07-24 15:04:26 -0700778 /* Repartition shaders */
779 *cmds++ = pm4_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
780 *cmds++ = 0x180;
781
782 /* Invalidate Vertex & Pixel instruction code address and sizes */
783 *cmds++ = pm4_type3_packet(PM4_INVALIDATE_STATE, 1);
784 *cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */
785
786 *cmds++ = pm4_type3_packet(PM4_SET_SHADER_BASES, 1);
787 *cmds++ = (0x80000000) | 0x180;
788
Jordan Crousea78c9172011-07-11 13:14:09 -0600789 /* Load the patched fragment shader stream */
790 cmds =
791 program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN);
792
793 /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */
794 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600795 *cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600796 *cmds++ = 0x10030002;
797 *cmds++ = 0x00000008;
798
799 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600800 *cmds++ = CP_REG(REG_PA_SC_AA_MASK);
Jordan Crousea78c9172011-07-11 13:14:09 -0600801 *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */
802
Jeremy Gebben5bb7ece2011-08-02 11:04:48 -0600803 if (!adreno_is_a22x(adreno_dev)) {
Jordan Crousea78c9172011-07-11 13:14:09 -0600804 /* PA_SC_VIZ_QUERY */
805 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600806 *cmds++ = CP_REG(REG_PA_SC_VIZ_QUERY);
Jordan Crousea78c9172011-07-11 13:14:09 -0600807 *cmds++ = 0x0; /*REG_PA_SC_VIZ_QUERY */
808 }
809
810 /* RB_COLORCONTROL */
811 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600812 *cmds++ = CP_REG(REG_RB_COLORCONTROL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600813 *cmds++ = 0x00000c20;
814
815 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 4);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600816 *cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
Jordan Crousea78c9172011-07-11 13:14:09 -0600817 *cmds++ = 0x00ffffff; /* mmVGT_MAX_VTX_INDX */
818 *cmds++ = 0x0; /* mmVGT_MIN_VTX_INDX */
819 *cmds++ = 0x00000000; /* mmVGT_INDX_OFFSET */
820
821 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600822 *cmds++ = CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600823 *cmds++ = 0x00000002; /* mmVGT_VERTEX_REUSE_BLOCK_CNTL */
824 *cmds++ = 0x00000002; /* mmVGT_OUT_DEALLOC_CNTL */
825
826 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600827 *cmds++ = CP_REG(REG_SQ_INTERPOLATOR_CNTL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600828 *cmds++ = 0xffffffff; /* mmSQ_INTERPOLATOR_CNTL */
829
830 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600831 *cmds++ = CP_REG(REG_PA_SC_AA_CONFIG);
Jordan Crousea78c9172011-07-11 13:14:09 -0600832 *cmds++ = 0x00000000; /* REG_PA_SC_AA_CONFIG */
833
834 /* set REG_PA_SU_SC_MODE_CNTL
835 * Front_ptype = draw triangles
836 * Back_ptype = draw triangles
837 * Provoking vertex = last
838 */
839 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600840 *cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600841 *cmds++ = 0x00080240;
842
843 /* texture constants */
844 *cmds++ =
845 pm4_type3_packet(PM4_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1));
846 *cmds++ = (0x1 << 16) | (0 * 6);
847 memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN << 2);
848 cmds[0] |= (shadow->pitch >> 5) << 22;
849 cmds[1] |=
850 shadow->gmemshadow.gpuaddr | surface_format_table[shadow->format];
851 cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13;
852 cmds += SYS2GMEM_TEX_CONST_LEN;
853
854 /* program surface info */
855 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600856 *cmds++ = CP_REG(REG_RB_SURFACE_INFO);
Jordan Crousea78c9172011-07-11 13:14:09 -0600857 *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */
858
859 /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0,
860 * Base=gmem_base
861 */
862 *cmds++ =
863 (shadow->
864 format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base;
865
866 /* RB_DEPTHCONTROL */
867 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600868 *cmds++ = CP_REG(REG_RB_DEPTHCONTROL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600869
Jeremy Gebben5bb7ece2011-08-02 11:04:48 -0600870 if (adreno_is_a22x(adreno_dev))
Jordan Crousea78c9172011-07-11 13:14:09 -0600871 *cmds++ = 8; /* disable Z */
872 else
873 *cmds++ = 0; /* disable Z */
874
875 /* Use maximum scissor values -- quad vertices already
876 * have the correct bounds */
877 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600878 *cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600879 *cmds++ = (0 << 16) | 0;
880 *cmds++ = ((0x1fff) << 16) | 0x1fff;
881 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600882 *cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600883 *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0);
884 *cmds++ = ((0x1fff) << 16) | 0x1fff;
885
886 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600887 *cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600888 /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */
889 *cmds++ = 0x00000b00;
890
891 /*load the viewport so that z scale = clear depth and z offset = 0.0f */
892 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600893 *cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
Jordan Crousea78c9172011-07-11 13:14:09 -0600894 *cmds++ = 0xbf800000;
895 *cmds++ = 0x0;
896
897 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600898 *cmds++ = CP_REG(REG_RB_COLOR_MASK);
Jordan Crousea78c9172011-07-11 13:14:09 -0600899 *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */
900
901 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600902 *cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
Jordan Crousea78c9172011-07-11 13:14:09 -0600903 *cmds++ = 0xffffffff;
904
905 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 3);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600906 *cmds++ = CP_REG(REG_SQ_WRAPPING_0);
Jordan Crousea78c9172011-07-11 13:14:09 -0600907 *cmds++ = 0x00000000;
908 *cmds++ = 0x00000000;
909
910 /* load the stencil ref value
911 * $AAM - do this later
912 */
913 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600914 *cmds++ = CP_REG(REG_RB_MODECONTROL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600915 /* draw pixels with color and depth/stencil component */
916 *cmds++ = 0x4;
917
918 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600919 *cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600920 *cmds++ = 0x00010000;
921
Jeremy Gebben5bb7ece2011-08-02 11:04:48 -0600922 if (adreno_is_a22x(adreno_dev)) {
Jordan Crousea78c9172011-07-11 13:14:09 -0600923 *cmds++ = pm4_type3_packet(PM4_SET_DRAW_INIT_FLAGS, 1);
924 *cmds++ = 0;
925
926 *cmds++ = pm4_type3_packet(PM4_SET_CONSTANT, 2);
Jordan Crouse0e0486f2011-07-28 08:37:58 -0600927 *cmds++ = CP_REG(REG_LEIA_RB_LRZ_VSC_CONTROL);
Jordan Crousea78c9172011-07-11 13:14:09 -0600928 *cmds++ = 0x0000000;
929
930 *cmds++ = pm4_type3_packet(PM4_DRAW_INDX, 3);
931 *cmds++ = 0; /* viz query info. */
932 /* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/
933 *cmds++ = 0x00004088;
934 *cmds++ = 3; /* NumIndices=3 */
935 } else {
936 /* queue the draw packet */
937 *cmds++ = pm4_type3_packet(PM4_DRAW_INDX, 2);
938 *cmds++ = 0; /* viz query info. */
939 /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */
940 *cmds++ = 0x00030088;
941 }
942
943 /* create indirect buffer command for above command sequence */
944 create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
945
946 return cmds;
947}
948
/*
 * build_regrestore_cmds() - assemble the indirect buffer (IB1) that restores
 * a context's shadowed h/w registers and ALU/texture/boolean/loop constants.
 *
 * The command stream is written into the shared gpustate buffer at
 * tmp_ctx.cmd and is self-modifying: the LOAD_CONSTANT_CONTEXT header at
 * start[2] is back-patched once the total register-range length is known.
 * Statement order is significant — do not reorder emissions.
 */
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;

	unsigned int i = 0;
	unsigned int reg_array_size = 0;
	const unsigned int *ptr_register_ranges;

	*cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* H/W Registers */
	/* deferred pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, ???); */
	/* start[2] is skipped here and patched below once the packet
	 * length (number of register-range dwords) is known. */
	cmd++;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	/* 8K-aligned GPU address of the register shadow area */
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Based on chip id choose the registers ranges*/
	if (adreno_is_a22x(adreno_dev)) {
		ptr_register_ranges = register_ranges_a22x;
		reg_array_size = ARRAY_SIZE(register_ranges_a22x);
	} else {
		ptr_register_ranges = register_ranges_a20x;
		reg_array_size = ARRAY_SIZE(register_ranges_a20x);
	}


	/* ranges table holds (first, last) register pairs */
	for (i = 0; i < (reg_array_size/2); i++) {
		cmd = reg_range(cmd, ptr_register_ranges[i*2],
				ptr_register_ranges[i*2+1]);
	}

	/* Now we know how many register blocks we have, we can compute command
	 * length
	 */
	start[2] =
	    pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3);
	/* Enable shadowing for the entire register block. */
	/* NOTE(review): start[4] is the first dword emitted by reg_range();
	 * bit 24 appears to be the shadow-enable flag and (4 << 16) the
	 * block select — confirm against the PM4 LOAD_CONSTANT_CONTEXT
	 * packet definition. */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	start[4] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	start[4] |= (1 << 24) | (4 << 16);
#endif

	/* Need to handle some of the registers separately */
	*cmd++ = pm4_type0_packet(REG_SQ_GPR_MANAGEMENT, 1);
	/* record the GPU address of the data dword so it can be patched
	 * later with the saved register value */
	tmp_ctx.reg_values[0] = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0x00040400;

	*cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;
	*cmd++ = pm4_type0_packet(REG_TP0_CHICKEN, 1);
	tmp_ctx.reg_values[1] = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0x00000000;

	if (adreno_is_a22x(adreno_dev)) {
		/* a22x: one patchable type-0 write per VSC register in
		 * [REG_LEIA_VSC_BIN_SIZE, REG_LEIA_VSC_PIPE_DATA_LENGTH_7];
		 * reg_values[] slots continue from index 2. */
		unsigned int i;
		unsigned int j = 2;
		for (i = REG_LEIA_VSC_BIN_SIZE; i <=
			REG_LEIA_VSC_PIPE_DATA_LENGTH_7; i++) {
			*cmd++ = pm4_type0_packet(i, 1);
			tmp_ctx.reg_values[j] = virt2gpu(cmd,
				&drawctxt->gpustate);
			*cmd++ = 0x00000000;
			j++;
		}
	}

	/* ALU Constants */
	*cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	*cmd++ = (0 << 24) | (0 << 16) | 0;	/* Disable shadowing */
#else
	*cmd++ = (1 << 24) | (0 << 16) | 0;
#endif
	*cmd++ = ALU_CONSTANTS;

	/* Texture Constants */
	*cmd++ = pm4_type3_packet(PM4_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Disable shadowing */
	*cmd++ = (0 << 24) | (1 << 16) | 0;
#else
	*cmd++ = (1 << 24) | (1 << 16) | 0;
#endif
	*cmd++ = TEX_CONSTANTS;

	/* Boolean Constants */
	*cmd++ = pm4_type3_packet(PM4_SET_CONSTANT, 1 + BOOL_CONSTANTS);
	*cmd++ = (2 << 16) | 0;

	/* the next BOOL_CONSTANT dwords is the shadow area for
	 * boolean constants.
	 */
	tmp_ctx.bool_shadow = virt2gpu(cmd, &drawctxt->gpustate);
	cmd += BOOL_CONSTANTS;

	/* Loop Constants */
	*cmd++ = pm4_type3_packet(PM4_SET_CONSTANT, 1 + LOOP_CONSTANTS);
	*cmd++ = (3 << 16) | 0;

	/* the next LOOP_CONSTANTS dwords is the shadow area for
	 * loop constants.
	 */
	tmp_ctx.loop_shadow = virt2gpu(cmd, &drawctxt->gpustate);
	cmd += LOOP_CONSTANTS;

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);

	tmp_ctx.cmd = cmd;
}
1069
/*
 * build_shader_save_restore_cmds() - assemble three indirect buffers that
 * save, fix up, and restore shader partitioning and instruction memory.
 *
 * The restore IB contains "TBD" placeholder dwords that are filled in at
 * runtime by the save/fixup IBs via PM4_REG_TO_MEM (self-modifying PM4).
 * The placeholder pointers (shaderBases, partition1/2, startSize*) therefore
 * cross-reference between the three streams — do not reorder emissions.
 */
static void
build_shader_save_restore_cmds(struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *save, *restore, *fixup;
#if defined(PM4_IM_STORE)
	unsigned int *startSizeVtx, *startSizePix, *startSizeShared;
#endif
	unsigned int *partition1;
	unsigned int *shaderBases, *partition2;

#if defined(PM4_IM_STORE)
	/* compute vertex, pixel and shared instruction shadow GPU addresses */
	tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET;
	tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + SHADER_SHADOW_SIZE;
	tmp_ctx.shader_shared = tmp_ctx.shader_pixel + SHADER_SHADOW_SIZE;
#endif

	/* restore shader partitioning and instructions */

	restore = cmd;		/* start address */

	/* Invalidate Vertex & Pixel instruction code address and sizes */
	*cmd++ = pm4_type3_packet(PM4_INVALIDATE_STATE, 1);
	*cmd++ = 0x00000300;	/* 0x100 = Vertex, 0x200 = Pixel */

	/* Restore previous shader vertex & pixel instruction bases. */
	*cmd++ = pm4_type3_packet(PM4_SET_SHADER_BASES, 1);
	shaderBases = cmd++;	/* TBD #5: shader bases (from fixup) */

	/* write the shader partition information to a scratch register */
	*cmd++ = pm4_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	partition1 = cmd++;	/* TBD #4a: partition info (from save) */

#if defined(PM4_IM_STORE)
	/* load vertex shader instructions from the shadow. */
	*cmd++ = pm4_type3_packet(PM4_IM_LOAD, 2);
	*cmd++ = tmp_ctx.shader_vertex + 0x0;	/* 0x0 = Vertex */
	startSizeVtx = cmd++;	/* TBD #1: start/size (from save) */

	/* load pixel shader instructions from the shadow. */
	*cmd++ = pm4_type3_packet(PM4_IM_LOAD, 2);
	*cmd++ = tmp_ctx.shader_pixel + 0x1;	/* 0x1 = Pixel */
	startSizePix = cmd++;	/* TBD #2: start/size (from save) */

	/* load shared shader instructions from the shadow. */
	*cmd++ = pm4_type3_packet(PM4_IM_LOAD, 2);
	*cmd++ = tmp_ctx.shader_shared + 0x2;	/* 0x2 = Shared */
	startSizeShared = cmd++;	/* TBD #3: start/size (from save) */
#endif

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd);

	/*
	 * fixup SET_SHADER_BASES data
	 *
	 * since self-modifying PM4 code is being used here, a separate
	 * command buffer is used for this fixup operation, to ensure the
	 * commands are not read by the PM4 engine before the data fields
	 * have been written.
	 */

	fixup = cmd;		/* start address */

	/* write the shader partition information to a scratch register */
	*cmd++ = pm4_type0_packet(REG_SCRATCH_REG2, 1);
	partition2 = cmd++;	/* TBD #4b: partition info (from save) */

	/* mask off unused bits, then OR with shader instruction memory size */
	*cmd++ = pm4_type3_packet(PM4_REG_RMW, 3);
	*cmd++ = REG_SCRATCH_REG2;
	/* AND off invalid bits. */
	*cmd++ = 0x0FFF0FFF;
	/* OR in instruction memory size */
	*cmd++ = (unsigned int)((SHADER_INSTRUCT_LOG2 - 5U) << 29);

	/* write the computed value to the SET_SHADER_BASES data field */
	*cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
	*cmd++ = REG_SCRATCH_REG2;
	/* TBD #5: shader bases (to restore) */
	*cmd++ = virt2gpu(shaderBases, &drawctxt->gpustate);

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd);

	/* save shader partitioning and instructions */

	save = cmd;		/* start address */

	*cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* fetch the SQ_INST_STORE_MANAGMENT register value,
	 * store the value in the data fields of the SET_CONSTANT commands
	 * above.
	 */
	*cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
	*cmd++ = REG_SQ_INST_STORE_MANAGMENT;
	/* TBD #4a: partition info (to restore) */
	*cmd++ = virt2gpu(partition1, &drawctxt->gpustate);
	*cmd++ = pm4_type3_packet(PM4_REG_TO_MEM, 2);
	*cmd++ = REG_SQ_INST_STORE_MANAGMENT;
	/* TBD #4b: partition info (to fixup) */
	*cmd++ = virt2gpu(partition2, &drawctxt->gpustate);

#if defined(PM4_IM_STORE)

	/* store the vertex shader instructions */
	*cmd++ = pm4_type3_packet(PM4_IM_STORE, 2);
	*cmd++ = tmp_ctx.shader_vertex + 0x0;	/* 0x0 = Vertex */
	/* TBD #1: start/size (to restore) */
	*cmd++ = virt2gpu(startSizeVtx, &drawctxt->gpustate);

	/* store the pixel shader instructions */
	*cmd++ = pm4_type3_packet(PM4_IM_STORE, 2);
	*cmd++ = tmp_ctx.shader_pixel + 0x1;	/* 0x1 = Pixel */
	/* TBD #2: start/size (to restore) */
	*cmd++ = virt2gpu(startSizePix, &drawctxt->gpustate);

	/* store the shared shader instructions if vertex base is nonzero */

	*cmd++ = pm4_type3_packet(PM4_IM_STORE, 2);
	*cmd++ = tmp_ctx.shader_shared + 0x2;	/* 0x2 = Shared */
	/* TBD #3: start/size (to restore) */
	*cmd++ = virt2gpu(startSizeShared, &drawctxt->gpustate);

#endif

	*cmd++ = pm4_type3_packet(PM4_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, save, cmd);

	tmp_ctx.cmd = cmd;
}
1207
1208/* create buffers for saving/restoring registers, constants, & GMEM */
1209static int a2xx_ctxt_gpustate_shadow(struct adreno_device *adreno_dev,
1210 struct adreno_context *drawctxt)
1211{
1212 int result;
1213
1214 /* Allocate vmalloc memory to store the gpustate */
1215 result = kgsl_allocate(&drawctxt->gpustate,
1216 drawctxt->pagetable, CONTEXT_SIZE);
1217
1218 if (result)
1219 return result;
1220
1221 drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
1222
1223 /* Blank out h/w register, constant, and command buffer shadows. */
1224 kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, CONTEXT_SIZE);
1225
1226 /* set-up command and vertex buffer pointers */
1227 tmp_ctx.cmd = tmp_ctx.start
1228 = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET);
1229
1230 /* build indirect command buffers to save & restore regs/constants */
1231 adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT);
1232 build_regrestore_cmds(adreno_dev, drawctxt);
1233 build_regsave_cmds(adreno_dev, drawctxt);
1234
1235 build_shader_save_restore_cmds(drawctxt);
1236
1237 kgsl_cache_range_op(&drawctxt->gpustate,
1238 KGSL_CACHE_OP_FLUSH);
1239
1240 return 0;
1241}
1242
1243/* create buffers for saving/restoring registers, constants, & GMEM */
1244static int a2xx_ctxt_gmem_shadow(struct adreno_device *adreno_dev,
1245 struct adreno_context *drawctxt)
1246{
1247 int result;
1248
Jordan Crouse0e0486f2011-07-28 08:37:58 -06001249 calc_gmemsize(&drawctxt->context_gmem_shadow,
1250 adreno_dev->gmemspace.sizebytes);
Jordan Crousea78c9172011-07-11 13:14:09 -06001251 tmp_ctx.gmem_base = adreno_dev->gmemspace.gpu_base;
1252
1253 result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
1254 drawctxt->pagetable, drawctxt->context_gmem_shadow.size);
1255
1256 if (result)
1257 return result;
1258
1259 /* we've allocated the shadow, when swapped out, GMEM must be saved. */
1260 drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW | CTXT_FLAGS_GMEM_SAVE;
1261
1262 /* blank out gmem shadow. */
1263 kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0,
1264 drawctxt->context_gmem_shadow.size);
1265
1266 /* build quad vertex buffer */
Jordan Crouse0e0486f2011-07-28 08:37:58 -06001267 build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
1268 &tmp_ctx.cmd);
Jordan Crousea78c9172011-07-11 13:14:09 -06001269
1270 /* build TP0_CHICKEN register restore command buffer */
1271 tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt);
1272
1273 /* build indirect command buffers to save & restore gmem */
1274 /* Idle because we are reading PM override registers */
1275 adreno_idle(&adreno_dev->dev, KGSL_TIMEOUT_DEFAULT);
1276 drawctxt->context_gmem_shadow.gmem_save_commands = tmp_ctx.cmd;
1277 tmp_ctx.cmd =
1278 build_gmem2sys_cmds(adreno_dev, drawctxt,
1279 &drawctxt->context_gmem_shadow);
1280 drawctxt->context_gmem_shadow.gmem_restore_commands = tmp_ctx.cmd;
1281 tmp_ctx.cmd =
1282 build_sys2gmem_cmds(adreno_dev, drawctxt,
1283 &drawctxt->context_gmem_shadow);
1284
1285 kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
1286 KGSL_CACHE_OP_FLUSH);
1287
1288 return 0;
1289}
1290
1291static void a2xx_ctxt_save(struct adreno_device *adreno_dev,
1292 struct adreno_context *context)
1293{
1294 struct kgsl_device *device = &adreno_dev->dev;
1295
1296 if (context == NULL)
1297 return;
1298
1299 if (context->flags & CTXT_FLAGS_GPU_HANG)
1300 KGSL_CTXT_WARN(device,
1301 "Current active context has caused gpu hang\n");
1302
1303 KGSL_CTXT_INFO(device,
1304 "active context flags %08x\n", context->flags);
1305
1306 /* save registers and constants. */
1307 adreno_ringbuffer_issuecmds(device, 0, context->reg_save, 3);
1308
1309 if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
1310 /* save shader partitioning and instructions. */
1311 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
1312 context->shader_save, 3);
1313
1314 /* fixup shader partitioning parameter for
1315 * SET_SHADER_BASES.
1316 */
1317 adreno_ringbuffer_issuecmds(device, 0,
1318 context->shader_fixup, 3);
1319
1320 context->flags |= CTXT_FLAGS_SHADER_RESTORE;
1321 }
1322
1323 if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
1324 (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
1325 /* save gmem.
1326 * (note: changes shader. shader must already be saved.)
1327 */
1328 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
1329 context->context_gmem_shadow.gmem_save, 3);
1330
1331 /* Restore TP0_CHICKEN */
1332 adreno_ringbuffer_issuecmds(device, 0,
1333 context->chicken_restore, 3);
1334
1335 context->flags |= CTXT_FLAGS_GMEM_RESTORE;
1336 }
1337}
1338
1339static void a2xx_ctxt_restore(struct adreno_device *adreno_dev,
1340 struct adreno_context *context)
1341{
1342 struct kgsl_device *device = &adreno_dev->dev;
1343 unsigned int cmds[5];
1344
1345 if (context == NULL) {
1346 /* No context - set the default apgetable and thats it */
1347 kgsl_mmu_setstate(device, device->mmu.defaultpagetable);
1348 return;
1349 }
1350
1351 KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);
1352
1353 cmds[0] = pm4_nop_packet(1);
1354 cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
1355 cmds[2] = pm4_type3_packet(PM4_MEM_WRITE, 2);
1356 cmds[3] = device->memstore.gpuaddr +
1357 KGSL_DEVICE_MEMSTORE_OFFSET(current_context);
1358 cmds[4] = (unsigned int) context;
1359 adreno_ringbuffer_issuecmds(device, 0, cmds, 5);
1360 kgsl_mmu_setstate(device, context->pagetable);
1361
1362#ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP
1363 kgsl_cffdump_syncmem(NULL, &context->gpustate,
1364 context->gpustate.gpuaddr, LCC_SHADOW_SIZE +
1365 REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, false);
1366#endif
1367
1368 /* restore gmem.
1369 * (note: changes shader. shader must not already be restored.)
1370 */
1371 if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
1372 adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_PMODE,
1373 context->context_gmem_shadow.gmem_restore, 3);
1374
1375 /* Restore TP0_CHICKEN */
1376 adreno_ringbuffer_issuecmds(device, 0,
1377 context->chicken_restore, 3);
1378
1379 context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
1380 }
1381
1382 /* restore registers and constants. */
1383 adreno_ringbuffer_issuecmds(device, 0,
1384 context->reg_restore, 3);
1385
1386 /* restore shader instructions & partitioning. */
1387 if (context->flags & CTXT_FLAGS_SHADER_RESTORE) {
1388 adreno_ringbuffer_issuecmds(device, 0,
1389 context->shader_restore, 3);
1390 }
1391
Jeremy Gebben5bb7ece2011-08-02 11:04:48 -06001392 if (adreno_is_a20x(adreno_dev)) {
1393 cmds[0] = pm4_type3_packet(PM4_SET_BIN_BASE_OFFSET, 1);
1394 cmds[1] = context->bin_base_offset;
Jordan Crousea78c9172011-07-11 13:14:09 -06001395 adreno_ringbuffer_issuecmds(device, 0, cmds, 2);
Jeremy Gebben5bb7ece2011-08-02 11:04:48 -06001396 }
Jordan Crousea78c9172011-07-11 13:14:09 -06001397}
1398
1399/*
1400 * Interrupt management
1401 *
1402 * a2xx interrupt control is distributed among the various
1403 * hardware components (RB, CP, MMU). The main interrupt
1404 * tells us which component fired the interrupt, but one needs
1405 * to go to the individual component to find out why. The
1406 * following functions provide the broken out support for
1407 * managing the interrupts
1408 */
1409
/* RBBM interrupts we enable/acknowledge: only read-error. */
#define RBBM_INT_MASK RBBM_INT_CNTL__RDERR_INT_MASK

/* CP interrupts we enable/acknowledge: the fatal error sources listed in
 * kgsl_cp_error_irqs below, plus the IB1 and RB completion interrupts. */
#define CP_INT_MASK \
	(CP_INT_CNTL__T0_PACKET_IN_IB_MASK | \
	CP_INT_CNTL__OPCODE_ERROR_MASK | \
	CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK | \
	CP_INT_CNTL__RESERVED_BIT_ERROR_MASK | \
	CP_INT_CNTL__IB_ERROR_MASK | \
	CP_INT_CNTL__IB1_INT_MASK | \
	CP_INT_CNTL__RB_INT_MASK)

/* Max attempts to read a nonzero REG_CP_INT_STATUS while the master
 * interrupt signal still reports a pending CP interrupt. */
#define VALID_STATUS_COUNT_MAX 10

/* Fatal CP error interrupts and the message logged for each; consumed by
 * a2xx_cp_intrcallback(), which disables IRQs when any of these fire. */
static struct {
	unsigned int mask;
	const char *message;
} kgsl_cp_error_irqs[] = {
	{ CP_INT_CNTL__T0_PACKET_IN_IB_MASK,
		"ringbuffer TO packet in IB interrupt" },
	{ CP_INT_CNTL__OPCODE_ERROR_MASK,
		"ringbuffer opcode error interrupt" },
	{ CP_INT_CNTL__PROTECTED_MODE_ERROR_MASK,
		"ringbuffer protected mode error interrupt" },
	{ CP_INT_CNTL__RESERVED_BIT_ERROR_MASK,
		"ringbuffer reserved bit error interrupt" },
	{ CP_INT_CNTL__IB_ERROR_MASK,
		"ringbuffer IB error interrupt" },
};
1438
Jordan Crousea78c9172011-07-11 13:14:09 -06001439static void a2xx_cp_intrcallback(struct kgsl_device *device)
1440{
1441 unsigned int status = 0, num_reads = 0, master_status = 0;
1442 struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
1443 struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
Jordan Crousec8c9fcd2011-07-28 08:37:58 -06001444 int i;
Jordan Crousea78c9172011-07-11 13:14:09 -06001445
1446 adreno_regread(device, REG_MASTER_INT_SIGNAL, &master_status);
1447 while (!status && (num_reads < VALID_STATUS_COUNT_MAX) &&
1448 (master_status & MASTER_INT_SIGNAL__CP_INT_STAT)) {
1449 adreno_regread(device, REG_CP_INT_STATUS, &status);
1450 adreno_regread(device, REG_MASTER_INT_SIGNAL,
1451 &master_status);
1452 num_reads++;
1453 }
1454 if (num_reads > 1)
1455 KGSL_DRV_WARN(device,
1456 "Looped %d times to read REG_CP_INT_STATUS\n",
1457 num_reads);
1458 if (!status) {
1459 if (master_status & MASTER_INT_SIGNAL__CP_INT_STAT) {
1460 /* This indicates that we could not read CP_INT_STAT.
1461 * As a precaution just wake up processes so
1462 * they can check their timestamps. Since, we
1463 * did not ack any interrupts this interrupt will
1464 * be generated again */
1465 KGSL_DRV_WARN(device, "Unable to read CP_INT_STATUS\n");
1466 wake_up_interruptible_all(&device->wait_queue);
1467 } else
1468 KGSL_DRV_WARN(device, "Spurious interrput detected\n");
1469 return;
1470 }
1471
1472 if (status & CP_INT_CNTL__RB_INT_MASK) {
1473 /* signal intr completion event */
1474 unsigned int enableflag = 0;
1475 kgsl_sharedmem_writel(&rb->device->memstore,
1476 KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable),
1477 enableflag);
1478 wmb();
1479 KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
1480 }
1481
Jordan Crousec8c9fcd2011-07-28 08:37:58 -06001482 for (i = 0; i < ARRAY_SIZE(kgsl_cp_error_irqs); i++) {
1483 if (status & kgsl_cp_error_irqs[i].mask) {
1484 KGSL_CMD_CRIT(rb->device, "%s\n",
1485 kgsl_cp_error_irqs[i].message);
1486 /*
1487 * on fatal errors, turn off the interrupts to
1488 * avoid storming. This has the side effect of
1489 * forcing a PM dump when the timestamp times out
1490 */
Jordan Crousea78c9172011-07-11 13:14:09 -06001491
Jordan Crousec8c9fcd2011-07-28 08:37:58 -06001492 kgsl_pwrctrl_irq(rb->device, KGSL_PWRFLAGS_OFF);
1493 }
1494 }
Jordan Crousea78c9172011-07-11 13:14:09 -06001495
1496 /* only ack bits we understand */
Jordan Crousec8c9fcd2011-07-28 08:37:58 -06001497 status &= CP_INT_MASK;
Jordan Crousea78c9172011-07-11 13:14:09 -06001498 adreno_regwrite(device, REG_CP_INT_ACK, status);
1499
1500 if (status & (CP_INT_CNTL__IB1_INT_MASK | CP_INT_CNTL__RB_INT_MASK)) {
1501 KGSL_CMD_WARN(rb->device, "ringbuffer ib1/rb interrupt\n");
1502 wake_up_interruptible_all(&device->wait_queue);
1503 atomic_notifier_call_chain(&(device->ts_notifier_list),
1504 device->id,
1505 NULL);
1506 }
1507}
1508
1509static void a2xx_rbbm_intrcallback(struct kgsl_device *device)
1510{
1511 unsigned int status = 0;
1512 unsigned int rderr = 0;
1513
1514 adreno_regread(device, REG_RBBM_INT_STATUS, &status);
1515
1516 if (status & RBBM_INT_CNTL__RDERR_INT_MASK) {
1517 union rbbm_read_error_u rerr;
1518 adreno_regread(device, REG_RBBM_READ_ERROR, &rderr);
1519 rerr.val = rderr;
1520 if (rerr.f.read_address == REG_CP_INT_STATUS &&
1521 rerr.f.read_error &&
1522 rerr.f.read_requester)
1523 KGSL_DRV_WARN(device,
1524 "rbbm read error interrupt: %08x\n", rderr);
1525 else
1526 KGSL_DRV_CRIT(device,
1527 "rbbm read error interrupt: %08x\n", rderr);
Jordan Crousea78c9172011-07-11 13:14:09 -06001528 }
1529
Jordan Crousec8c9fcd2011-07-28 08:37:58 -06001530 status &= RBBM_INT_MASK;
Jordan Crousea78c9172011-07-11 13:14:09 -06001531 adreno_regwrite(device, REG_RBBM_INT_ACK, status);
1532}
1533
1534irqreturn_t a2xx_irq_handler(struct adreno_device *adreno_dev)
1535{
1536 struct kgsl_device *device = &adreno_dev->dev;
1537 irqreturn_t result = IRQ_NONE;
1538 unsigned int status;
1539
1540 adreno_regread(device, REG_MASTER_INT_SIGNAL, &status);
1541
1542 if (status & MASTER_INT_SIGNAL__MH_INT_STAT) {
1543 kgsl_mh_intrcallback(device);
1544 result = IRQ_HANDLED;
1545 }
1546
1547 if (status & MASTER_INT_SIGNAL__CP_INT_STAT) {
1548 a2xx_cp_intrcallback(device);
1549 result = IRQ_HANDLED;
1550 }
1551
1552 if (status & MASTER_INT_SIGNAL__RBBM_INT_STAT) {
1553 a2xx_rbbm_intrcallback(device);
1554 result = IRQ_HANDLED;
1555 }
1556
1557 return result;
1558}
1559
1560static void a2xx_irq_control(struct adreno_device *adreno_dev, int state)
1561{
1562 struct kgsl_device *device = &adreno_dev->dev;
1563
1564 if (state) {
Jordan Crousec8c9fcd2011-07-28 08:37:58 -06001565 adreno_regwrite(device, REG_RBBM_INT_CNTL, RBBM_INT_MASK);
1566 adreno_regwrite(device, REG_CP_INT_CNTL, CP_INT_MASK);
Jordan Crousea78c9172011-07-11 13:14:09 -06001567 adreno_regwrite(device, MH_INTERRUPT_MASK, KGSL_MMU_INT_MASK);
1568 } else {
1569 adreno_regwrite(device, REG_RBBM_INT_CNTL, 0);
1570 adreno_regwrite(device, REG_CP_INT_CNTL, 0);
1571 adreno_regwrite(device, MH_INTERRUPT_MASK, 0);
1572 }
1573}
1574
/* a2xx implementation of the per-generation adreno GPU operations table:
 * context shadow setup, context save/restore, and interrupt handling. */
struct adreno_gpudev adreno_a2xx_gpudev = {
	.ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow,
	.ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow,
	.ctxt_save = a2xx_ctxt_save,
	.ctxt_restore = a2xx_ctxt_restore,
	.irq_handler = a2xx_irq_handler,
	.irq_control = a2xx_irq_control,
};