/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/log2.h>
#include <linux/time.h>
#include <linux/delay.h>

#include "kgsl.h"
#include "kgsl_sharedmem.h"
#include "kgsl_cffdump.h"

#include "adreno.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"

#include "a2xx_reg.h"
#include "a3xx_reg.h"

#define GSL_RB_NOP_SIZEDWORDS 2

/*
 * CP DEBUG settings for all cores:
 * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control
 * PROG_END_PTR_ENABLE [25] - Allow 128 bit writes to the VBIF
 */

#define CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))

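/**
 * adreno_ringbuffer_submit() - Submit queued commands to the GPU
 * @rb: Pointer to adreno ringbuffer
 *
 * Notify the pwrscale policy that the GPU is busy and write the current
 * write pointer to the hardware WPTR register.
 */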
void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	BUG_ON(rb->wptr == 0);

	/* Let the pwrscale policy know that new commands have
	   been submitted. */
	kgsl_pwrscale_busy(rb->device);

	/*
	 * Synchronize memory before informing the hardware of the
	 * new commands.
	 */
	mb();

	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr);
}

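/**
 * adreno_ringbuffer_waitspace() - Wait for free space in the ringbuffer
 * @rb: Pointer to adreno ringbuffer
 * @context: Pointer to the adreno context needing space (not used here)
 * @numcmds: Number of dwords that must fit in the ringbuffer
 * @wptr_ahead: Non-zero if the write pointer is ahead of the read pointer
 *
 * If @wptr_ahead is set, pad the rest of the buffer with a NOP packet and
 * wrap the write pointer to zero, then poll the read pointer until at
 * least @numcmds dwords are free. Returns 0 on success or -ETIMEDOUT if
 * space does not become available within ADRENO_IDLE_TIMEOUT.
 */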
static int
adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb,
				struct adreno_context *context,
				unsigned int numcmds, int wptr_ahead)
{
	int nopcount;
	unsigned int freecmds;
	unsigned int *cmds;
	uint cmds_gpu;
	unsigned long wait_time;
	unsigned long wait_timeout = msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
	unsigned long wait_time_part;
	unsigned int rptr;

	/* if wptr ahead, fill the remaining with NOPs */
	if (wptr_ahead) {
		/* -1 for header */
		nopcount = rb->sizedwords - rb->wptr - 1;

		cmds = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
		cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*rb->wptr;

		GSL_RB_WRITE(rb->device, cmds, cmds_gpu,
				cp_nop_packet(nopcount));

		/* Make sure that rptr is not 0 before submitting
		 * commands at the end of ringbuffer. We do not
		 * want the rptr and wptr to become equal when
		 * the ringbuffer is not empty */
		do {
			rptr = adreno_get_rptr(rb);
		} while (!rptr);

		rb->wptr = 0;
	}

	wait_time = jiffies + wait_timeout;
	wait_time_part = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART);
	/* wait for space in ringbuffer */
	while (1) {
		rptr = adreno_get_rptr(rb);

		freecmds = rptr - rb->wptr;

		if (freecmds == 0 || freecmds > numcmds)
			break;

		if (time_after(jiffies, wait_time)) {
			KGSL_DRV_ERR(rb->device,
			"Timed out while waiting for freespace in ringbuffer "
			"rptr: 0x%x, wptr: 0x%x\n", rptr, rb->wptr);
			return -ETIMEDOUT;
		}
	}
	return 0;
}

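/**
 * adreno_ringbuffer_allocspace() - Reserve dwords in the ringbuffer
 * @rb: Pointer to adreno ringbuffer
 * @context: Pointer to the submitting adreno context
 * @numcmds: Number of dwords to reserve
 *
 * Advance the write pointer by @numcmds, waiting for space (and keeping
 * room for the wrap-around NOP packet) if necessary. Returns a host
 * pointer to the reserved dwords or an ERR_PTR() on failure.
 */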
unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
					struct adreno_context *context,
					unsigned int numcmds)
{
	unsigned int *ptr = NULL;
	int ret = 0;
	unsigned int rptr;
	BUG_ON(numcmds >= rb->sizedwords);

	rptr = adreno_get_rptr(rb);
	/* check for available space */
	if (rb->wptr >= rptr) {
		/* wptr ahead or equal to rptr */
		/* reserve dwords for nop packet */
		if ((rb->wptr + numcmds) > (rb->sizedwords -
				GSL_RB_NOP_SIZEDWORDS))
			ret = adreno_ringbuffer_waitspace(rb, context,
							numcmds, 1);
	} else {
		/* wptr behind rptr */
		if ((rb->wptr + numcmds) >= rptr)
			ret = adreno_ringbuffer_waitspace(rb, context,
							numcmds, 0);
		/* check for remaining space */
		/* reserve dwords for nop packet */
		if (!ret && (rb->wptr + numcmds) > (rb->sizedwords -
				GSL_RB_NOP_SIZEDWORDS))
			ret = adreno_ringbuffer_waitspace(rb, context,
							numcmds, 1);
	}

	if (!ret) {
		ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
		rb->wptr += numcmds;
	} else
		ptr = ERR_PTR(ret);

	return ptr;
}

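/**
 * _load_firmware() - Read a firmware image into kernel memory
 * @device: Pointer to a KGSL device
 * @fwfile: Name of the firmware file to request
 * @data: Output pointer for the newly allocated copy of the image
 * @len: Output pointer for the image size in bytes
 *
 * Request @fwfile from the firmware loader and copy it into a kmalloc'd
 * buffer owned by the caller. Returns 0 on success or a negative error
 * code on failure.
 */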
static int _load_firmware(struct kgsl_device *device, const char *fwfile,
			  void **data, int *len)
{
	const struct firmware *fw = NULL;
	int ret;

	ret = request_firmware(&fw, fwfile, device->dev);

	if (ret) {
		KGSL_DRV_ERR(device, "request_firmware(%s) failed: %d\n",
			     fwfile, ret);
		return ret;
	}

	*data = kmalloc(fw->size, GFP_KERNEL);

	if (*data) {
		memcpy(*data, fw->data, fw->size);
		*len = fw->size;
	} else
		KGSL_MEM_ERR(device, "kmalloc(%d) failed\n", fw->size);

	release_firmware(fw);
	return (*data != NULL) ? 0 : -ENOMEM;
}

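/**
 * adreno_ringbuffer_read_pm4_ucode() - Read the PM4 microcode into memory
 * @device: Pointer to a KGSL device
 *
 * Load the PM4 microcode image if it is not already cached, validate its
 * size and store the image, size and version in the adreno device.
 */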
int adreno_ringbuffer_read_pm4_ucode(struct kgsl_device *device)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	int ret = 0;

	if (adreno_dev->pm4_fw == NULL) {
		int len;
		void *ptr;

		ret = _load_firmware(device, adreno_dev->pm4_fwfile,
			&ptr, &len);

		if (ret)
			goto err;

		/* PM4 size is 3 dword aligned plus 1 dword of version */
		if (len % ((sizeof(uint32_t) * 3)) != sizeof(uint32_t)) {
			KGSL_DRV_ERR(device, "Bad firmware size: %d\n", len);
			ret = -EINVAL;
			kfree(ptr);
			goto err;
		}

		adreno_dev->pm4_fw_size = len / sizeof(uint32_t);
		adreno_dev->pm4_fw = ptr;
		adreno_dev->pm4_fw_version = adreno_dev->pm4_fw[1];
	}

err:
	return ret;
}

/**
 * adreno_ringbuffer_load_pm4_ucode() - Load pm4 ucode
 * @device: Pointer to a KGSL device
 * @start: Starting index in pm4 ucode to load
 * @addr: Address to load the pm4 ucode
 *
 * Load the pm4 ucode from @start at @addr.
 */
int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device,
			unsigned int start, unsigned int addr)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	int i;

	if (adreno_dev->pm4_fw == NULL) {
		int ret = adreno_ringbuffer_read_pm4_ucode(device);
		if (ret)
			return ret;
	}

	KGSL_DRV_INFO(device, "loading pm4 ucode version: %d\n",
		adreno_dev->pm4_fw_version);

	adreno_writereg(adreno_dev, ADRENO_REG_CP_DEBUG, CP_DEBUG_DEFAULT);
	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_WADDR, addr);
	for (i = 1; i < adreno_dev->pm4_fw_size; i++)
		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_DATA,
			adreno_dev->pm4_fw[i]);

	return 0;
}

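/**
 * adreno_ringbuffer_read_pfp_ucode() - Read the PFP microcode into memory
 * @device: Pointer to a KGSL device
 *
 * Load the PFP microcode image if it is not already cached, validate its
 * size and store the image, size and version in the adreno device.
 */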
int adreno_ringbuffer_read_pfp_ucode(struct kgsl_device *device)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	int ret = 0;

	if (adreno_dev->pfp_fw == NULL) {
		int len;
		void *ptr;

		ret = _load_firmware(device, adreno_dev->pfp_fwfile,
			&ptr, &len);
		if (ret)
			goto err;

		/* PFP size should be dword aligned */
		if (len % sizeof(uint32_t) != 0) {
			KGSL_DRV_ERR(device, "Bad firmware size: %d\n", len);
			ret = -EINVAL;
			kfree(ptr);
			goto err;
		}

		adreno_dev->pfp_fw_size = len / sizeof(uint32_t);
		adreno_dev->pfp_fw = ptr;
		adreno_dev->pfp_fw_version = adreno_dev->pfp_fw[5];
	}

err:
	return ret;
}

/**
 * adreno_ringbuffer_load_pfp_ucode() - Load pfp ucode
 * @device: Pointer to a KGSL device
 * @start: Starting index in pfp ucode to load
 * @addr: Address to load the pfp ucode
 *
 * Load the pfp ucode from @start at @addr.
 */
int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device,
			unsigned int start, unsigned int addr)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	int i;

	if (adreno_dev->pfp_fw == NULL) {
		int ret = adreno_ringbuffer_read_pfp_ucode(device);
		if (ret)
			return ret;
	}

	KGSL_DRV_INFO(device, "loading pfp ucode version: %d\n",
			adreno_dev->pfp_fw_version);

	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, addr);
	for (i = 1; i < adreno_dev->pfp_fw_size; i++)
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
					adreno_dev->pfp_fw[i]);

	return 0;
}

/**
 * _ringbuffer_start_common() - Ringbuffer start
 * @rb: Pointer to adreno ringbuffer
 *
 * Setup ringbuffer for GPU.
 */
int _ringbuffer_start_common(struct adreno_ringbuffer *rb)
{
	int status;
	union reg_cp_rb_cntl cp_rb_cntl;
	unsigned int rb_cntl;
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	if (rb->flags & KGSL_FLAGS_STARTED)
		return 0;

	kgsl_sharedmem_set(rb->device, &rb->memptrs_desc, 0, 0,
			   sizeof(struct kgsl_rbmemptrs));

	kgsl_sharedmem_set(rb->device, &rb->buffer_desc, 0, 0xAA,
			   (rb->sizedwords << 2));

	if (adreno_is_a2xx(adreno_dev)) {
		kgsl_regwrite(device, REG_CP_RB_WPTR_BASE,
			(rb->memptrs_desc.gpuaddr
			+ GSL_RB_MEMPTRS_WPTRPOLL_OFFSET));

		/* setup WPTR delay */
		kgsl_regwrite(device, REG_CP_RB_WPTR_DELAY,
			0 /*0x70000010 */);
	}

	/* setup REG_CP_RB_CNTL */
	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_CNTL, &rb_cntl);
	cp_rb_cntl.val = rb_cntl;

	/*
	 * The size of the ringbuffer in the hardware is the log2
	 * representation of the size in quadwords (sizedwords / 2)
	 */
	cp_rb_cntl.f.rb_bufsz = ilog2(rb->sizedwords >> 1);

	/*
	 * Specify the quadwords to read before updating mem RPTR.
	 * Like above, pass the log2 representation of the blocksize
	 * in quadwords.
	 */
	cp_rb_cntl.f.rb_blksz = ilog2(KGSL_RB_BLKSIZE >> 3);

	if (adreno_is_a2xx(adreno_dev)) {
		/* WPTR polling */
		cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN;
	}

	/* mem RPTR writebacks */
	cp_rb_cntl.f.rb_no_update = GSL_RB_CNTL_NO_UPDATE;

	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL, cp_rb_cntl.val);

	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_BASE,
			rb->buffer_desc.gpuaddr);

	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_RPTR_ADDR,
			rb->memptrs_desc.gpuaddr +
			GSL_RB_MEMPTRS_RPTR_OFFSET);

	if (adreno_is_a2xx(adreno_dev)) {
		/* explicitly clear all cp interrupts */
		kgsl_regwrite(device, REG_CP_INT_ACK, 0xFFFFFFFF);
	}

	/* setup scratch/timestamp */
	adreno_writereg(adreno_dev, ADRENO_REG_SCRATCH_ADDR,
			device->memstore.gpuaddr +
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				soptimestamp));

	adreno_writereg(adreno_dev, ADRENO_REG_SCRATCH_UMSK,
			GSL_RB_MEMPTRS_SCRATCH_MASK);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_dev) || adreno_is_a305c(adreno_dev) ||
		adreno_is_a320(adreno_dev))
		kgsl_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x000E0602);
	else if (adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev))
		kgsl_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x003E2008);

	rb->wptr = 0;

	/* clear ME_HALT to start micro engine */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);

	/* ME init is GPU specific, so jump into the sub-function */
	status = adreno_dev->gpudev->rb_init(adreno_dev, rb);
	if (status)
		return status;

	/* idle device to validate ME INIT */
	status = adreno_idle(device);

	if (status == 0)
		rb->flags |= KGSL_FLAGS_STARTED;

	return status;
}

/**
 * adreno_ringbuffer_warm_start() - Ringbuffer warm start
 * @rb: Pointer to adreno ringbuffer
 *
 * Start the ringbuffer but load only the jump table part of the
 * microcode.
 */
int adreno_ringbuffer_warm_start(struct adreno_ringbuffer *rb)
{
	int status;
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	/* load the CP ucode */
	status = adreno_ringbuffer_load_pm4_ucode(device,
			adreno_dev->pm4_jt_idx, adreno_dev->pm4_jt_addr);
	if (status != 0)
		return status;

	/* load the prefetch parser ucode */
	status = adreno_ringbuffer_load_pfp_ucode(device,
			adreno_dev->pfp_jt_idx, adreno_dev->pfp_jt_addr);
	if (status != 0)
		return status;

	return _ringbuffer_start_common(rb);
}

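/**
 * adreno_ringbuffer_start() - Ringbuffer cold start
 * @rb: Pointer to adreno ringbuffer
 *
 * Start the ringbuffer, loading the complete PM4 and PFP microcode.
 */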
int adreno_ringbuffer_start(struct adreno_ringbuffer *rb)
{
	int status;

	if (rb->flags & KGSL_FLAGS_STARTED)
		return 0;

	/* load the CP ucode */
	status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1, 0);
	if (status != 0)
		return status;

	/* load the prefetch parser ucode */
	status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1, 0);
	if (status != 0)
		return status;

	return _ringbuffer_start_common(rb);
}

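/**
 * adreno_ringbuffer_stop() - Stop the ringbuffer
 * @rb: Pointer to adreno ringbuffer
 *
 * Halt the micro engine on A200 and mark the ringbuffer as stopped.
 */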
void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb)
{
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	if (rb->flags & KGSL_FLAGS_STARTED) {
		if (adreno_is_a200(adreno_dev))
			kgsl_regwrite(rb->device, REG_CP_ME_CNTL, 0x10000000);

		rb->flags &= ~KGSL_FLAGS_STARTED;
	}
}

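/**
 * adreno_ringbuffer_init() - Allocate and set up the ringbuffer
 * @device: Pointer to a KGSL device
 *
 * Allocate contiguous memory for the ringbuffer and for the memptrs block
 * used for read pointer writeback and timestamps.
 */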
int adreno_ringbuffer_init(struct kgsl_device *device)
{
	int status;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	rb->device = device;
	/*
	 * It is silly to convert this to words and then back to bytes
	 * immediately below, but most of the rest of the code deals
	 * in words, so we might as well only do the math once
	 */
	rb->sizedwords = KGSL_RB_SIZE >> 2;

	rb->buffer_desc.flags = KGSL_MEMFLAGS_GPUREADONLY;
	/* allocate memory for ringbuffer */
	status = kgsl_allocate_contiguous(&rb->buffer_desc,
		(rb->sizedwords << 2));

	if (status != 0) {
		adreno_ringbuffer_close(rb);
		return status;
	}

	/* allocate memory for polling and timestamps */
	/* This really only needs 4 byte alignment, but when using the MMU
	 * it must be placed at a page boundary */
	status = kgsl_allocate_contiguous(&rb->memptrs_desc,
		sizeof(struct kgsl_rbmemptrs));

	if (status != 0) {
		adreno_ringbuffer_close(rb);
		return status;
	}

	/* overlay structure on memptrs memory */
	rb->memptrs = (struct kgsl_rbmemptrs *) rb->memptrs_desc.hostptr;

	rb->global_ts = 0;

	return 0;
}

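/**
 * adreno_ringbuffer_close() - Release ringbuffer resources
 * @rb: Pointer to adreno ringbuffer
 *
 * Free the ringbuffer and memptrs allocations, drop the cached PM4 and
 * PFP microcode images and clear the ringbuffer struct.
 */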
void adreno_ringbuffer_close(struct adreno_ringbuffer *rb)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);

	kgsl_sharedmem_free(&rb->buffer_desc);
	kgsl_sharedmem_free(&rb->memptrs_desc);

	kfree(adreno_dev->pfp_fw);
	kfree(adreno_dev->pm4_fw);

	adreno_dev->pfp_fw = NULL;
	adreno_dev->pm4_fw = NULL;

	memset(rb, 0, sizeof(struct adreno_ringbuffer));
}

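/**
 * adreno_ringbuffer_addcmds() - Write a command stream to the ringbuffer
 * @rb: Pointer to adreno ringbuffer
 * @drawctxt: Pointer to the adreno context that issued the commands, or
 * NULL for ringbuffer-internal commands
 * @flags: KGSL_CMD_FLAGS_* modifiers for the submission
 * @cmds: Pointer to the command dwords to copy in
 * @sizedwords: Number of dwords in @cmds
 * @timestamp: Timestamp to write at the end of the pipeline
 *
 * Wrap @cmds with the identifier NOPs, timestamp writes, interrupt and
 * target-specific workaround packets, copy everything into the ringbuffer
 * and submit it to the hardware.
 */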
static int
adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
				struct adreno_context *drawctxt,
				unsigned int flags, unsigned int *cmds,
				int sizedwords, uint32_t timestamp)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	unsigned int *ringcmds;
	unsigned int total_sizedwords = sizedwords;
	unsigned int i;
	unsigned int rcmd_gpu;
	unsigned int context_id;
	unsigned int gpuaddr = rb->device->memstore.gpuaddr;

	if (drawctxt != NULL && kgsl_context_detached(&drawctxt->base))
		return -EINVAL;

	rb->global_ts++;

	/* If this is an internal IB, use the global timestamp for it */
	if (!drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
		timestamp = rb->global_ts;
		context_id = KGSL_MEMSTORE_GLOBAL;
	} else {
		context_id = drawctxt->base.id;
	}

	/*
	 * Note that we cannot safely take drawctxt->mutex here without
	 * potential mutex inversion with device->mutex which is held
	 * here. As a result, any other code that accesses this variable
	 * must also use device->mutex.
	 */
	if (drawctxt)
		drawctxt->internal_timestamp = rb->global_ts;

	/* reserve space to temporarily turn off protected mode
	 * error checking if needed
	 */
	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
	/* 2 dwords to store the start of command sequence */
	total_sizedwords += 2;
	/* internal ib command identifier for the ringbuffer */
	total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0;

	/* Add two dwords for the CP_INTERRUPT */
	total_sizedwords += drawctxt ? 2 : 0;

	/* context rollover */
	if (adreno_is_a3xx(adreno_dev))
		total_sizedwords += 3;

	/* For HLSQ updates below */
	if (adreno_is_a4xx(adreno_dev) || adreno_is_a3xx(adreno_dev))
		total_sizedwords += 4;

	if (adreno_is_a2xx(adreno_dev))
		total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */

	total_sizedwords += 3; /* sop timestamp */
	total_sizedwords += 4; /* eop timestamp */

	if (drawctxt) {
		total_sizedwords += 3; /* global timestamp without cache
					* flush for non-zero context */
	}

	if (adreno_is_a20x(adreno_dev))
		total_sizedwords += 2; /* CACHE_FLUSH */

	if (flags & KGSL_CMD_FLAGS_WFI)
		total_sizedwords += 2; /* WFI */

	/* Add space for the power on shader fixup if we need it */
	if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP)
		total_sizedwords += 5;

	ringcmds = adreno_ringbuffer_allocspace(rb, drawctxt, total_sizedwords);

	if (IS_ERR(ringcmds))
		return PTR_ERR(ringcmds);
	if (ringcmds == NULL)
		return -ENOSPC;

	rcmd_gpu = rb->buffer_desc.gpuaddr
		+ sizeof(uint)*(rb->wptr-total_sizedwords);

	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, cp_nop_packet(1));
	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);

	if (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) {
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, cp_nop_packet(1));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			KGSL_CMD_INTERNAL_IDENTIFIER);
	}

	if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP) {
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, cp_nop_packet(1));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			KGSL_PWRON_FIXUP_IDENTIFIER);
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			CP_HDR_INDIRECT_BUFFER_PFD);
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			adreno_dev->pwron_fixup.gpuaddr);
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			adreno_dev->pwron_fixup_dwords);
	}

	/* start-of-pipeline timestamp */
	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, (gpuaddr +
		KGSL_MEMSTORE_OFFSET(context_id, soptimestamp)));
	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, timestamp);

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* disable protected mode error checking */
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0);
	}

	for (i = 0; i < sizedwords; i++) {
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, *cmds);
		cmds++;
	}

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* re-enable protected mode error checking */
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 1);
	}

	/*
	 * HW workaround for MMU page faults caused by memory being
	 * freed before the GPU has finished using it.
	 */
	if (adreno_is_a2xx(adreno_dev)) {
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0x00);
	}

	if (adreno_is_a3xx(adreno_dev) || adreno_is_a4xx(adreno_dev)) {
		/*
		 * Flush HLSQ lazy updates to make sure there are no
		 * resources pending for indirect loads after the timestamp
		 */

		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 1));
		GSL_RB_WRITE(rb->device, ringcmds,
			rcmd_gpu, 0x07); /* HLSQ_FLUSH */
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0x00);
	}

	/*
	 * End-of-pipeline timestamp. If per-context timestamps are not
	 * enabled, context_id will be KGSL_MEMSTORE_GLOBAL so all
	 * eop timestamps still work out.
	 */
	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, (gpuaddr +
		KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, timestamp);

	if (drawctxt) {
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				eoptimestamp)));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, rb->global_ts);
	}

	if (adreno_is_a20x(adreno_dev)) {
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 1));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, CACHE_FLUSH);
	}

	if (drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_INTERRUPT, 1));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			CP_INT_CNTL__RB_INT_MASK);
	}

	if (adreno_is_a3xx(adreno_dev)) {
		/* Dummy set-constant to trigger context rollover */
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_CONSTANT, 2));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			(0x4<<16)|(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0);
	}

	if (flags & KGSL_CMD_FLAGS_WFI) {
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0x00000000);
	}

	adreno_ringbuffer_submit(rb);

	return 0;
}

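/**
 * adreno_ringbuffer_issuecmds() - Submit driver-internal commands
 * @device: Pointer to a KGSL device
 * @drawctxt: Pointer to the adreno context, or NULL
 * @flags: KGSL_CMD_FLAGS_* modifiers for the submission
 * @cmds: Pointer to the command dwords
 * @sizedwords: Number of dwords in @cmds
 *
 * Mark the commands as an internal issue and add them to the ringbuffer
 * against the global timestamp.
 */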
unsigned int
adreno_ringbuffer_issuecmds(struct kgsl_device *device,
				struct adreno_context *drawctxt,
				unsigned int flags,
				unsigned int *cmds,
				int sizedwords)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;

	flags |= KGSL_CMD_FLAGS_INTERNAL_ISSUE;

	return adreno_ringbuffer_addcmds(rb, drawctxt, flags, cmds,
		sizedwords, 0);
}

static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr,
			int sizedwords);

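/*
 * _handle_type3() - Check a type-3 packet against the list of opcodes that
 * are allowed from userspace, recursing into indirect buffers.
 */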
static bool
_handle_type3(struct kgsl_device_private *dev_priv, uint *hostaddr)
{
	unsigned int opcode = cp_type3_opcode(*hostaddr);
	switch (opcode) {
	case CP_INDIRECT_BUFFER_PFD:
	case CP_INDIRECT_BUFFER_PFE:
	case CP_COND_INDIRECT_BUFFER_PFE:
	case CP_COND_INDIRECT_BUFFER_PFD:
		return _parse_ibs(dev_priv, hostaddr[1], hostaddr[2]);
	case CP_NOP:
	case CP_WAIT_FOR_IDLE:
	case CP_WAIT_REG_MEM:
	case CP_WAIT_REG_EQ:
	case CP_WAT_REG_GTE:
	case CP_WAIT_UNTIL_READ:
	case CP_WAIT_IB_PFD_COMPLETE:
	case CP_REG_RMW:
	case CP_REG_TO_MEM:
	case CP_MEM_WRITE:
	case CP_MEM_WRITE_CNTR:
	case CP_COND_EXEC:
	case CP_COND_WRITE:
	case CP_EVENT_WRITE:
	case CP_EVENT_WRITE_SHD:
	case CP_EVENT_WRITE_CFL:
	case CP_EVENT_WRITE_ZPD:
	case CP_DRAW_INDX:
	case CP_DRAW_INDX_2:
	case CP_DRAW_INDX_BIN:
	case CP_DRAW_INDX_2_BIN:
	case CP_VIZ_QUERY:
	case CP_SET_STATE:
	case CP_SET_CONSTANT:
	case CP_IM_LOAD:
	case CP_IM_LOAD_IMMEDIATE:
	case CP_LOAD_CONSTANT_CONTEXT:
	case CP_INVALIDATE_STATE:
	case CP_SET_SHADER_BASES:
	case CP_SET_BIN_MASK:
	case CP_SET_BIN_SELECT:
	case CP_SET_BIN_BASE_OFFSET:
	case CP_SET_BIN_DATA:
	case CP_CONTEXT_UPDATE:
	case CP_INTERRUPT:
	case CP_IM_STORE:
	case CP_LOAD_STATE:
		break;
	/* these shouldn't come from userspace */
	case CP_ME_INIT:
	case CP_SET_PROTECTED_MODE:
	default:
		KGSL_CMD_ERR(dev_priv->device, "bad CP opcode %0x\n", opcode);
		return false;
		break;
	}

	return true;
}

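/*
 * _handle_type0() - Sanity check the register range touched by a type-0
 * packet.
 */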
static bool
_handle_type0(struct kgsl_device_private *dev_priv, uint *hostaddr)
{
	unsigned int reg = type0_pkt_offset(*hostaddr);
	unsigned int cnt = type0_pkt_size(*hostaddr);
	if (reg < 0x0192 || (reg + cnt) >= 0x8000) {
		KGSL_CMD_ERR(dev_priv->device, "bad type0 reg: 0x%0x cnt: %d\n",
			reg, cnt);
		return false;
	}
	return true;
}

/*
 * Traverse IBs and dump them to test vector. Detect swap by inspecting
 * register writes, keeping note of the current state, and dump
 * framebuffer config to test vector
 */
static bool _parse_ibs(struct kgsl_device_private *dev_priv,
			uint gpuaddr, int sizedwords)
{
	static uint level; /* recursion level */
	bool ret = false;
	uint *hostaddr, *hoststart;
	int dwords_left = sizedwords; /* dwords left in the current command
					 buffer */
	struct kgsl_mem_entry *entry;

	entry = kgsl_sharedmem_find_region(dev_priv->process_priv,
					   gpuaddr, sizedwords * sizeof(uint));
	if (entry == NULL) {
		KGSL_CMD_ERR(dev_priv->device,
			     "no mapping for gpuaddr: 0x%08x\n", gpuaddr);
		return false;
	}

	hostaddr = (uint *)kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr);
	if (hostaddr == NULL) {
		KGSL_CMD_ERR(dev_priv->device,
			     "no mapping for gpuaddr: 0x%08x\n", gpuaddr);
		return false;
	}

	hoststart = hostaddr;

	level++;

	KGSL_CMD_INFO(dev_priv->device, "ib: gpuaddr:0x%08x, wc:%d, hptr:%p\n",
		gpuaddr, sizedwords, hostaddr);

	mb();
	while (dwords_left > 0) {
		bool cur_ret = true;
		int count = 0; /* dword count including packet header */

		switch (*hostaddr >> 30) {
		case 0x0: /* type-0 */
			count = (*hostaddr >> 16)+2;
			cur_ret = _handle_type0(dev_priv, hostaddr);
			break;
		case 0x1: /* type-1 */
			count = 2;
			break;
		case 0x3: /* type-3 */
			count = ((*hostaddr >> 16) & 0x3fff) + 2;
			cur_ret = _handle_type3(dev_priv, hostaddr);
			break;
		default:
			KGSL_CMD_ERR(dev_priv->device, "unexpected type: "
				"type:%d, word:0x%08x @ 0x%p, gpu:0x%08x\n",
				*hostaddr >> 30, *hostaddr, hostaddr,
				gpuaddr+4*(sizedwords-dwords_left));
			cur_ret = false;
			count = dwords_left;
			break;
		}

		if (!cur_ret) {
			KGSL_CMD_ERR(dev_priv->device,
				"bad sub-type: #:%d/%d, v:0x%08x"
				" @ 0x%p[gb:0x%08x], level:%d\n",
				sizedwords-dwords_left, sizedwords, *hostaddr,
				hostaddr, gpuaddr+4*(sizedwords-dwords_left),
				level);

			if (ADRENO_DEVICE(dev_priv->device)->ib_check_level
				>= 2)
				print_hex_dump(KERN_ERR,
					level == 1 ? "IB1:" : "IB2:",
					DUMP_PREFIX_OFFSET, 32, 4, hoststart,
					sizedwords*4, 0);
			goto done;
		}

		/* jump to next packet */
		dwords_left -= count;
		hostaddr += count;
		if (dwords_left < 0) {
			KGSL_CMD_ERR(dev_priv->device,
				"bad count: c:%d, #:%d/%d, "
				"v:0x%08x @ 0x%p[gb:0x%08x], level:%d\n",
				count, sizedwords-(dwords_left+count),
				sizedwords, *(hostaddr-count), hostaddr-count,
				gpuaddr+4*(sizedwords-(dwords_left+count)),
				level);
			if (ADRENO_DEVICE(dev_priv->device)->ib_check_level
				>= 2)
				print_hex_dump(KERN_ERR,
					level == 1 ? "IB1:" : "IB2:",
					DUMP_PREFIX_OFFSET, 32, 4, hoststart,
					sizedwords*4, 0);
			goto done;
		}
	}

	ret = true;
done:
	if (!ret)
		KGSL_DRV_ERR(dev_priv->device,
			"parsing failed: gpuaddr:0x%08x, "
			"host:0x%p, wc:%d\n", gpuaddr, hoststart, sizedwords);

	level--;

	return ret;
}

/**
 * _ringbuffer_verify_ib() - parse an IB and verify that it is correct
 * @dev_priv: Pointer to the process struct
 * @ibdesc: Pointer to the IB descriptor
 *
 * Check that the IB size is within the allowable limit and, when
 * ib_check_level is enabled, walk the IB to do an additional level of
 * parsing and verification beyond what the KGSL core does.
 */
static inline bool _ringbuffer_verify_ib(struct kgsl_device_private *dev_priv,
		struct kgsl_ibdesc *ibdesc)
{
	struct kgsl_device *device = dev_priv->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	/* Check that the size of the IBs is under the allowable limit */
	if (ibdesc->sizedwords == 0 || ibdesc->sizedwords > 0xFFFFF) {
		KGSL_DRV_ERR(device, "Invalid IB size 0x%X\n",
			ibdesc->sizedwords);
		return false;
	}

	if (unlikely(adreno_dev->ib_check_level >= 1) &&
		!_parse_ibs(dev_priv, ibdesc->gpuaddr, ibdesc->sizedwords)) {
		KGSL_DRV_ERR(device, "Could not verify the IBs\n");
		return false;
	}

	return true;
}

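/**
 * adreno_ringbuffer_issueibcmds() - Queue a command batch from userspace
 * @dev_priv: Pointer to the process private struct
 * @context: Pointer to the KGSL context that issued the command batch
 * @cmdbatch: Pointer to the command batch
 * @timestamp: Pointer to the requested/assigned timestamp
 *
 * Verify the IBs in @cmdbatch and hand the batch to the dispatcher for
 * scheduling on the ringbuffer.
 */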
int
adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
				struct kgsl_context *context,
				struct kgsl_cmdbatch *cmdbatch,
				uint32_t *timestamp)
{
	struct kgsl_device *device = dev_priv->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
	int i, ret;

	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID)
		return -EDEADLK;

	/* Verify the IBs before they get queued */

	for (i = 0; i < cmdbatch->ibcount; i++) {
		if (!_ringbuffer_verify_ib(dev_priv, &cmdbatch->ibdesc[i]))
			return -EINVAL;
	}

	/* For now everybody has the same priority */
	cmdbatch->priority = ADRENO_CONTEXT_DEFAULT_PRIORITY;

	/* Queue the command in the ringbuffer */
	ret = adreno_dispatcher_queue_cmd(adreno_dev, drawctxt, cmdbatch,
		timestamp);

	if (ret)
		KGSL_DRV_ERR(device, "adreno_dispatcher_queue_cmd returned %d\n",
				ret);
	else {
		/*
		 * only call trace_gpu_job_enqueue for actual commands - dummy
		 * sync command batches won't get scheduled on the GPU
		 */

		if (!(cmdbatch->flags & KGSL_CONTEXT_SYNC)) {
			const char *str = "3D";
			if (drawctxt->type == KGSL_CONTEXT_TYPE_CL ||
				drawctxt->type == KGSL_CONTEXT_TYPE_RS)
				str = "compute";

			kgsl_trace_gpu_job_enqueue(drawctxt->base.id,
				cmdbatch->timestamp, str);
		}
	}

	return ret;
}

/* adreno_ringbuffer_submitcmd - submit userspace IBs to the GPU */
int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
		struct kgsl_cmdbatch *cmdbatch)
{
	struct kgsl_device *device = &adreno_dev->dev;
	struct kgsl_ibdesc *ibdesc;
	unsigned int numibs;
	unsigned int *link;
	unsigned int *cmds;
	unsigned int i;
	struct kgsl_context *context;
	struct adreno_context *drawctxt;
	unsigned int start_index = 0;
	int flags = KGSL_CMD_FLAGS_NONE;
	int ret;

	context = cmdbatch->context;
	drawctxt = ADRENO_CONTEXT(context);

	ibdesc = cmdbatch->ibdesc;
	numibs = cmdbatch->ibcount;

	/*
	 * When preambles are enabled, the preamble buffer with the state
	 * restoration commands is stored in the first node of the IB chain.
	 * We can skip it if a context switch hasn't occurred.
	 */

	if ((drawctxt->flags & CTXT_FLAGS_PREAMBLE) &&
		!test_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv) &&
		(adreno_dev->drawctxt_active == drawctxt))
		start_index = 1;

	/*
	 * In skip mode don't issue the draw IBs but keep all the other
	 * accoutrements of a submission (including the interrupt) to keep
	 * the accounting sane. Set start_index and numibs to 0 to just
	 * generate the start and end markers and skip everything else.
	 */

	if (test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv)) {
		start_index = 0;
		numibs = 0;
	}

	cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4),
				GFP_KERNEL);
	if (!link) {
		ret = -ENOMEM;
		goto done;
	}

	if (!start_index) {
		*cmds++ = cp_nop_packet(1);
		*cmds++ = KGSL_START_OF_IB_IDENTIFIER;
	} else {
		*cmds++ = cp_nop_packet(4);
		*cmds++ = KGSL_START_OF_IB_IDENTIFIER;
		*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
		*cmds++ = ibdesc[0].gpuaddr;
		*cmds++ = ibdesc[0].sizedwords;
	}
	for (i = start_index; i < numibs; i++) {

		/*
		 * Skip 0 sized IBs - these are presumed to have been removed
		 * from consideration by the FT policy
		 */

		if (ibdesc[i].sizedwords == 0)
			*cmds++ = cp_nop_packet(2);
		else
			*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;

		*cmds++ = ibdesc[i].gpuaddr;
		*cmds++ = ibdesc[i].sizedwords;
	}

	*cmds++ = cp_nop_packet(1);
	*cmds++ = KGSL_END_OF_IB_IDENTIFIER;

	ret = kgsl_setstate(&device->mmu, context->id,
		kgsl_mmu_pt_get_flags(device->mmu.hwpagetable,
			device->id));

	if (ret)
		goto done;

	ret = adreno_drawctxt_switch(adreno_dev, drawctxt, cmdbatch->flags);

	/*
	 * In the unlikely event of an error in the drawctxt switch,
	 * treat it like a hang
	 */
	if (ret)
		goto done;

	if (test_bit(CMDBATCH_FLAG_WFI, &cmdbatch->priv))
		flags = KGSL_CMD_FLAGS_WFI;

	/*
	 * For some targets, we need to execute a dummy shader operation
	 * after a power collapse
	 */

	if (test_and_clear_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv) &&
		test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
		flags |= KGSL_CMD_FLAGS_PWRON_FIXUP;

	ret = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
					drawctxt,
					flags,
					&link[0], (cmds - link),
					cmdbatch->timestamp);

#ifdef CONFIG_MSM_KGSL_CFF_DUMP
	/*
	 * Insert a wait-for-idle after every IB1. This is conservative but
	 * works reliably and is ok even for performance simulations.
	 */
	adreno_idle(device);
#endif

done:
	device->pwrctrl.irq_last = 0;
	kgsl_trace_issueibcmds(device, context->id, cmdbatch,
		cmdbatch->timestamp, cmdbatch->flags, ret,
		drawctxt ? drawctxt->type : 0);

	kfree(link);
	return ret;
}