/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon_reg.h"
#include "radeon.h"
#include <linux/firmware.h>
#include <linux/platform_device.h>

/* Firmware Names */
#define FIRMWARE_R100   "radeon/R100_cp.bin"
#define FIRMWARE_R200   "radeon/R200_cp.bin"
#define FIRMWARE_R300   "radeon/R300_cp.bin"
#define FIRMWARE_R420   "radeon/R420_cp.bin"
#define FIRMWARE_RS690  "radeon/RS690_cp.bin"
#define FIRMWARE_RS600  "radeon/RS600_cp.bin"
#define FIRMWARE_R520   "radeon/R520_cp.bin"

MODULE_FIRMWARE(FIRMWARE_R100);
MODULE_FIRMWARE(FIRMWARE_R200);
MODULE_FIRMWARE(FIRMWARE_R300);
MODULE_FIRMWARE(FIRMWARE_R420);
MODULE_FIRMWARE(FIRMWARE_RS690);
MODULE_FIRMWARE(FIRMWARE_RS600);
MODULE_FIRMWARE(FIRMWARE_R520);

/* This file gathers functions specific to:
 * r100, rv100, rs100, rv200, rs200, r200, rv250, rs300, rv280
 *
 * Some of these functions might be used by newer ASICs.
 */
void r100_hdp_reset(struct radeon_device *rdev);
void r100_gpu_init(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_mc_wait_for_idle(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev);
int r100_debugfs_mc_info_init(struct radeon_device *rdev);


/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
        /* TODO: can we do something here? */
        /* The hardware seems to cache only one entry, so we should discard
         * it; otherwise the first GPU GART read that hits the stale entry
         * could end up at the wrong address. */
}

int r100_pci_gart_enable(struct radeon_device *rdev)
{
        uint32_t tmp;
        int r;

        /* Initialize common gart structure */
        r = radeon_gart_init(rdev);
        if (r) {
                return r;
        }
        if (rdev->gart.table.ram.ptr == NULL) {
                rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
                r = radeon_gart_table_ram_alloc(rdev);
                if (r) {
                        return r;
                }
        }
        /* discard memory requests outside of the configured range */
        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
        WREG32(RADEON_AIC_CNTL, tmp);
        /* set address range for PCI address translation */
        WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
        tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
        WREG32(RADEON_AIC_HI_ADDR, tmp);
        /* Enable bus mastering */
        tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
        WREG32(RADEON_BUS_CNTL, tmp);
        /* set PCI GART page-table base address */
        WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
        WREG32(RADEON_AIC_CNTL, tmp);
        r100_pci_gart_tlb_flush(rdev);
        rdev->gart.ready = true;
        return 0;
}

void r100_pci_gart_disable(struct radeon_device *rdev)
{
        uint32_t tmp;

        /* discard memory requests outside of the configured range */
        tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
        WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
        WREG32(RADEON_AIC_LO_ADDR, 0);
        WREG32(RADEON_AIC_HI_ADDR, 0);
}

int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
        /* valid indices are 0 .. num_gpu_pages - 1 (the original check used
         * '>', which let i == num_gpu_pages write one entry past the table) */
        if (i < 0 || i >= rdev->gart.num_gpu_pages) {
                return -EINVAL;
        }
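        /* Each PCI GART entry is one little-endian dword holding the low
         * 32 bits of the page's bus address (table_size above is
         * num_gpu_pages * 4 bytes). */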
        rdev->gart.table.ram.ptr[i] = cpu_to_le32(lower_32_bits(addr));
        return 0;
}

int r100_gart_enable(struct radeon_device *rdev)
{
        if (rdev->flags & RADEON_IS_AGP) {
                r100_pci_gart_disable(rdev);
                return 0;
        }
        return r100_pci_gart_enable(rdev);
}


/*
 * MC
 */
void r100_mc_disable_clients(struct radeon_device *rdev)
{
        uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl;

        /* FIXME: is this function correct for rs100, rs200, rs300? */
        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }

        /* stop display and memory access */
        ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL);
        WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE);
        crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
        WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS);
        crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);

        r100_gpu_wait_for_vsync(rdev);

        WREG32(RADEON_CRTC_GEN_CNTL,
               (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) |
               RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN);

        if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
                crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);

                r100_gpu_wait_for_vsync2(rdev);
                WREG32(RADEON_CRTC2_GEN_CNTL,
                       (crtc2_gen_cntl &
                        ~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) |
                       RADEON_CRTC2_DISP_REQ_EN_B);
        }

        udelay(500);
}

void r100_mc_setup(struct radeon_device *rdev)
{
        uint32_t tmp;
        int r;

        r = r100_debugfs_mc_info_init(rdev);
        if (r) {
                DRM_ERROR("Failed to register debugfs file for R100 MC !\n");
        }
        /* Write VRAM size in case we are limiting it */
        WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
        /* Novell bug 204882 for RN50/M6/M7 with 8/16/32MB VRAM:
         * if the aperture is 64MB but we only have 32MB VRAM,
         * we report 32MB VRAM but still have to set MC_FB_LOCATION
         * to 64MB, otherwise the GPU accidentally dies */
        tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
        tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
        tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
        WREG32(RADEON_MC_FB_LOCATION, tmp);
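        /* MC_FB_LOCATION packs the framebuffer start and top addresses in
         * 64KB units, hence the >> 16 shifts above. */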

        /* Enable bus mastering */
        tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
        WREG32(RADEON_BUS_CNTL, tmp);

        if (rdev->flags & RADEON_IS_AGP) {
                tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
                tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16);
                tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16);
                WREG32(RADEON_MC_AGP_LOCATION, tmp);
                WREG32(RADEON_AGP_BASE, rdev->mc.agp_base);
        } else {
                WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF);
                WREG32(RADEON_AGP_BASE, 0);
        }

        tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
        tmp |= (7 << 28);
        WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
        (void)RREG32(RADEON_HOST_PATH_CNTL);
        WREG32(RADEON_HOST_PATH_CNTL, tmp);
        (void)RREG32(RADEON_HOST_PATH_CNTL);
}

int r100_mc_init(struct radeon_device *rdev)
{
        int r;

        if (r100_debugfs_rbbm_init(rdev)) {
                DRM_ERROR("Failed to register debugfs file for RBBM !\n");
        }

        r100_gpu_init(rdev);
        /* Disabling the GART also disables out-of-GART access */
        r100_pci_gart_disable(rdev);

        /* Setup GPU memory space */
        rdev->mc.gtt_location = 0xFFFFFFFFUL;
        if (rdev->flags & RADEON_IS_AGP) {
                r = radeon_agp_init(rdev);
                if (r) {
                        printk(KERN_WARNING "[drm] Disabling AGP\n");
                        rdev->flags &= ~RADEON_IS_AGP;
                        rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
                } else {
                        rdev->mc.gtt_location = rdev->mc.agp_base;
                }
        }
        r = radeon_mc_setup(rdev);
        if (r) {
                return r;
        }

        r100_mc_disable_clients(rdev);
        if (r100_mc_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait MC idle while "
                       "programming pipes. Bad things might happen.\n");
        }

        r100_mc_setup(rdev);
        return 0;
}

void r100_mc_fini(struct radeon_device *rdev)
{
        r100_pci_gart_disable(rdev);
        radeon_gart_table_ram_free(rdev);
        radeon_gart_fini(rdev);
}


/*
 * Interrupts
 */
int r100_irq_set(struct radeon_device *rdev)
{
        uint32_t tmp = 0;

        if (rdev->irq.sw_int) {
                tmp |= RADEON_SW_INT_ENABLE;
        }
        if (rdev->irq.crtc_vblank_int[0]) {
                tmp |= RADEON_CRTC_VBLANK_MASK;
        }
        if (rdev->irq.crtc_vblank_int[1]) {
                tmp |= RADEON_CRTC2_VBLANK_MASK;
        }
        WREG32(RADEON_GEN_INT_CNTL, tmp);
        return 0;
}

static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
{
        uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
        uint32_t irq_mask = RADEON_SW_INT_TEST | RADEON_CRTC_VBLANK_STAT |
                RADEON_CRTC2_VBLANK_STAT;

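        /* Writing the latched bits back to GEN_INT_STATUS acknowledges
         * (clears) them; the status register is assumed to be
         * write-to-clear. Only the sources we handle are returned. */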
        if (irqs) {
                WREG32(RADEON_GEN_INT_STATUS, irqs);
        }
        return irqs & irq_mask;
}

int r100_irq_process(struct radeon_device *rdev)
{
        uint32_t status;

        status = r100_irq_ack(rdev);
        if (!status) {
                return IRQ_NONE;
        }
        while (status) {
                /* SW interrupt */
                if (status & RADEON_SW_INT_TEST) {
                        radeon_fence_process(rdev);
                }
                /* Vertical blank interrupts */
                if (status & RADEON_CRTC_VBLANK_STAT) {
                        drm_handle_vblank(rdev->ddev, 0);
                }
                if (status & RADEON_CRTC2_VBLANK_STAT) {
                        drm_handle_vblank(rdev->ddev, 1);
                }
                status = r100_irq_ack(rdev);
        }
        return IRQ_HANDLED;
}

u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
{
        if (crtc == 0)
                return RREG32(RADEON_CRTC_CRNT_FRAME);
        else
                return RREG32(RADEON_CRTC2_CRNT_FRAME);
}


/*
 * Fence emission
 */
void r100_fence_ring_emit(struct radeon_device *rdev,
                          struct radeon_fence *fence)
{
        /* Whoever calls radeon_fence_emit should call ring_lock and ask
         * for enough space (today the callers are ib schedule and buffer move) */
        /* Wait until IDLE & CLEAN */
        radeon_ring_write(rdev, PACKET0(0x1720, 0));
        radeon_ring_write(rdev, (1 << 16) | (1 << 17));
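        /* 0x1720 is the WAIT_UNTIL register; bits 16 and 17 request 2D and
         * 3D idle-and-clean (numeric values assumed to match
         * RADEON_WAIT_UNTIL and RADEON_WAIT_{2D,3D}_IDLECLEAN in
         * radeon_reg.h). */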
        /* Emit fence sequence & fire IRQ */
        radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
        radeon_ring_write(rdev, fence->seq);
        radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
        radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}


/*
 * Writeback
 */
int r100_wb_init(struct radeon_device *rdev)
{
        int r;

        if (rdev->wb.wb_obj == NULL) {
                r = radeon_object_create(rdev, NULL, 4096,
                                         true,
                                         RADEON_GEM_DOMAIN_GTT,
                                         false, &rdev->wb.wb_obj);
                if (r) {
                        DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
                        return r;
                }
                r = radeon_object_pin(rdev->wb.wb_obj,
                                      RADEON_GEM_DOMAIN_GTT,
                                      &rdev->wb.gpu_addr);
                if (r) {
                        DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
                        return r;
                }
                r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
                if (r) {
                        DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
                        return r;
                }
        }
        WREG32(0x774, rdev->wb.gpu_addr);
        WREG32(0x70C, rdev->wb.gpu_addr + 1024);
        WREG32(0x770, 0xff);
        return 0;
}

void r100_wb_fini(struct radeon_device *rdev)
{
        if (rdev->wb.wb_obj) {
                radeon_object_kunmap(rdev->wb.wb_obj);
                radeon_object_unpin(rdev->wb.wb_obj);
                radeon_object_unref(&rdev->wb.wb_obj);
                rdev->wb.wb = NULL;
                rdev->wb.wb_obj = NULL;
        }
}

int r100_copy_blit(struct radeon_device *rdev,
                   uint64_t src_offset,
                   uint64_t dst_offset,
                   unsigned num_pages,
                   struct radeon_fence *fence)
{
        uint32_t cur_pages;
        uint32_t stride_bytes = PAGE_SIZE;
        uint32_t pitch;
        uint32_t stride_pixels;
        unsigned ndw;
        int num_loops;
        int r = 0;

        /* radeon is limited to a 16k stride */
        stride_bytes &= 0x3fff;
        /* radeon pitch is in units of 64 bytes */
        pitch = stride_bytes / 64;
        stride_pixels = stride_bytes / 4;
        num_loops = DIV_ROUND_UP(num_pages, 8191);
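        /* A single BITBLT_MULTI packet moves at most 8191 pages (the limit
         * implied by the 0x1fff clamp below), so large copies are split
         * across num_loops packets; each loop body emits 10 ring dwords
         * (1 packet header + 9 payload), which the ndw request below
         * accounts for. */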

        /* Ask for enough room for blit + flush + fence */
        ndw = 64 + (10 * num_loops);
        r = radeon_ring_lock(rdev, ndw);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
                return -EINVAL;
        }
        while (num_pages > 0) {
                cur_pages = num_pages;
                if (cur_pages > 8191) {
                        cur_pages = 8191;
                }
                num_pages -= cur_pages;

                /* pages are in Y direction - height,
                 * page width in X direction - width */
                radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
                radeon_ring_write(rdev,
                                  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                                  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                  RADEON_GMC_SRC_CLIPPING |
                                  RADEON_GMC_DST_CLIPPING |
                                  RADEON_GMC_BRUSH_NONE |
                                  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
                                  RADEON_GMC_SRC_DATATYPE_COLOR |
                                  RADEON_ROP3_S |
                                  RADEON_DP_SRC_SOURCE_MEMORY |
                                  RADEON_GMC_CLR_CMP_CNTL_DIS |
                                  RADEON_GMC_WR_MSK_DIS);
                radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
                radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
                radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
                radeon_ring_write(rdev, 0);
                radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
                radeon_ring_write(rdev, num_pages);
                radeon_ring_write(rdev, num_pages);
                radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
        }
        radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_HOST_IDLECLEAN |
                          RADEON_WAIT_DMA_GUI_IDLE);
        if (fence) {
                r = radeon_fence_emit(rdev, fence);
        }
        radeon_ring_unlock_commit(rdev);
        return r;
}


/*
 * CP
 */
void r100_ring_start(struct radeon_device *rdev)
{
        int r;

        r = radeon_ring_lock(rdev, 2);
        if (r) {
                return;
        }
        radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
        radeon_ring_write(rdev,
                          RADEON_ISYNC_ANY2D_IDLE3D |
                          RADEON_ISYNC_ANY3D_IDLE2D |
                          RADEON_ISYNC_WAIT_IDLEGUI |
                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
        radeon_ring_unlock_commit(rdev);
}


/* Load the microcode for the CP */
static int r100_cp_init_microcode(struct radeon_device *rdev)
{
        struct platform_device *pdev;
        const char *fw_name = NULL;
        int err;

        DRM_DEBUG("\n");

        pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
        err = IS_ERR(pdev);
        if (err) {
                printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
                return -EINVAL;
        }
        if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
            (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
            (rdev->family == CHIP_RS200)) {
                DRM_INFO("Loading R100 Microcode\n");
                fw_name = FIRMWARE_R100;
        } else if ((rdev->family == CHIP_R200) ||
                   (rdev->family == CHIP_RV250) ||
                   (rdev->family == CHIP_RV280) ||
                   (rdev->family == CHIP_RS300)) {
                DRM_INFO("Loading R200 Microcode\n");
                fw_name = FIRMWARE_R200;
        } else if ((rdev->family == CHIP_R300) ||
                   (rdev->family == CHIP_R350) ||
                   (rdev->family == CHIP_RV350) ||
                   (rdev->family == CHIP_RV380) ||
                   (rdev->family == CHIP_RS400) ||
                   (rdev->family == CHIP_RS480)) {
                DRM_INFO("Loading R300 Microcode\n");
                fw_name = FIRMWARE_R300;
        } else if ((rdev->family == CHIP_R420) ||
                   (rdev->family == CHIP_R423) ||
                   (rdev->family == CHIP_RV410)) {
                DRM_INFO("Loading R400 Microcode\n");
                fw_name = FIRMWARE_R420;
        } else if ((rdev->family == CHIP_RS690) ||
                   (rdev->family == CHIP_RS740)) {
                DRM_INFO("Loading RS690/RS740 Microcode\n");
                fw_name = FIRMWARE_RS690;
        } else if (rdev->family == CHIP_RS600) {
                DRM_INFO("Loading RS600 Microcode\n");
                fw_name = FIRMWARE_RS600;
        } else if ((rdev->family == CHIP_RV515) ||
                   (rdev->family == CHIP_R520) ||
                   (rdev->family == CHIP_RV530) ||
                   (rdev->family == CHIP_R580) ||
                   (rdev->family == CHIP_RV560) ||
                   (rdev->family == CHIP_RV570)) {
                DRM_INFO("Loading R500 Microcode\n");
                fw_name = FIRMWARE_R520;
        }

        err = request_firmware(&rdev->fw, fw_name, &pdev->dev);
        platform_device_unregister(pdev);
        if (err) {
                printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
                       fw_name);
        } else if (rdev->fw->size % 8) {
                printk(KERN_ERR
                       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
                       rdev->fw->size, fw_name);
                err = -EINVAL;
                release_firmware(rdev->fw);
                rdev->fw = NULL;
        }
        return err;
}

static void r100_cp_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        int i, size;

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }

        if (rdev->fw) {
                size = rdev->fw->size / 4;
                fw_data = (const __be32 *)&rdev->fw->data[0];
                WREG32(RADEON_CP_ME_RAM_ADDR, 0);
                for (i = 0; i < size; i += 2) {
                        WREG32(RADEON_CP_ME_RAM_DATAH,
                               be32_to_cpup(&fw_data[i]));
                        WREG32(RADEON_CP_ME_RAM_DATAL,
                               be32_to_cpup(&fw_data[i + 1]));
                }
        }
}

int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
        unsigned rb_bufsz;
        unsigned rb_blksz;
        unsigned max_fetch;
        unsigned pre_write_timer;
        unsigned pre_write_limit;
        unsigned indirect2_start;
        unsigned indirect1_start;
        uint32_t tmp;
        int r;

        if (r100_debugfs_cp_init(rdev)) {
                DRM_ERROR("Failed to register debugfs file for CP !\n");
        }
        /* Reset CP */
        tmp = RREG32(RADEON_CP_CSQ_STAT);
        if ((tmp & (1 << 31))) {
                DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
                WREG32(RADEON_CP_CSQ_MODE, 0);
                WREG32(RADEON_CP_CSQ_CNTL, 0);
                WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
                tmp = RREG32(RADEON_RBBM_SOFT_RESET);
                mdelay(2);
                WREG32(RADEON_RBBM_SOFT_RESET, 0);
                tmp = RREG32(RADEON_RBBM_SOFT_RESET);
                mdelay(2);
                tmp = RREG32(RADEON_CP_CSQ_STAT);
                if ((tmp & (1 << 31))) {
                        DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
                }
        } else {
                DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
        }

        if (!rdev->fw) {
                r = r100_cp_init_microcode(rdev);
                if (r) {
                        DRM_ERROR("Failed to load firmware!\n");
                        return r;
                }
        }

        /* Align ring size */
        rb_bufsz = drm_order(ring_size / 8);
        ring_size = (1 << (rb_bufsz + 1)) * 4;
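        /* RB_BUFSZ is a log2 field, so the requested size is effectively
         * rounded up to a power of two: (1 << (rb_bufsz + 1)) dwords. */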
        r100_cp_load_microcode(rdev);
        r = radeon_ring_init(rdev, ring_size);
        if (r) {
                return r;
        }
        /* Each time the cp reads 1024 bytes (16 dword/quadword), update
         * the rptr copy in system ram */
        rb_blksz = 9;
        /* cp will read 128 bytes at a time (4 dwords) */
        max_fetch = 1;
        rdev->cp.align_mask = 16 - 1;
        /* Writes to CP_RB_WPTR will be delayed for pre_write_timer clocks */
        pre_write_timer = 64;
        /* Force a CP_RB_WPTR write if written more than once before the
         * delay expires
         */
        pre_write_limit = 0;
        /* Set up the cp cache like this (cache size is 96 dwords):
         *      RING            0  to 15
         *      INDIRECT1       16 to 79
         *      INDIRECT2       80 to 95
         * So ring cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords))
         * indirect1 cache size is 64 dwords (> (2 * max_fetch = 2 * 4 dwords))
         * indirect2 cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords))
         * The idea being that most of the gpu command stream goes through the
         * indirect1 buffer, so it gets the bigger cache.
         */
        indirect2_start = 80;
        indirect1_start = 16;
        /* cp setup */
        WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
        WREG32(RADEON_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
               RADEON_BUF_SWAP_32BIT |
#endif
               REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
               REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
               REG_SET(RADEON_MAX_FETCH, max_fetch) |
               RADEON_RB_NO_UPDATE);
        /* Set ring address */
        DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
        WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
        /* Force read & write ptr to 0 */
        tmp = RREG32(RADEON_CP_RB_CNTL);
        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
        WREG32(RADEON_CP_RB_RPTR_WR, 0);
        WREG32(RADEON_CP_RB_WPTR, 0);
        WREG32(RADEON_CP_RB_CNTL, tmp);
        udelay(10);
        rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
        rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
        /* Set cp mode to bus mastering & enable cp */
        WREG32(RADEON_CP_CSQ_MODE,
               REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
               REG_SET(RADEON_INDIRECT1_START, indirect1_start));
        WREG32(0x718, 0);
        WREG32(0x744, 0x00004D4D);
        WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
        radeon_ring_start(rdev);
        r = radeon_ring_test(rdev);
        if (r) {
                DRM_ERROR("radeon: cp isn't working (%d).\n", r);
                return r;
        }
        rdev->cp.ready = true;
        return 0;
}

void r100_cp_fini(struct radeon_device *rdev)
{
        /* Disable ring */
        rdev->cp.ready = false;
        WREG32(RADEON_CP_CSQ_CNTL, 0);
        radeon_ring_fini(rdev);
        DRM_INFO("radeon: cp finalized\n");
}

void r100_cp_disable(struct radeon_device *rdev)
{
        /* Disable ring */
        rdev->cp.ready = false;
        WREG32(RADEON_CP_CSQ_MODE, 0);
        WREG32(RADEON_CP_CSQ_CNTL, 0);
        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }
}

int r100_cp_reset(struct radeon_device *rdev)
{
        uint32_t tmp;
        bool reinit_cp;
        int i;

        reinit_cp = rdev->cp.ready;
        rdev->cp.ready = false;
        WREG32(RADEON_CP_CSQ_MODE, 0);
        WREG32(RADEON_CP_CSQ_CNTL, 0);
        WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
        (void)RREG32(RADEON_RBBM_SOFT_RESET);
        udelay(200);
        WREG32(RADEON_RBBM_SOFT_RESET, 0);
        /* Wait to prevent race in RBBM_STATUS */
        mdelay(1);
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & (1 << 16))) {
                        DRM_INFO("CP reset succeeded (RBBM_STATUS=0x%08X)\n",
                                 tmp);
                        if (reinit_cp) {
                                return r100_cp_init(rdev, rdev->cp.ring_size);
                        }
                        return 0;
                }
                DRM_UDELAY(1);
        }
        tmp = RREG32(RADEON_RBBM_STATUS);
        DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
        return -1;
}


/*
 * CS functions
 */
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
                          struct radeon_cs_packet *pkt,
                          const unsigned *auth, unsigned n,
                          radeon_packet0_check_t check)
{
        unsigned reg;
        unsigned i, j, m;
        unsigned idx;
        int r;

        idx = pkt->idx + 1;
        reg = pkt->reg;
        /* Check that the register falls into the register range
         * covered by the safe-register bitmap, which has n entries.
         */
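        /* Each dword of the auth bitmap covers 32 registers, i.e. 128 bytes
         * of register space: word reg >> 7 holds the bit, and bit
         * (reg >> 2) & 31 marks the register at byte offset reg as safe. */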
        if (pkt->one_reg_wr) {
                if ((reg >> 7) > n) {
                        return -EINVAL;
                }
        } else {
                if (((reg + (pkt->count << 2)) >> 7) > n) {
                        return -EINVAL;
                }
        }
        for (i = 0; i <= pkt->count; i++, idx++) {
                j = (reg >> 7);
                m = 1 << ((reg >> 2) & 31);
                if (auth[j] & m) {
                        r = check(p, pkt, idx, reg);
                        if (r) {
                                return r;
                        }
                }
                if (pkt->one_reg_wr) {
                        if (!(auth[j] & m)) {
                                break;
                        }
                } else {
                        reg += 4;
                }
        }
        return 0;
}

void r100_cs_dump_packet(struct radeon_cs_parser *p,
                         struct radeon_cs_packet *pkt)
{
        struct radeon_cs_chunk *ib_chunk;
        volatile uint32_t *ib;
        unsigned i;
        unsigned idx;

        ib = p->ib->ptr;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        idx = pkt->idx;
        for (i = 0; i <= (pkt->count + 1); i++, idx++) {
                DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
        }
}

/**
 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser: parser structure holding parsing context.
 * @pkt: where to store packet information
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the
 * packet type is unknown.
 **/
int r100_cs_packet_parse(struct radeon_cs_parser *p,
                         struct radeon_cs_packet *pkt,
                         unsigned idx)
{
        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
        uint32_t header;

        if (idx >= ib_chunk->length_dw) {
                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
                          idx, ib_chunk->length_dw);
                return -EINVAL;
        }
        header = ib_chunk->kdata[idx];
        pkt->idx = idx;
        pkt->type = CP_PACKET_GET_TYPE(header);
        pkt->count = CP_PACKET_GET_COUNT(header);
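        /* For reference (standard Radeon command-stream convention): a
         * type-0 packet writes one or more registers, a type-2 packet is a
         * filler/NOP dword, and a type-3 packet carries an opcode-specific
         * command. */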
        switch (pkt->type) {
        case PACKET_TYPE0:
                pkt->reg = CP_PACKET0_GET_REG(header);
                pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
                break;
        case PACKET_TYPE3:
                pkt->opcode = CP_PACKET3_GET_OPCODE(header);
                break;
        case PACKET_TYPE2:
                pkt->count = -1;
                break;
        default:
                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
                return -EINVAL;
        }
        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
                return -EINVAL;
        }
        return 0;
}

/**
 * r100_cs_packet_parse_vline() - parse userspace VLINE packet
 * @parser: parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched-off crtc and nulls out the
 * wait in that case.
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
        struct radeon_cs_chunk *ib_chunk;
        struct drm_mode_object *obj;
        struct drm_crtc *crtc;
        struct radeon_crtc *radeon_crtc;
        struct radeon_cs_packet p3reloc, waitreloc;
        int crtc_id;
        int r;
        uint32_t header, h_idx, reg;

        ib_chunk = &p->chunks[p->chunk_ib_idx];

        /* parse the wait until */
        r = r100_cs_packet_parse(p, &waitreloc, p->idx);
        if (r)
                return r;

        /* check it's a wait until and only 1 count */
        if (waitreloc.reg != RADEON_WAIT_UNTIL ||
            waitreloc.count != 0) {
                DRM_ERROR("vline wait had illegal wait until segment\n");
                r = -EINVAL;
                return r;
        }

        if (ib_chunk->kdata[waitreloc.idx + 1] != RADEON_WAIT_CRTC_VLINE) {
                DRM_ERROR("vline wait had illegal wait until\n");
                r = -EINVAL;
                return r;
        }

        /* jump over the NOP */
        r = r100_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return r;

        h_idx = p->idx - 2;
        p->idx += waitreloc.count;
        p->idx += p3reloc.count;

        header = ib_chunk->kdata[h_idx];
        crtc_id = ib_chunk->kdata[h_idx + 5];
        reg = ib_chunk->kdata[h_idx] >> 2;
        mutex_lock(&p->rdev->ddev->mode_config.mutex);
        obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
        if (!obj) {
                DRM_ERROR("cannot find crtc %d\n", crtc_id);
                r = -EINVAL;
                goto out;
        }
        crtc = obj_to_crtc(obj);
        radeon_crtc = to_radeon_crtc(crtc);
        crtc_id = radeon_crtc->crtc_id;

        if (!crtc->enabled) {
                /* if the CRTC isn't enabled - we need to nop out the wait until */
                ib_chunk->kdata[h_idx + 2] = PACKET2(0);
                ib_chunk->kdata[h_idx + 3] = PACKET2(0);
        } else if (crtc_id == 1) {
                switch (reg) {
                case AVIVO_D1MODE_VLINE_START_END:
                        header &= R300_CP_PACKET0_REG_MASK;
                        header |= AVIVO_D2MODE_VLINE_START_END >> 2;
                        break;
                case RADEON_CRTC_GUI_TRIG_VLINE:
                        header &= R300_CP_PACKET0_REG_MASK;
                        header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
                        break;
                default:
                        DRM_ERROR("unknown crtc reloc\n");
                        r = -EINVAL;
                        goto out;
                }
                ib_chunk->kdata[h_idx] = header;
                ib_chunk->kdata[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
        }
out:
        mutex_unlock(&p->rdev->ddev->mode_config.mutex);
        return r;
}

/**
 * r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3
 * @parser: parser structure holding parsing context.
 * @data: pointer to relocation data
 * @offset_start: starting offset
 * @offset_mask: offset mask (to align start offset on)
 * @reloc: reloc information
 *
 * Check that the next packet is a relocation packet3, do bo validation and
 * compute the GPU offset using the provided start.
 **/
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
                              struct radeon_cs_reloc **cs_reloc)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_cs_packet p3reloc;
        unsigned idx;
        int r;

        if (p->chunk_relocs_idx == -1) {
                DRM_ERROR("No relocation chunk !\n");
                return -EINVAL;
        }
        *cs_reloc = NULL;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        relocs_chunk = &p->chunks[p->chunk_relocs_idx];
        r = r100_cs_packet_parse(p, &p3reloc, p->idx);
        if (r) {
                return r;
        }
        p->idx += p3reloc.count + 2;
        if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
                          p3reloc.idx);
                r100_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        idx = ib_chunk->kdata[p3reloc.idx + 1];
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
                          idx, relocs_chunk->length_dw);
                r100_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        /* FIXME: we assume reloc size is 4 dwords */
        *cs_reloc = p->relocs_ptr[(idx / 4)];
        return 0;
}

static int r100_packet0_check(struct radeon_cs_parser *p,
                              struct radeon_cs_packet *pkt)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_cs_reloc *reloc;
        volatile uint32_t *ib;
        uint32_t tmp;
        unsigned reg;
        unsigned i;
        unsigned idx;
        bool onereg;
        int r;
        u32 tile_flags = 0;

        ib = p->ib->ptr;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        idx = pkt->idx + 1;
        reg = pkt->reg;
        onereg = false;
        if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) {
                onereg = true;
        }
        for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
                switch (reg) {
                case RADEON_CRTC_GUI_TRIG_VLINE:
                        r = r100_cs_packet_parse_vline(p);
                        if (r) {
                                DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                          idx, reg);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        break;
                /* FIXME: only allow PACKET3 blit? easier to check for out of
                 * range access */
                case RADEON_DST_PITCH_OFFSET:
                case RADEON_SRC_PITCH_OFFSET:
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                          idx, reg);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        tmp = ib_chunk->kdata[idx] & 0x003fffff;
                        tmp += (((u32)reloc->lobj.gpu_offset) >> 10);

                        if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
                                tile_flags |= RADEON_DST_TILE_MACRO;
                        if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
                                if (reg == RADEON_SRC_PITCH_OFFSET) {
                                        DRM_ERROR("Cannot src blit from microtiled surface\n");
                                        r100_cs_dump_packet(p, pkt);
                                        return -EINVAL;
                                }
                                tile_flags |= RADEON_DST_TILE_MICRO;
                        }

                        tmp |= tile_flags;
                        ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
                        break;
                case RADEON_RB3D_DEPTHOFFSET:
                case RADEON_RB3D_COLOROFFSET:
                case R300_RB3D_COLOROFFSET0:
                case R300_ZB_DEPTHOFFSET:
                case R200_PP_TXOFFSET_0:
                case R200_PP_TXOFFSET_1:
                case R200_PP_TXOFFSET_2:
                case R200_PP_TXOFFSET_3:
                case R200_PP_TXOFFSET_4:
                case R200_PP_TXOFFSET_5:
                case RADEON_PP_TXOFFSET_0:
                case RADEON_PP_TXOFFSET_1:
                case RADEON_PP_TXOFFSET_2:
                case R300_TX_OFFSET_0:
                case R300_TX_OFFSET_0+4:
                case R300_TX_OFFSET_0+8:
                case R300_TX_OFFSET_0+12:
                case R300_TX_OFFSET_0+16:
                case R300_TX_OFFSET_0+20:
                case R300_TX_OFFSET_0+24:
                case R300_TX_OFFSET_0+28:
                case R300_TX_OFFSET_0+32:
                case R300_TX_OFFSET_0+36:
                case R300_TX_OFFSET_0+40:
                case R300_TX_OFFSET_0+44:
                case R300_TX_OFFSET_0+48:
                case R300_TX_OFFSET_0+52:
                case R300_TX_OFFSET_0+56:
                case R300_TX_OFFSET_0+60:
                        /* rn50 has no 3D engine, so fail on any 3D setup */
                        if (ASIC_IS_RN50(p->rdev)) {
                                DRM_ERROR("attempt to use RN50 3D engine failed\n");
                                return -EINVAL;
                        }
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                          idx, reg);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                        break;
                case R300_RB3D_COLORPITCH0:
                case RADEON_RB3D_COLORPITCH:
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                          idx, reg);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }

                        if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
                                tile_flags |= RADEON_COLOR_TILE_ENABLE;
                        if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
                                tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;

                        tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
                        tmp |= tile_flags;
                        ib[idx] = tmp;
                        break;
                default:
                        /* FIXME: we don't want to allow any other packets */
                        break;
                }
                if (onereg) {
                        /* FIXME: forbid onereg writes to registers being relocated */
                        break;
                }
        }
        return 0;
}

int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
                                         struct radeon_cs_packet *pkt,
                                         struct radeon_object *robj)
{
        struct radeon_cs_chunk *ib_chunk;
        unsigned idx;

        ib_chunk = &p->chunks[p->chunk_ib_idx];
        idx = pkt->idx + 1;
        if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) {
                DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
                          "(need %u have %lu) !\n",
                          ib_chunk->kdata[idx+2] + 1,
                          radeon_object_size(robj));
                return -EINVAL;
        }
        return 0;
}

static int r100_packet3_check(struct radeon_cs_parser *p,
                              struct radeon_cs_packet *pkt)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_cs_reloc *reloc;
        unsigned idx;
        unsigned i, c;
        volatile uint32_t *ib;
        int r;

        ib = p->ib->ptr;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        idx = pkt->idx + 1;
        switch (pkt->opcode) {
        case PACKET3_3D_LOAD_VBPNTR:
                c = ib_chunk->kdata[idx++];
                for (i = 0; i < (c - 1); i += 2, idx += 3) {
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
                }
                if (c & 1) {
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                }
                break;
        case PACKET3_INDX_BUFFER:
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
                if (r) {
                        return r;
                }
                break;
        case 0x23:
                /* FIXME: cleanup */
                /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                break;
        case PACKET3_3D_DRAW_IMMD:
                /* triggers drawing using in-packet vertex data */
        case PACKET3_3D_DRAW_IMMD_2:
                /* triggers drawing using in-packet vertex data */
        case PACKET3_3D_DRAW_VBUF_2:
                /* triggers drawing of vertex buffers setup elsewhere */
        case PACKET3_3D_DRAW_INDX_2:
                /* triggers drawing using indices to vertex buffer */
        case PACKET3_3D_DRAW_VBUF:
                /* triggers drawing of vertex buffers setup elsewhere */
        case PACKET3_3D_DRAW_INDX:
                /* triggers drawing using indices to vertex buffer */
        case PACKET3_NOP:
                break;
        default:
                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}

int r100_cs_parse(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet pkt;
        int r;

        do {
                r = r100_cs_packet_parse(p, &pkt, p->idx);
                if (r) {
                        return r;
                }
                p->idx += pkt.count + 2;
                switch (pkt.type) {
                case PACKET_TYPE0:
                        r = r100_packet0_check(p, &pkt);
                        break;
                case PACKET_TYPE2:
                        break;
                case PACKET_TYPE3:
                        r = r100_packet3_check(p, &pkt);
                        break;
                default:
                        DRM_ERROR("Unknown packet type %d !\n",
                                  pkt.type);
                        return -EINVAL;
                }
                if (r) {
                        return r;
                }
        } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
        return 0;
}


/*
 * Global GPU functions
 */
void r100_errata(struct radeon_device *rdev)
{
        rdev->pll_errata = 0;

        if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
                rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
        }

        if (rdev->family == CHIP_RV100 ||
            rdev->family == CHIP_RS100 ||
            rdev->family == CHIP_RS200) {
                rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
        }
}

/* Wait for vertical sync on primary CRTC */
void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
{
        uint32_t crtc_gen_cntl, tmp;
        int i;

        crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
        if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
            !(crtc_gen_cntl & RADEON_CRTC_EN)) {
                return;
        }
        /* Clear the CRTC_VBLANK_SAVE bit */
        WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_CRTC_STATUS);
                if (tmp & RADEON_CRTC_VBLANK_SAVE) {
                        return;
                }
                DRM_UDELAY(1);
        }
}

/* Wait for vertical sync on secondary CRTC */
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
{
        uint32_t crtc2_gen_cntl, tmp;
        int i;

        crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
        if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
            !(crtc2_gen_cntl & RADEON_CRTC2_EN))
                return;

        /* Clear the CRTC_VBLANK_SAVE bit */
        WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_CRTC2_STATUS);
                if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
                        return;
                }
                DRM_UDELAY(1);
        }
}

int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
{
        unsigned i;
        uint32_t tmp;

        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
                if (tmp >= n) {
                        return 0;
                }
                DRM_UDELAY(1);
        }
        return -1;
}

int r100_gui_wait_for_idle(struct radeon_device *rdev)
{
        unsigned i;
        uint32_t tmp;

        if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
                printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
                       " Bad things might happen.\n");
        }
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & (1 << 31))) {
                        return 0;
                }
                DRM_UDELAY(1);
        }
        return -1;
}

int r100_mc_wait_for_idle(struct radeon_device *rdev)
{
        unsigned i;
        uint32_t tmp;

        for (i = 0; i < rdev->usec_timeout; i++) {
                /* read MC_STATUS */
                tmp = RREG32(0x0150);
                if (tmp & (1 << 2)) {
                        return 0;
                }
                DRM_UDELAY(1);
        }
        return -1;
}

void r100_gpu_init(struct radeon_device *rdev)
{
        /* TODO: anything to do here? pipes? */
        r100_hdp_reset(rdev);
}

void r100_hdp_reset(struct radeon_device *rdev)
{
        uint32_t tmp;

        tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
        tmp |= (7 << 28);
        WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
        (void)RREG32(RADEON_HOST_PATH_CNTL);
        udelay(200);
        WREG32(RADEON_RBBM_SOFT_RESET, 0);
        WREG32(RADEON_HOST_PATH_CNTL, tmp);
        (void)RREG32(RADEON_HOST_PATH_CNTL);
}

int r100_rb2d_reset(struct radeon_device *rdev)
{
        uint32_t tmp;
        int i;

        WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
        (void)RREG32(RADEON_RBBM_SOFT_RESET);
        udelay(200);
        WREG32(RADEON_RBBM_SOFT_RESET, 0);
        /* Wait to prevent race in RBBM_STATUS */
        mdelay(1);
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & (1 << 26))) {
                        DRM_INFO("RB2D reset succeeded (RBBM_STATUS=0x%08X)\n",
                                 tmp);
                        return 0;
                }
                DRM_UDELAY(1);
        }
        tmp = RREG32(RADEON_RBBM_STATUS);
        DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
        return -1;
}

int r100_gpu_reset(struct radeon_device *rdev)
{
        uint32_t status;

        /* reset order likely matters */
        status = RREG32(RADEON_RBBM_STATUS);
        /* reset HDP */
        r100_hdp_reset(rdev);
        /* reset rb2d */
        if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
                r100_rb2d_reset(rdev);
        }
        /* TODO: reset 3D engine */
        /* reset CP */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 16)) {
                r100_cp_reset(rdev);
        }
        /* Check if GPU is idle */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 31)) {
                DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
                return -1;
        }
        DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
        return 0;
}


/*
 * VRAM info
 */
static void r100_vram_get_type(struct radeon_device *rdev)
{
        uint32_t tmp;

        rdev->mc.vram_is_ddr = false;
        if (rdev->flags & RADEON_IS_IGP)
                rdev->mc.vram_is_ddr = true;
        else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
                rdev->mc.vram_is_ddr = true;
        if ((rdev->family == CHIP_RV100) ||
            (rdev->family == CHIP_RS100) ||
            (rdev->family == CHIP_RS200)) {
                tmp = RREG32(RADEON_MEM_CNTL);
                if (tmp & RV100_HALF_MODE) {
                        rdev->mc.vram_width = 32;
                } else {
                        rdev->mc.vram_width = 64;
                }
                if (rdev->flags & RADEON_SINGLE_CRTC) {
                        rdev->mc.vram_width /= 4;
                        rdev->mc.vram_is_ddr = true;
                }
        } else if (rdev->family <= CHIP_RV280) {
                tmp = RREG32(RADEON_MEM_CNTL);
                if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
                        rdev->mc.vram_width = 128;
                } else {
                        rdev->mc.vram_width = 64;
                }
        } else {
                /* newer IGPs */
                rdev->mc.vram_width = 128;
        }
}

static u32 r100_get_accessible_vram(struct radeon_device *rdev)
{
        u32 aper_size;
        u8 byte;

        aper_size = RREG32(RADEON_CONFIG_APER_SIZE);

        /* Set HDP_APER_CNTL only on cards that are known not to be broken,
         * that is, those with the 2nd generation multifunction PCI interface
         */
        if (rdev->family == CHIP_RV280 ||
            rdev->family >= CHIP_RV350) {
                WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
                         ~RADEON_HDP_APER_CNTL);
                DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
                return aper_size * 2;
        }

        /* Older cards have all sorts of funny issues to deal with. First
         * check if it's a multifunction card by reading the PCI config
         * header type... Limit those to one aperture size
         */
        pci_read_config_byte(rdev->pdev, 0xe, &byte);
        if (byte & 0x80) {
                DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
                DRM_INFO("Limiting VRAM to one aperture\n");
                return aper_size;
        }

        /* Single function older card. We read HDP_APER_CNTL to see how the
         * BIOS has set it up. We don't write this as it's broken on some
         * ASICs, but we expect the BIOS to have done the right thing (might
         * be too optimistic...)
         */
        if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
                return aper_size * 2;
        return aper_size;
}

void r100_vram_init_sizes(struct radeon_device *rdev)
{
        u64 config_aper_size;
        u32 accessible;

        config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);

        if (rdev->flags & RADEON_IS_IGP) {
                uint32_t tom;
                /* read NB_TOM to get the amount of ram stolen for the GPU */
                tom = RREG32(RADEON_NB_TOM);
                rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
                /* for IGPs we need to keep VRAM where it was put by the BIOS */
                rdev->mc.vram_location = (tom & 0xffff) << 16;
                WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
                rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
        } else {
                rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
                /* Some production boards of m6 will report 0
                 * if it's 8 MB
                 */
                if (rdev->mc.real_vram_size == 0) {
                        rdev->mc.real_vram_size = 8192 * 1024;
                        WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
                }
                /* let driver place VRAM */
                rdev->mc.vram_location = 0xFFFFFFFFUL;
                /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
                 * Novell bug 204882, along with lots of ubuntu ones */
                if (config_aper_size > rdev->mc.real_vram_size)
                        rdev->mc.mc_vram_size = config_aper_size;
                else
                        rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
        }

        /* work out accessible VRAM */
        accessible = r100_get_accessible_vram(rdev);

        rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
        rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);

        if (accessible > rdev->mc.aper_size)
                accessible = rdev->mc.aper_size;

        if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
                rdev->mc.mc_vram_size = rdev->mc.aper_size;

        if (rdev->mc.real_vram_size > rdev->mc.aper_size)
                rdev->mc.real_vram_size = rdev->mc.aper_size;
}

void r100_vram_info(struct radeon_device *rdev)
{
        r100_vram_get_type(rdev);

        r100_vram_init_sizes(rdev);
}


/*
 * Indirect register accessors
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
{
        if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
                return;
        }
        (void)RREG32(RADEON_CLOCK_CNTL_DATA);
        (void)RREG32(RADEON_CRTC_GEN_CNTL);
}

static void r100_pll_errata_after_data(struct radeon_device *rdev)
{
        /* This workaround is necessary on RV100, RS100 and RS200 chips,
         * or the chip could hang on a subsequent access
         */
        if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
                udelay(5000);
        }

        /* This function is required to work around a hardware bug in some
         * (all?) revisions of the R300. This workaround should be called
         * after every CLOCK_CNTL_INDEX register access. If not, register
         * reads afterward may not be correct.
         */
        if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
                uint32_t save, tmp;

                save = RREG32(RADEON_CLOCK_CNTL_INDEX);
                tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
                WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
                tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
                WREG32(RADEON_CLOCK_CNTL_INDEX, save);
        }
}

uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
        uint32_t data;

        WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
        r100_pll_errata_after_index(rdev);
        data = RREG32(RADEON_CLOCK_CNTL_DATA);
        r100_pll_errata_after_data(rdev);
        return data;
}

void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
        WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
        r100_pll_errata_after_index(rdev);
        WREG32(RADEON_CLOCK_CNTL_DATA, v);
        r100_pll_errata_after_data(rdev);
}

int r100_init(struct radeon_device *rdev)
{
        return 0;
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
        uint32_t reg, value;
        unsigned i;

        seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
        seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
        for (i = 0; i < 64; i++) {
                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
                reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
                WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
                value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
                seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
        }
        return 0;
}

static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
        uint32_t rdp, wdp;
        unsigned count, i, j;

        radeon_ring_free_size(rdev);
        rdp = RREG32(RADEON_CP_RB_RPTR);
        wdp = RREG32(RADEON_CP_RB_WPTR);
        count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
        seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
        seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
        seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
        seq_printf(m, "%u dwords in ring\n", count);
        for (j = 0; j <= count; j++) {
                i = (rdp + j) & rdev->cp.ptr_mask;
                seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
        }
        return 0;
}


static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
        uint32_t csq_stat, csq2_stat, tmp;
        unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
        unsigned i;

        seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
        seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
        csq_stat = RREG32(RADEON_CP_CSQ_STAT);
        csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
        r_rptr = (csq_stat >> 0) & 0x3ff;
        r_wptr = (csq_stat >> 10) & 0x3ff;
        ib1_rptr = (csq_stat >> 20) & 0x3ff;
        ib1_wptr = (csq2_stat >> 0) & 0x3ff;
        ib2_rptr = (csq2_stat >> 10) & 0x3ff;
        ib2_wptr = (csq2_stat >> 20) & 0x3ff;
        seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
        seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
        seq_printf(m, "Ring rptr %u\n", r_rptr);
        seq_printf(m, "Ring wptr %u\n", r_wptr);
        seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
        seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
        seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
        seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
        /* FIXME: 0, 128, 640 depend on the fifo setup; see cp_init_kms:
         * 128 = indirect1_start * 8 and 640 = indirect2_start * 8 */
        seq_printf(m, "Ring fifo:\n");
        for (i = 0; i < 256; i++) {
                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
                tmp = RREG32(RADEON_CP_CSQ_DATA);
                seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
        }
        seq_printf(m, "Indirect1 fifo:\n");
        for (i = 256; i <= 512; i++) {
                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
                tmp = RREG32(RADEON_CP_CSQ_DATA);
                seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
        }
        seq_printf(m, "Indirect2 fifo:\n");
        for (i = 640; i < ib1_wptr; i++) {
                WREG32(RADEON_CP_CSQ_ADDR, i << 2);
                tmp = RREG32(RADEON_CP_CSQ_DATA);
                seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
        }
        return 0;
}
1778
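/*
 * Dump the memory controller, AGP and PCI GART (AIC) registers. The
 * raw offsets 0x01D0 and 0x01E4 presumably correspond to AIC_CNTL and
 * AIC_TLB_ADDR; they are printed as AIC_CTRL and AIC_TLB_ADDR below.
 */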
static int r100_debugfs_mc_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_FB_LOCATION);
	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_BUS_CNTL);
	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_AGP_LOCATION);
	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AGP_BASE);
	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
	tmp = RREG32(0x01D0);
	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_LO_ADDR);
	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_HI_ADDR);
	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
	tmp = RREG32(0x01E4);
	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
#endif

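/*
 * Registration helpers for the tables above; they compile to no-ops
 * when debugfs is not configured, so callers never need their own
 * CONFIG_DEBUG_FS guards.
 */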
int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list,
					ARRAY_SIZE(r100_debugfs_rbbm_list));
#else
	return 0;
#endif
}

int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list,
					ARRAY_SIZE(r100_debugfs_cp_list));
#else
	return 0;
#endif
}

int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list,
					ARRAY_SIZE(r100_debugfs_mc_info_list));
#else
	return 0;
#endif
}

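/*
 * Surface registers describe tiled apertures: each surface slot is a
 * triple of registers (INFO, LOWER_BOUND, UPPER_BOUND) spaced 16 bytes
 * from the next slot, hence the reg * 16 indexing. Both the tiling
 * flag encoding and the pitch granularity changed between ASIC
 * generations, which is what the family checks below sort out.
 */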
int r100_set_surface_reg(struct radeon_device *rdev, int reg,
			 uint32_t tiling_flags, uint32_t pitch,
			 uint32_t offset, uint32_t obj_size)
{
	int surf_index = reg * 16;
	int flags = 0;

	/* the pitch field is in units of 16 bytes on r100/r200,
	 * 8 bytes on r300 and newer */
	if (rdev->family < CHIP_R300)
		flags = pitch / 16;
	else
		flags = pitch / 8;

	if (rdev->family <= CHIP_RS200) {
		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
				== (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
			flags |= RADEON_SURF_TILE_COLOR_BOTH;
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= RADEON_SURF_TILE_COLOR_MACRO;
	} else if (rdev->family <= CHIP_RV280) {
		if (tiling_flags & (RADEON_TILING_MACRO))
			flags |= R200_SURF_TILE_COLOR_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R200_SURF_TILE_COLOR_MICRO;
	} else {
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= R300_SURF_TILE_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R300_SURF_TILE_MICRO;
	}

	DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
	return 0;
}

void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
{
	int surf_index = reg * 16;
	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
}

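/*
 * Check that the memory subsystem can keep the enabled displays fed
 * and program the display FIFO watermarks accordingly. The recipe,
 * essentially a port of the old DDX bandwidth code, is:
 *
 * 1. compute the available memory bandwidth from MCLK, bus width and
 *    DDR-ness, derated by a fixed 0.8 efficiency factor (min_mem_eff);
 * 2. compute the peak bandwidth the enabled CRTCs demand and warn if
 *    it exceeds what the memory can deliver;
 * 3. estimate the worst-case latency of a display request through the
 *    memory controller from the DRAM timing fields;
 * 4. turn latency times drain rate into the GRPH_BUFFER_CNTL critical
 *    points that decide when display requests are promoted to high
 *    priority.
 *
 * All of the arithmetic is done in 20.12 fixed point (fixed20_12).
 */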
void r100_bandwidth_update(struct radeon_device *rdev)
{
	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
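	/* CAS latency lookup tables, in memory clocks, indexed by the 3-bit
	 * CAS field of MEM_SDRAM_MODE_REG: one table for RV100/IGP parts,
	 * one for the RS480 encoding, and one for everything newer. The
	 * memtrbs/memtrbs_r4xx tables translate the R300+ RBS field the
	 * same way. Note that memtcas_ff lists only seven initializers, so
	 * its last entry is zero-initialized. */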
	fixed20_12 memtcas_ff[8] = {
		fixed_init(1),
		fixed_init(2),
		fixed_init(3),
		fixed_init(0),
		fixed_init_half(1),
		fixed_init_half(2),
		fixed_init(0),
	};
	fixed20_12 memtcas_rs480_ff[8] = {
		fixed_init(0),
		fixed_init(1),
		fixed_init(2),
		fixed_init(3),
		fixed_init(0),
		fixed_init_half(1),
		fixed_init_half(2),
		fixed_init_half(3),
	};
	fixed20_12 memtcas2_ff[8] = {
		fixed_init(0),
		fixed_init(1),
		fixed_init(2),
		fixed_init(3),
		fixed_init(4),
		fixed_init(5),
		fixed_init(6),
		fixed_init(7),
	};
	fixed20_12 memtrbs[8] = {
		fixed_init(1),
		fixed_init_half(1),
		fixed_init(2),
		fixed_init_half(2),
		fixed_init(3),
		fixed_init_half(3),
		fixed_init(4),
		fixed_init_half(4)
	};
	fixed20_12 memtrbs_r4xx[8] = {
		fixed_init(4),
		fixed_init(5),
		fixed_init(6),
		fixed_init(7),
		fixed_init(8),
		fixed_init(9),
		fixed_init(10),
		fixed_init(11)
	};
	fixed20_12 min_mem_eff;
	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
	fixed20_12 cur_latency_mclk, cur_latency_sclk;
	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
		disp_drain_rate2, read_return_rate;
	fixed20_12 time_disp1_drop_priority;
	int c;
	int cur_size = 16;	/* in octawords */
	int critical_point = 0, critical_point2;
	int stop_req, max_stop_req;
	struct drm_display_mode *mode1 = NULL;
	struct drm_display_mode *mode2 = NULL;
	uint32_t pixel_bytes1 = 0;
	uint32_t pixel_bytes2 = 0;

	if (rdev->mode_info.crtcs[0]->base.enabled) {
		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
		pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
	}
	if (rdev->mode_info.crtcs[1]->base.enabled) {
		mode2 = &rdev->mode_info.crtcs[1]->base.mode;
		pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
	}

	min_mem_eff.full = rfixed_const_8(0);
	/* when display priority is forced high, program the R300 MC display
	 * read latency timers for the enabled CRTCs */
	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
		/* check crtc enables */
		if (mode2)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
		if (mode1)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
	}

	/*
	 * determine if there is enough bandwidth for the current mode
	 */
	mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
	temp_ff.full = rfixed_const(100);
	mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
	sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
	sclk_ff.full = rfixed_div(sclk_ff, temp_ff);

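	/* Raw memory bandwidth: bytes per memory clock is the bus width in
	 * bytes, doubled for DDR. Radeon clocks are kept in 10 kHz units, so
	 * the divisions by 100 above put mclk_ff/sclk_ff in MHz and mem_bw
	 * in Mbyte/s. */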
	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
	temp_ff.full = rfixed_const(temp);
	mem_bw.full = rfixed_mul(mclk_ff, temp_ff);

	pix_clk.full = 0;
	pix_clk2.full = 0;
	peak_disp_bw.full = 0;
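	/* Peak display bandwidth is the sum over the enabled CRTCs of pixel
	 * clock (mode->clock is in kHz, hence the division by 1000 to get
	 * MHz) times bytes per pixel. */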
	if (mode1) {
		temp_ff.full = rfixed_const(1000);
		pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */
		pix_clk.full = rfixed_div(pix_clk, temp_ff);
		temp_ff.full = rfixed_const(pixel_bytes1);
		peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
	}
	if (mode2) {
		temp_ff.full = rfixed_const(1000);
		pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */
		pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
		temp_ff.full = rfixed_const(pixel_bytes2);
		peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
	}

	mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
	if (peak_disp_bw.full >= mem_bw.full) {
		DRM_ERROR("You may not have enough display bandwidth for the current mode\n"
			  "If you see flickering, try lowering the resolution, refresh rate, or color depth\n");
	}

	/* Get the DRAM timing values (Trcd, Trp, Tras) from MEM_TIMING_CNTL
	 * and convert them to memory clocks. */
	temp = RREG32(RADEON_MEM_TIMING_CNTL);
	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
		mem_trcd = ((temp >> 2) & 0x3) + 1;
		mem_trp = ((temp & 0x3)) + 1;
		mem_tras = ((temp & 0x70) >> 4) + 1;
	} else if (rdev->family == CHIP_R300 ||
		   rdev->family == CHIP_R350) { /* r300, r350 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 11) & 0xf) + 4;
	} else if (rdev->family == CHIP_RV350 ||
		   rdev->family == CHIP_RV380) {
		/* rv3x0 */
		mem_trcd = (temp & 0x7) + 3;
		mem_trp = ((temp >> 8) & 0x7) + 3;
		mem_tras = ((temp >> 11) & 0xf) + 6;
	} else if (rdev->family == CHIP_R420 ||
		   rdev->family == CHIP_R423 ||
		   rdev->family == CHIP_RV410) {
		/* r4xx */
		mem_trcd = (temp & 0xf) + 3;
		if (mem_trcd > 15)
			mem_trcd = 15;
		mem_trp = ((temp >> 8) & 0xf) + 3;
		if (mem_trp > 15)
			mem_trp = 15;
		mem_tras = ((temp >> 12) & 0x1f) + 6;
		if (mem_tras > 31)
			mem_tras = 31;
	} else { /* RV200, R200 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 12) & 0xf) + 4;
	}
	/* convert to FF */
	trcd_ff.full = rfixed_const(mem_trcd);
	trp_ff.full = rfixed_const(mem_trp);
	tras_ff.full = rfixed_const(mem_tras);

	/* Get the CAS latency from MEM_SDRAM_MODE_REG and convert it to
	 * memory clocks via the lookup tables above. */
	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
	data = (temp & (7 << 20)) >> 20;
	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) {
		if (rdev->family == CHIP_RS480) /* RS480 only; apparently not RS400 */
			tcas_ff = memtcas_rs480_ff[data];
		else
			tcas_ff = memtcas_ff[data];
	} else
		tcas_ff = memtcas2_ff[data];

	if (rdev->family == CHIP_RS400 ||
	    rdev->family == CHIP_RS480) {
		/* extra cas latency stored in bits 23-25, 0-4 clocks */
		data = (temp >> 23) & 0x7;
		if (data < 5)
			tcas_ff.full += rfixed_const(data);
	}

	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
		/* on the R300, Tcas is included in Trbs */
		temp = RREG32(RADEON_MEM_CNTL);
		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
		if (data == 1) {
			if (R300_MEM_USE_CD_CH_ONLY & temp) {
				temp = RREG32(R300_MC_IND_INDEX);
				temp &= ~R300_MC_IND_ADDR_MASK;
				temp |= R300_MC_READ_CNTL_CD_mcind;
				WREG32(R300_MC_IND_INDEX, temp);
				temp = RREG32(R300_MC_IND_DATA);
				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
			} else {
				temp = RREG32(R300_MC_READ_CNTL_AB);
				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
			}
		} else {
			temp = RREG32(R300_MC_READ_CNTL_AB);
			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
		}
		if (rdev->family == CHIP_RV410 ||
		    rdev->family == CHIP_R420 ||
		    rdev->family == CHIP_R423)
			trbs_ff = memtrbs_r4xx[data];
		else
			trbs_ff = memtrbs[data];
		tcas_ff.full += trbs_ff.full;
	}

	sclk_eff_ff.full = sclk_ff.full;

	if (rdev->flags & RADEON_IS_AGP) {
		fixed20_12 agpmode_ff;
		agpmode_ff.full = rfixed_const(radeon_agpmode);
		temp_ff.full = rfixed_const_666(16);
		sclk_eff_ff.full -= rfixed_mul(agpmode_ff, temp_ff);
	}
	/* TODO: PCIE lanes may affect this - agpmode == 16?? */

	if (ASIC_IS_R300(rdev)) {
		sclk_delay_ff.full = rfixed_const(250);
	} else {
		if ((rdev->family == CHIP_RV100) ||
		    rdev->flags & RADEON_IS_IGP) {
			if (rdev->mc.vram_is_ddr)
				sclk_delay_ff.full = rfixed_const(41);
			else
				sclk_delay_ff.full = rfixed_const(33);
		} else {
			if (rdev->mc.vram_width == 128)
				sclk_delay_ff.full = rfixed_const(57);
			else
				sclk_delay_ff.full = rfixed_const(41);
		}
	}

	mc_latency_sclk.full = rfixed_div(sclk_delay_ff, sclk_eff_ff);

	if (rdev->mc.vram_is_ddr) {
		if (rdev->mc.vram_width == 32) {
			k1.full = rfixed_const(40);
			c = 3;
		} else {
			k1.full = rfixed_const(20);
			c = 1;
		}
	} else {
		k1.full = rfixed_const(40);
		c = 3;
	}

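	/* Worst-case latency of a display request through the memory
	 * controller, in microseconds:
	 *
	 *   mc_latency_mclk = (2*Trcd + c*Tcas + 4*(Tras + Trp) + K1) / Mclk
	 *                     + 4 / SclkEff
	 *
	 * with c and K1 chosen above from the DRAM type and bus width. */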
	temp_ff.full = rfixed_const(2);
	mc_latency_mclk.full = rfixed_mul(trcd_ff, temp_ff);
	temp_ff.full = rfixed_const(c);
	mc_latency_mclk.full += rfixed_mul(tcas_ff, temp_ff);
	temp_ff.full = rfixed_const(4);
	mc_latency_mclk.full += rfixed_mul(tras_ff, temp_ff);
	mc_latency_mclk.full += rfixed_mul(trp_ff, temp_ff);
	mc_latency_mclk.full += k1.full;

	mc_latency_mclk.full = rfixed_div(mc_latency_mclk, mclk_ff);
	mc_latency_mclk.full += rfixed_div(temp_ff, sclk_eff_ff);

	/* HW cursor time, assuming the worst case of a full-size colour
	 * cursor. */
	temp_ff.full = rfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
	temp_ff.full += trcd_ff.full;
	if (temp_ff.full < tras_ff.full)
		temp_ff.full = tras_ff.full;
	cur_latency_mclk.full = rfixed_div(temp_ff, mclk_ff);

	temp_ff.full = rfixed_const(cur_size);
	cur_latency_sclk.full = rfixed_div(temp_ff, sclk_eff_ff);
	/* Find the total latency for the display data. */
	disp_latency_overhead.full = rfixed_const(80);
	disp_latency_overhead.full = rfixed_div(disp_latency_overhead, sclk_ff);
	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;

	if (mc_latency_mclk.full > mc_latency_sclk.full)
		disp_latency.full = mc_latency_mclk.full;
	else
		disp_latency.full = mc_latency_sclk.full;

	/* setup Max GRPH_STOP_REQ default value */
	if (ASIC_IS_RV100(rdev))
		max_stop_req = 0x5c;
	else
		max_stop_req = 0x7c;

	if (mode1) {
		/* CRTC1
		 * Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
		 * GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
		 */
		stop_req = mode1->hdisplay * pixel_bytes1 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/* Find the drain rate of the display buffer. */
		temp_ff.full = rfixed_const((16/pixel_bytes1));
		disp_drain_rate.full = rfixed_div(pix_clk, temp_ff);

		/* Find the critical point of the display buffer. */
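		/* That is the FIFO fill level, in 16-byte units, below which
		 * display requests must go high priority: the amount the
		 * display drains during one worst-case request latency.
		 * rfixed_const_half() adds 0.5 so the truncation below
		 * rounds to nearest. */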
		crit_point_ff.full = rfixed_mul(disp_drain_rate, disp_latency);
		crit_point_ff.full += rfixed_const_half(0);

		critical_point = rfixed_trunc(crit_point_ff);

		if (rdev->disp_priority == 2) {
			critical_point = 0;
		}

		/* The critical point should never be above max_stop_req - 4.
		 * Setting GRPH_CRITICAL_CNTL = 0 will thus force high priority
		 * all the time. */
		if (max_stop_req - critical_point < 4)
			critical_point = 0;

		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0, when CRTC2 is enabled. */
			critical_point = 0x10;
		}

		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		temp &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		temp |= RADEON_GRPH_BUFFER_SIZE;
		temp &= ~(RADEON_GRPH_CRITICAL_CNTL |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);
		/* Write the result into the register. */
		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						 (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

#if 0
		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
			/* attempt to program RS400 disp regs correctly ??? */
			temp = RREG32(RS400_DISP1_REG_CNTL);
			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
				(critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
				(critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DMIF_MEM_CNTL1);
			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
				(critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
				(critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
		}
#endif

		DRM_DEBUG("GRPH_BUFFER_CNTL now: 0x%x\n",
			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
	}

	if (mode2) {
		u32 grph2_cntl;
		stop_req = mode2->hdisplay * pixel_bytes2 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/* Find the drain rate of the display buffer. */
		temp_ff.full = rfixed_const((16/pixel_bytes2));
		disp_drain_rate2.full = rfixed_div(pix_clk2, temp_ff);

		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL |
				RADEON_GRPH_CRITICAL_AT_SOF |
				RADEON_GRPH_STOP_CNTL);

		if ((rdev->family == CHIP_RS100) ||
		    (rdev->family == CHIP_RS200))
			critical_point2 = 0;
		else {
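			/* Estimate the rate at which reads return from
			 * memory: the DRAM appears able to deliver
			 * vram_width * (1 or 2 for DDR) bits, i.e. that
			 * value / 128 sixteen-byte chunks, per MCLK,
			 * further capped at one chunk per SCLK. The time
			 * CRTC1 takes to rise back above its critical point
			 * then bounds how long CRTC2 can be starved;
			 * disp_latency is deliberately counted twice below,
			 * matching the original DDX code. */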
			temp = (rdev->mc.vram_width * (rdev->mc.vram_is_ddr + 1)) / 128;
			temp_ff.full = rfixed_const(temp);
			temp_ff.full = rfixed_mul(mclk_ff, temp_ff);
			if (sclk_ff.full < temp_ff.full)
				temp_ff.full = sclk_ff.full;

			read_return_rate.full = temp_ff.full;

			if (mode1) {
				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
				time_disp1_drop_priority.full = rfixed_div(crit_point_ff, temp_ff);
			} else {
				time_disp1_drop_priority.full = 0;
			}
			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
			crit_point_ff.full = rfixed_mul(crit_point_ff, disp_drain_rate2);
			crit_point_ff.full += rfixed_const_half(0);

			critical_point2 = rfixed_trunc(crit_point_ff);

			if (rdev->disp_priority == 2) {
				critical_point2 = 0;
			}

			if (max_stop_req - critical_point2 < 4)
				critical_point2 = 0;

		}

		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0 */
			critical_point2 = 0x10;
		}

		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
#if 0
			/* attempt to program RS400 disp2 regs correctly ??? */
			temp = RREG32(RS400_DISP2_REQ_CNTL1);
			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
				(critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
				(critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DISP2_REQ_CNTL2);
			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
				(critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
				(critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
#endif
			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
			WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC);
			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
		}

		DRM_DEBUG("GRPH2_BUFFER_CNTL now: 0x%x\n",
			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
	}
}