| /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*- | 
 |  * | 
 |  * Copyright (C) The Weather Channel, Inc.  2002. | 
 |  * Copyright (C) 2004 Nicolai Haehnle. | 
 |  * All Rights Reserved. | 
 |  * | 
 |  * The Weather Channel (TM) funded Tungsten Graphics to develop the | 
 |  * initial release of the Radeon 8500 driver under the XFree86 license. | 
 |  * This notice must be preserved. | 
 |  * | 
 |  * Permission is hereby granted, free of charge, to any person obtaining a | 
 |  * copy of this software and associated documentation files (the "Software"), | 
 |  * to deal in the Software without restriction, including without limitation | 
 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
 |  * and/or sell copies of the Software, and to permit persons to whom the | 
 |  * Software is furnished to do so, subject to the following conditions: | 
 |  * | 
 |  * The above copyright notice and this permission notice (including the next | 
 |  * paragraph) shall be included in all copies or substantial portions of the | 
 |  * Software. | 
 |  * | 
 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL | 
 |  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR | 
 |  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | 
 |  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | 
 |  * DEALINGS IN THE SOFTWARE. | 
 |  * | 
 |  * Authors: | 
 |  *    Nicolai Haehnle <prefect_@gmx.net> | 
 |  */ | 
 |  | 
 | #include "drmP.h" | 
 | #include "drm.h" | 
 | #include "radeon_drm.h" | 
 | #include "radeon_drv.h" | 
 | #include "r300_reg.h" | 
 |  | 
 | #define R300_SIMULTANEOUS_CLIPRECTS		4 | 
 |  | 
 | /* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects | 
 |  */ | 
 | static const int r300_cliprect_cntl[4] = { | 
 | 	0xAAAA, | 
 | 	0xEEEE, | 
 | 	0xFEFE, | 
 | 	0xFFFE | 
 | }; | 
 |  | 
 | /** | 
 |  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command | 
 |  * buffer, starting with index n. | 
 |  */ | 
 | static int r300_emit_cliprects(drm_radeon_private_t *dev_priv, | 
 | 			       drm_radeon_kcmd_buffer_t *cmdbuf, int n) | 
 | { | 
 | 	drm_clip_rect_t box; | 
 | 	int nr; | 
 | 	int i; | 
 | 	RING_LOCALS; | 
 |  | 
 | 	nr = cmdbuf->nbox - n; | 
 | 	if (nr > R300_SIMULTANEOUS_CLIPRECTS) | 
 | 		nr = R300_SIMULTANEOUS_CLIPRECTS; | 
 |  | 
 | 	DRM_DEBUG("%i cliprects\n", nr); | 
 |  | 
 | 	if (nr) { | 
 | 		BEGIN_RING(6 + nr * 2); | 
 | 		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1)); | 
 |  | 
 | 		for (i = 0; i < nr; ++i) { | 
 | 			if (DRM_COPY_FROM_USER_UNCHECKED | 
 | 			    (&box, &cmdbuf->boxes[n + i], sizeof(box))) { | 
 | 				DRM_ERROR("copy cliprect faulted\n"); | 
 | 				return DRM_ERR(EFAULT); | 
 | 			} | 
 |  | 
 | 			box.x1 = | 
 | 			    (box.x1 + | 
 | 			     R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; | 
 | 			box.y1 = | 
 | 			    (box.y1 + | 
 | 			     R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; | 
 | 			box.x2 = | 
 | 			    (box.x2 + | 
 | 			     R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; | 
 | 			box.y2 = | 
 | 			    (box.y2 + | 
 | 			     R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; | 
 |  | 
 | 			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) | | 
 | 				 (box.y1 << R300_CLIPRECT_Y_SHIFT)); | 
 | 			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) | | 
 | 				 (box.y2 << R300_CLIPRECT_Y_SHIFT)); | 
 | 		} | 
 |  | 
 | 		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]); | 
 |  | 
 | 		/* TODO/SECURITY: Force scissors to a safe value, otherwise the | 
 | 		 * client might be able to trample over memory. | 
 | 		 * The impact should be very limited, but I'd rather be safe than | 
 | 		 * sorry. | 
 | 		 */ | 
 | 		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1)); | 
 | 		OUT_RING(0); | 
 | 		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK); | 
 | 		ADVANCE_RING(); | 
 | 	} else { | 
 | 		/* Why we allow zero cliprect rendering: | 
 | 		 * There are some commands in a command buffer that must be submitted | 
 | 		 * even when there are no cliprects, e.g. DMA buffer discard | 
 | 		 * or state setting (though state setting could be avoided by | 
 | 		 * simulating a loss of context). | 
 | 		 * | 
 | 		 * Now since the cmdbuf interface is so chaotic right now (and is | 
 | 		 * bound to remain that way for a bit until things settle down), | 
 | 		 * it is basically impossible to filter out the commands that are | 
 | 		 * necessary and those that aren't. | 
 | 		 * | 
 | 		 * So I choose the safe way and don't do any filtering at all; | 
 | 		 * instead, I simply set up the engine so that all rendering | 
 | 		 * can't produce any fragments. | 
 | 		 */ | 
 | 		BEGIN_RING(2); | 
 | 		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0); | 
 | 		ADVANCE_RING(); | 
 | 	} | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static u8 r300_reg_flags[0x10000 >> 2]; | 
 |  | 
 | void r300_init_reg_flags(void) | 
 | { | 
 | 	int i; | 
 | 	memset(r300_reg_flags, 0, 0x10000 >> 2); | 
 | #define ADD_RANGE_MARK(reg, count,mark) \ | 
 | 		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\ | 
 | 			r300_reg_flags[i]|=(mark); | 
 |  | 
 | #define MARK_SAFE		1 | 
 | #define MARK_CHECK_OFFSET	2 | 
 |  | 
 | #define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE) | 
 |  | 
 | 	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */ | 
 | 	ADD_RANGE(R300_SE_VPORT_XSCALE, 6); | 
 | 	ADD_RANGE(0x2080, 1); | 
 | 	ADD_RANGE(R300_SE_VTE_CNTL, 2); | 
 | 	ADD_RANGE(0x2134, 2); | 
 | 	ADD_RANGE(0x2140, 1); | 
 | 	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2); | 
 | 	ADD_RANGE(0x21DC, 1); | 
 | 	ADD_RANGE(0x221C, 1); | 
 | 	ADD_RANGE(0x2220, 4); | 
 | 	ADD_RANGE(0x2288, 1); | 
 | 	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2); | 
 | 	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3); | 
 | 	ADD_RANGE(R300_GB_ENABLE, 1); | 
 | 	ADD_RANGE(R300_GB_MSPOS0, 5); | 
 | 	ADD_RANGE(R300_TX_ENABLE, 1); | 
 | 	ADD_RANGE(0x4200, 4); | 
 | 	ADD_RANGE(0x4214, 1); | 
 | 	ADD_RANGE(R300_RE_POINTSIZE, 1); | 
 | 	ADD_RANGE(0x4230, 3); | 
 | 	ADD_RANGE(R300_RE_LINE_CNT, 1); | 
 | 	ADD_RANGE(0x4238, 1); | 
 | 	ADD_RANGE(0x4260, 3); | 
 | 	ADD_RANGE(0x4274, 4); | 
 | 	ADD_RANGE(0x4288, 5); | 
 | 	ADD_RANGE(0x42A0, 1); | 
 | 	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4); | 
 | 	ADD_RANGE(0x42B4, 1); | 
 | 	ADD_RANGE(R300_RE_CULL_CNTL, 1); | 
 | 	ADD_RANGE(0x42C0, 2); | 
 | 	ADD_RANGE(R300_RS_CNTL_0, 2); | 
 | 	ADD_RANGE(R300_RS_INTERP_0, 8); | 
 | 	ADD_RANGE(R300_RS_ROUTE_0, 8); | 
 | 	ADD_RANGE(0x43A4, 2); | 
 | 	ADD_RANGE(0x43E8, 1); | 
 | 	ADD_RANGE(R300_PFS_CNTL_0, 3); | 
 | 	ADD_RANGE(R300_PFS_NODE_0, 4); | 
 | 	ADD_RANGE(R300_PFS_TEXI_0, 64); | 
 | 	ADD_RANGE(0x46A4, 5); | 
 | 	ADD_RANGE(R300_PFS_INSTR0_0, 64); | 
 | 	ADD_RANGE(R300_PFS_INSTR1_0, 64); | 
 | 	ADD_RANGE(R300_PFS_INSTR2_0, 64); | 
 | 	ADD_RANGE(R300_PFS_INSTR3_0, 64); | 
 | 	ADD_RANGE(0x4BC0, 1); | 
 | 	ADD_RANGE(0x4BC8, 3); | 
 | 	ADD_RANGE(R300_PP_ALPHA_TEST, 2); | 
 | 	ADD_RANGE(0x4BD8, 1); | 
 | 	ADD_RANGE(R300_PFS_PARAM_0_X, 64); | 
 | 	ADD_RANGE(0x4E00, 1); | 
 | 	ADD_RANGE(R300_RB3D_CBLEND, 2); | 
 | 	ADD_RANGE(R300_RB3D_COLORMASK, 1); | 
 | 	ADD_RANGE(0x4E10, 3); | 
 | 	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);	/* check offset */ | 
 | 	ADD_RANGE(R300_RB3D_COLORPITCH0, 1); | 
 | 	ADD_RANGE(0x4E50, 9); | 
 | 	ADD_RANGE(0x4E88, 1); | 
 | 	ADD_RANGE(0x4EA0, 2); | 
 | 	ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3); | 
 | 	ADD_RANGE(0x4F10, 4); | 
 | 	ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);	/* check offset */ | 
 | 	ADD_RANGE(R300_RB3D_DEPTHPITCH, 1); | 
 | 	ADD_RANGE(0x4F28, 1); | 
 | 	ADD_RANGE(0x4F30, 2); | 
 | 	ADD_RANGE(0x4F44, 1); | 
 | 	ADD_RANGE(0x4F54, 1); | 
 |  | 
 | 	ADD_RANGE(R300_TX_FILTER_0, 16); | 
 | 	ADD_RANGE(R300_TX_UNK1_0, 16); | 
 | 	ADD_RANGE(R300_TX_SIZE_0, 16); | 
 | 	ADD_RANGE(R300_TX_FORMAT_0, 16); | 
 | 	ADD_RANGE(R300_TX_PITCH_0, 16); | 
 | 	/* Texture offset is dangerous and needs more checking */ | 
 | 	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET); | 
 | 	ADD_RANGE(R300_TX_UNK4_0, 16); | 
 | 	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16); | 
 |  | 
 | 	/* Sporadic registers used as primitives are emitted */ | 
 | 	ADD_RANGE(0x4f18, 1); | 
 | 	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1); | 
 | 	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8); | 
 | 	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8); | 
 |  | 
 | } | 
 |  | 
 | static __inline__ int r300_check_range(unsigned reg, int count) | 
 | { | 
 | 	int i; | 
 | 	if (reg & ~0xffff) | 
 | 		return -1; | 
 | 	for (i = (reg >> 2); i < (reg >> 2) + count; i++) | 
 | 		if (r300_reg_flags[i] != MARK_SAFE) | 
 | 			return 1; | 
 | 	return 0; | 
 | } | 
 |  | 
 |   /* we expect offsets passed to the framebuffer to be either within video memory or | 
 |      within AGP space */ | 
 | static __inline__ int r300_check_offset(drm_radeon_private_t *dev_priv, | 
 | 					u32 offset) | 
 | { | 
 | 	/* we realy want to check against end of video aperture | 
 | 	   but this value is not being kept. | 
 | 	   This code is correct for now (does the same thing as the | 
 | 	   code that sets MC_FB_LOCATION) in radeon_cp.c */ | 
 | 	if ((offset >= dev_priv->fb_location) && | 
 | 	    (offset < dev_priv->gart_vm_start)) | 
 | 		return 0; | 
 | 	if ((offset >= dev_priv->gart_vm_start) && | 
 | 	    (offset < dev_priv->gart_vm_start + dev_priv->gart_size)) | 
 | 		return 0; | 
 | 	return 1; | 
 | } | 
 |  | 
 | static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t * | 
 | 							  dev_priv, | 
 | 							  drm_radeon_kcmd_buffer_t | 
 | 							  * cmdbuf, | 
 | 							  drm_r300_cmd_header_t | 
 | 							  header) | 
 | { | 
 | 	int reg; | 
 | 	int sz; | 
 | 	int i; | 
 | 	int values[64]; | 
 | 	RING_LOCALS; | 
 |  | 
 | 	sz = header.packet0.count; | 
 | 	reg = (header.packet0.reghi << 8) | header.packet0.reglo; | 
 |  | 
 | 	if ((sz > 64) || (sz < 0)) { | 
 | 		DRM_ERROR | 
 | 		    ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", | 
 | 		     reg, sz); | 
 | 		return DRM_ERR(EINVAL); | 
 | 	} | 
 | 	for (i = 0; i < sz; i++) { | 
 | 		values[i] = ((int *)cmdbuf->buf)[i]; | 
 | 		switch (r300_reg_flags[(reg >> 2) + i]) { | 
 | 		case MARK_SAFE: | 
 | 			break; | 
 | 		case MARK_CHECK_OFFSET: | 
 | 			if (r300_check_offset(dev_priv, (u32) values[i])) { | 
 | 				DRM_ERROR | 
 | 				    ("Offset failed range check (reg=%04x sz=%d)\n", | 
 | 				     reg, sz); | 
 | 				return DRM_ERR(EINVAL); | 
 | 			} | 
 | 			break; | 
 | 		default: | 
 | 			DRM_ERROR("Register %04x failed check as flag=%02x\n", | 
 | 				  reg + i * 4, r300_reg_flags[(reg >> 2) + i]); | 
 | 			return DRM_ERR(EINVAL); | 
 | 		} | 
 | 	} | 
 |  | 
 | 	BEGIN_RING(1 + sz); | 
 | 	OUT_RING(CP_PACKET0(reg, sz - 1)); | 
 | 	OUT_RING_TABLE(values, sz); | 
 | 	ADVANCE_RING(); | 
 |  | 
 | 	cmdbuf->buf += sz * 4; | 
 | 	cmdbuf->bufsz -= sz * 4; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /** | 
 |  * Emits a packet0 setting arbitrary registers. | 
 |  * Called by r300_do_cp_cmdbuf. | 
 |  * | 
 |  * Note that checks are performed on contents and addresses of the registers | 
 |  */ | 
 | static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv, | 
 | 					drm_radeon_kcmd_buffer_t *cmdbuf, | 
 | 					drm_r300_cmd_header_t header) | 
 | { | 
 | 	int reg; | 
 | 	int sz; | 
 | 	RING_LOCALS; | 
 |  | 
 | 	sz = header.packet0.count; | 
 | 	reg = (header.packet0.reghi << 8) | header.packet0.reglo; | 
 |  | 
 | 	if (!sz) | 
 | 		return 0; | 
 |  | 
 | 	if (sz * 4 > cmdbuf->bufsz) | 
 | 		return DRM_ERR(EINVAL); | 
 |  | 
 | 	if (reg + sz * 4 >= 0x10000) { | 
 | 		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg, | 
 | 			  sz); | 
 | 		return DRM_ERR(EINVAL); | 
 | 	} | 
 |  | 
 | 	if (r300_check_range(reg, sz)) { | 
 | 		/* go and check everything */ | 
 | 		return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf, | 
 | 							   header); | 
 | 	} | 
 | 	/* the rest of the data is safe to emit, whatever the values the user passed */ | 
 |  | 
 | 	BEGIN_RING(1 + sz); | 
 | 	OUT_RING(CP_PACKET0(reg, sz - 1)); | 
 | 	OUT_RING_TABLE((int *)cmdbuf->buf, sz); | 
 | 	ADVANCE_RING(); | 
 |  | 
 | 	cmdbuf->buf += sz * 4; | 
 | 	cmdbuf->bufsz -= sz * 4; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /** | 
 |  * Uploads user-supplied vertex program instructions or parameters onto | 
 |  * the graphics card. | 
 |  * Called by r300_do_cp_cmdbuf. | 
 |  */ | 
 | static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv, | 
 | 				    drm_radeon_kcmd_buffer_t *cmdbuf, | 
 | 				    drm_r300_cmd_header_t header) | 
 | { | 
 | 	int sz; | 
 | 	int addr; | 
 | 	RING_LOCALS; | 
 |  | 
 | 	sz = header.vpu.count; | 
 | 	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo; | 
 |  | 
 | 	if (!sz) | 
 | 		return 0; | 
 | 	if (sz * 16 > cmdbuf->bufsz) | 
 | 		return DRM_ERR(EINVAL); | 
 |  | 
 | 	BEGIN_RING(5 + sz * 4); | 
 | 	/* Wait for VAP to come to senses.. */ | 
 | 	/* there is no need to emit it multiple times, (only once before VAP is programmed, | 
 | 	   but this optimization is for later */ | 
 | 	OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0); | 
 | 	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr); | 
 | 	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1)); | 
 | 	OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4); | 
 |  | 
 | 	ADVANCE_RING(); | 
 |  | 
 | 	cmdbuf->buf += sz * 16; | 
 | 	cmdbuf->bufsz -= sz * 16; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /** | 
 |  * Emit a clear packet from userspace. | 
 |  * Called by r300_emit_packet3. | 
 |  */ | 
 | static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv, | 
 | 				      drm_radeon_kcmd_buffer_t *cmdbuf) | 
 | { | 
 | 	RING_LOCALS; | 
 |  | 
 | 	if (8 * 4 > cmdbuf->bufsz) | 
 | 		return DRM_ERR(EINVAL); | 
 |  | 
 | 	BEGIN_RING(10); | 
 | 	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); | 
 | 	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | | 
 | 		 (1 << R300_PRIM_NUM_VERTICES_SHIFT)); | 
 | 	OUT_RING_TABLE((int *)cmdbuf->buf, 8); | 
 | 	ADVANCE_RING(); | 
 |  | 
 | 	cmdbuf->buf += 8 * 4; | 
 | 	cmdbuf->bufsz -= 8 * 4; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv, | 
 | 					       drm_radeon_kcmd_buffer_t *cmdbuf, | 
 | 					       u32 header) | 
 | { | 
 | 	int count, i, k; | 
 | #define MAX_ARRAY_PACKET  64 | 
 | 	u32 payload[MAX_ARRAY_PACKET]; | 
 | 	u32 narrays; | 
 | 	RING_LOCALS; | 
 |  | 
 | 	count = (header >> 16) & 0x3fff; | 
 |  | 
 | 	if ((count + 1) > MAX_ARRAY_PACKET) { | 
 | 		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", | 
 | 			  count); | 
 | 		return DRM_ERR(EINVAL); | 
 | 	} | 
 | 	memset(payload, 0, MAX_ARRAY_PACKET * 4); | 
 | 	memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4); | 
 |  | 
 | 	/* carefully check packet contents */ | 
 |  | 
 | 	narrays = payload[0]; | 
 | 	k = 0; | 
 | 	i = 1; | 
 | 	while ((k < narrays) && (i < (count + 1))) { | 
 | 		i++;		/* skip attribute field */ | 
 | 		if (r300_check_offset(dev_priv, payload[i])) { | 
 | 			DRM_ERROR | 
 | 			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", | 
 | 			     k, i); | 
 | 			return DRM_ERR(EINVAL); | 
 | 		} | 
 | 		k++; | 
 | 		i++; | 
 | 		if (k == narrays) | 
 | 			break; | 
 | 		/* have one more to process, they come in pairs */ | 
 | 		if (r300_check_offset(dev_priv, payload[i])) { | 
 | 			DRM_ERROR | 
 | 			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", | 
 | 			     k, i); | 
 | 			return DRM_ERR(EINVAL); | 
 | 		} | 
 | 		k++; | 
 | 		i++; | 
 | 	} | 
 | 	/* do the counts match what we expect ? */ | 
 | 	if ((k != narrays) || (i != (count + 1))) { | 
 | 		DRM_ERROR | 
 | 		    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n", | 
 | 		     k, i, narrays, count + 1); | 
 | 		return DRM_ERR(EINVAL); | 
 | 	} | 
 |  | 
 | 	/* all clear, output packet */ | 
 |  | 
 | 	BEGIN_RING(count + 2); | 
 | 	OUT_RING(header); | 
 | 	OUT_RING_TABLE(payload, count + 1); | 
 | 	ADVANCE_RING(); | 
 |  | 
 | 	cmdbuf->buf += (count + 2) * 4; | 
 | 	cmdbuf->bufsz -= (count + 2) * 4; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv, | 
 | 					    drm_radeon_kcmd_buffer_t *cmdbuf) | 
 | { | 
 | 	u32 header; | 
 | 	int count; | 
 | 	RING_LOCALS; | 
 |  | 
 | 	if (4 > cmdbuf->bufsz) | 
 | 		return DRM_ERR(EINVAL); | 
 |  | 
 | 	/* Fixme !! This simply emits a packet without much checking. | 
 | 	   We need to be smarter. */ | 
 |  | 
 | 	/* obtain first word - actual packet3 header */ | 
 | 	header = *(u32 *) cmdbuf->buf; | 
 |  | 
 | 	/* Is it packet 3 ? */ | 
 | 	if ((header >> 30) != 0x3) { | 
 | 		DRM_ERROR("Not a packet3 header (0x%08x)\n", header); | 
 | 		return DRM_ERR(EINVAL); | 
 | 	} | 
 |  | 
 | 	count = (header >> 16) & 0x3fff; | 
 |  | 
 | 	/* Check again now that we know how much data to expect */ | 
 | 	if ((count + 2) * 4 > cmdbuf->bufsz) { | 
 | 		DRM_ERROR | 
 | 		    ("Expected packet3 of length %d but have only %d bytes left\n", | 
 | 		     (count + 2) * 4, cmdbuf->bufsz); | 
 | 		return DRM_ERR(EINVAL); | 
 | 	} | 
 |  | 
 | 	/* Is it a packet type we know about ? */ | 
 | 	switch (header & 0xff00) { | 
 | 	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */ | 
 | 		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header); | 
 |  | 
 | 	case RADEON_CP_3D_DRAW_IMMD_2:	/* triggers drawing using in-packet vertex data */ | 
 | 	case RADEON_CP_3D_DRAW_VBUF_2:	/* triggers drawing of vertex buffers setup elsewhere */ | 
 | 	case RADEON_CP_3D_DRAW_INDX_2:	/* triggers drawing using indices to vertex buffer */ | 
 | 	case RADEON_CP_INDX_BUFFER:	/* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */ | 
 | 	case RADEON_WAIT_FOR_IDLE: | 
 | 	case RADEON_CP_NOP: | 
 | 		/* these packets are safe */ | 
 | 		break; | 
 | 	default: | 
 | 		DRM_ERROR("Unknown packet3 header (0x%08x)\n", header); | 
 | 		return DRM_ERR(EINVAL); | 
 | 	} | 
 |  | 
 | 	BEGIN_RING(count + 2); | 
 | 	OUT_RING(header); | 
 | 	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); | 
 | 	ADVANCE_RING(); | 
 |  | 
 | 	cmdbuf->buf += (count + 2) * 4; | 
 | 	cmdbuf->bufsz -= (count + 2) * 4; | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /** | 
 |  * Emit a rendering packet3 from userspace. | 
 |  * Called by r300_do_cp_cmdbuf. | 
 |  */ | 
 | static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, | 
 | 					drm_radeon_kcmd_buffer_t *cmdbuf, | 
 | 					drm_r300_cmd_header_t header) | 
 | { | 
 | 	int n; | 
 | 	int ret; | 
 | 	char *orig_buf = cmdbuf->buf; | 
 | 	int orig_bufsz = cmdbuf->bufsz; | 
 |  | 
 | 	/* This is a do-while-loop so that we run the interior at least once, | 
 | 	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale. | 
 | 	 */ | 
 | 	n = 0; | 
 | 	do { | 
 | 		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) { | 
 | 			ret = r300_emit_cliprects(dev_priv, cmdbuf, n); | 
 | 			if (ret) | 
 | 				return ret; | 
 |  | 
 | 			cmdbuf->buf = orig_buf; | 
 | 			cmdbuf->bufsz = orig_bufsz; | 
 | 		} | 
 |  | 
 | 		switch (header.packet3.packet) { | 
 | 		case R300_CMD_PACKET3_CLEAR: | 
 | 			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n"); | 
 | 			ret = r300_emit_clear(dev_priv, cmdbuf); | 
 | 			if (ret) { | 
 | 				DRM_ERROR("r300_emit_clear failed\n"); | 
 | 				return ret; | 
 | 			} | 
 | 			break; | 
 |  | 
 | 		case R300_CMD_PACKET3_RAW: | 
 | 			DRM_DEBUG("R300_CMD_PACKET3_RAW\n"); | 
 | 			ret = r300_emit_raw_packet3(dev_priv, cmdbuf); | 
 | 			if (ret) { | 
 | 				DRM_ERROR("r300_emit_raw_packet3 failed\n"); | 
 | 				return ret; | 
 | 			} | 
 | 			break; | 
 |  | 
 | 		default: | 
 | 			DRM_ERROR("bad packet3 type %i at %p\n", | 
 | 				  header.packet3.packet, | 
 | 				  cmdbuf->buf - sizeof(header)); | 
 | 			return DRM_ERR(EINVAL); | 
 | 		} | 
 |  | 
 | 		n += R300_SIMULTANEOUS_CLIPRECTS; | 
 | 	} while (n < cmdbuf->nbox); | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /* Some of the R300 chips seem to be extremely touchy about the two registers | 
 |  * that are configured in r300_pacify. | 
 |  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace | 
 |  * sends a command buffer that contains only state setting commands and a | 
 |  * vertex program/parameter upload sequence, this will eventually lead to a | 
 |  * lockup, unless the sequence is bracketed by calls to r300_pacify. | 
 |  * So we should take great care to *always* call r300_pacify before | 
 |  * *anything* 3D related, and again afterwards. This is what the | 
 |  * call bracket in r300_do_cp_cmdbuf is for. | 
 |  */ | 
 |  | 
 | /** | 
 |  * Emit the sequence to pacify R300. | 
 |  */ | 
 | static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv) | 
 | { | 
 | 	RING_LOCALS; | 
 |  | 
 | 	BEGIN_RING(6); | 
 | 	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); | 
 | 	OUT_RING(0xa); | 
 | 	OUT_RING(CP_PACKET0(0x4f18, 0)); | 
 | 	OUT_RING(0x3); | 
 | 	OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0)); | 
 | 	OUT_RING(0x0); | 
 | 	ADVANCE_RING(); | 
 | } | 
 |  | 
 | /** | 
 |  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state. | 
 |  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must | 
 |  * be careful about how this function is called. | 
 |  */ | 
 | static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf) | 
 | { | 
 | 	drm_radeon_private_t *dev_priv = dev->dev_private; | 
 | 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private; | 
 |  | 
 | 	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch; | 
 | 	buf->pending = 1; | 
 | 	buf->used = 0; | 
 | } | 
 |  | 
 | /** | 
 |  * Parses and validates a user-supplied command buffer and emits appropriate | 
 |  * commands on the DMA ring buffer. | 
 |  * Called by the ioctl handler function radeon_cp_cmdbuf. | 
 |  */ | 
 | int r300_do_cp_cmdbuf(drm_device_t *dev, | 
 | 		      DRMFILE filp, | 
 | 		      drm_file_t *filp_priv, | 
 | 		      drm_radeon_kcmd_buffer_t *cmdbuf) | 
 | { | 
 | 	drm_radeon_private_t *dev_priv = dev->dev_private; | 
 | 	drm_device_dma_t *dma = dev->dma; | 
 | 	drm_buf_t *buf = NULL; | 
 | 	int emit_dispatch_age = 0; | 
 | 	int ret = 0; | 
 |  | 
 | 	DRM_DEBUG("\n"); | 
 |  | 
 | 	/* See the comment above r300_emit_begin3d for why this call must be here, | 
 | 	 * and what the cleanup gotos are for. */ | 
 | 	r300_pacify(dev_priv); | 
 |  | 
 | 	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) { | 
 | 		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0); | 
 | 		if (ret) | 
 | 			goto cleanup; | 
 | 	} | 
 |  | 
 | 	while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) { | 
 | 		int idx; | 
 | 		drm_r300_cmd_header_t header; | 
 |  | 
 | 		header.u = *(unsigned int *)cmdbuf->buf; | 
 |  | 
 | 		cmdbuf->buf += sizeof(header); | 
 | 		cmdbuf->bufsz -= sizeof(header); | 
 |  | 
 | 		switch (header.header.cmd_type) { | 
 | 		case R300_CMD_PACKET0: | 
 | 			DRM_DEBUG("R300_CMD_PACKET0\n"); | 
 | 			ret = r300_emit_packet0(dev_priv, cmdbuf, header); | 
 | 			if (ret) { | 
 | 				DRM_ERROR("r300_emit_packet0 failed\n"); | 
 | 				goto cleanup; | 
 | 			} | 
 | 			break; | 
 |  | 
 | 		case R300_CMD_VPU: | 
 | 			DRM_DEBUG("R300_CMD_VPU\n"); | 
 | 			ret = r300_emit_vpu(dev_priv, cmdbuf, header); | 
 | 			if (ret) { | 
 | 				DRM_ERROR("r300_emit_vpu failed\n"); | 
 | 				goto cleanup; | 
 | 			} | 
 | 			break; | 
 |  | 
 | 		case R300_CMD_PACKET3: | 
 | 			DRM_DEBUG("R300_CMD_PACKET3\n"); | 
 | 			ret = r300_emit_packet3(dev_priv, cmdbuf, header); | 
 | 			if (ret) { | 
 | 				DRM_ERROR("r300_emit_packet3 failed\n"); | 
 | 				goto cleanup; | 
 | 			} | 
 | 			break; | 
 |  | 
 | 		case R300_CMD_END3D: | 
 | 			DRM_DEBUG("R300_CMD_END3D\n"); | 
 | 			/* TODO: | 
 | 			   Ideally userspace driver should not need to issue this call, | 
 | 			   i.e. the drm driver should issue it automatically and prevent | 
 | 			   lockups. | 
 |  | 
 | 			   In practice, we do not understand why this call is needed and what | 
 | 			   it does (except for some vague guesses that it has to do with cache | 
 | 			   coherence) and so the user space driver does it. | 
 |  | 
 | 			   Once we are sure which uses prevent lockups the code could be moved | 
 | 			   into the kernel and the userspace driver will not | 
 | 			   need to use this command. | 
 |  | 
 | 			   Note that issuing this command does not hurt anything | 
 | 			   except, possibly, performance */ | 
 | 			r300_pacify(dev_priv); | 
 | 			break; | 
 |  | 
 | 		case R300_CMD_CP_DELAY: | 
 | 			/* simple enough, we can do it here */ | 
 | 			DRM_DEBUG("R300_CMD_CP_DELAY\n"); | 
 | 			{ | 
 | 				int i; | 
 | 				RING_LOCALS; | 
 |  | 
 | 				BEGIN_RING(header.delay.count); | 
 | 				for (i = 0; i < header.delay.count; i++) | 
 | 					OUT_RING(RADEON_CP_PACKET2); | 
 | 				ADVANCE_RING(); | 
 | 			} | 
 | 			break; | 
 |  | 
 | 		case R300_CMD_DMA_DISCARD: | 
 | 			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n"); | 
 | 			idx = header.dma.buf_idx; | 
 | 			if (idx < 0 || idx >= dma->buf_count) { | 
 | 				DRM_ERROR("buffer index %d (of %d max)\n", | 
 | 					  idx, dma->buf_count - 1); | 
 | 				ret = DRM_ERR(EINVAL); | 
 | 				goto cleanup; | 
 | 			} | 
 |  | 
 | 			buf = dma->buflist[idx]; | 
 | 			if (buf->filp != filp || buf->pending) { | 
 | 				DRM_ERROR("bad buffer %p %p %d\n", | 
 | 					  buf->filp, filp, buf->pending); | 
 | 				ret = DRM_ERR(EINVAL); | 
 | 				goto cleanup; | 
 | 			} | 
 |  | 
 | 			emit_dispatch_age = 1; | 
 | 			r300_discard_buffer(dev, buf); | 
 | 			break; | 
 |  | 
 | 		case R300_CMD_WAIT: | 
 | 			/* simple enough, we can do it here */ | 
 | 			DRM_DEBUG("R300_CMD_WAIT\n"); | 
 | 			if (header.wait.flags == 0) | 
 | 				break;	/* nothing to do */ | 
 |  | 
 | 			{ | 
 | 				RING_LOCALS; | 
 |  | 
 | 				BEGIN_RING(2); | 
 | 				OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); | 
 | 				OUT_RING((header.wait.flags & 0xf) << 14); | 
 | 				ADVANCE_RING(); | 
 | 			} | 
 | 			break; | 
 |  | 
 | 		default: | 
 | 			DRM_ERROR("bad cmd_type %i at %p\n", | 
 | 				  header.header.cmd_type, | 
 | 				  cmdbuf->buf - sizeof(header)); | 
 | 			ret = DRM_ERR(EINVAL); | 
 | 			goto cleanup; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	DRM_DEBUG("END\n"); | 
 |  | 
 |       cleanup: | 
 | 	r300_pacify(dev_priv); | 
 |  | 
 | 	/* We emit the vertex buffer age here, outside the pacifier "brackets" | 
 | 	 * for two reasons: | 
 | 	 *  (1) This may coalesce multiple age emissions into a single one and | 
 | 	 *  (2) more importantly, some chips lock up hard when scratch registers | 
 | 	 *      are written inside the pacifier bracket. | 
 | 	 */ | 
 | 	if (emit_dispatch_age) { | 
 | 		RING_LOCALS; | 
 |  | 
 | 		/* Emit the vertex buffer age */ | 
 | 		BEGIN_RING(2); | 
 | 		RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch); | 
 | 		ADVANCE_RING(); | 
 | 	} | 
 |  | 
 | 	COMMIT_RING(); | 
 |  | 
 | 	return ret; | 
 | } |