| Michael Schmitz | a100501 | 2007-05-01 22:32:39 +0200 | [diff] [blame] | 1 | #ifndef _VIDEO_ATAFB_UTILS_H | 
 | 2 | #define _VIDEO_ATAFB_UTILS_H | 
 | 3 |  | 
 | 4 | /* ================================================================= */ | 
 | 5 | /*                      Utility Assembler Functions                  */ | 
 | 6 | /* ================================================================= */ | 
 | 7 |  | 
 | 8 | /* ====================================================================== */ | 
 | 9 |  | 
 | 10 | /* Those of a delicate disposition might like to skip the next couple of | 
 | 11 |  * pages. | 
 | 12 |  * | 
 | 13 |  * These functions are drop in replacements for memmove and | 
 | 14 |  * memset(_, 0, _). However their five instances add at least a kilobyte | 
 | 15 |  * to the object file. You have been warned. | 
 | 16 |  * | 
 * I am not a great fan of assembler for the sake of it, but I think
 * that these routines are at least 10 times faster than their C
 * equivalents for large blits, and that's important to the lowest level of
 * a graphics driver. The question is whether some scheme with the blitter
 * would be faster. I suspect not for a simple text system - not much
 * asynchrony.
 | 23 |  * | 
 * Code is very simple, just gruesome expansion. Basic strategy is to
 * increase data moved/cleared at each step to 16 bytes to reduce
 * instruction per data move overhead. movem might be faster still.
 * For more than 15 bytes, we try to align the write direction on a
 * longword boundary to get maximum speed. This is even more gruesome.
 * The unaligned reads/writes used require a 68020+ - think this is a problem?
 | 30 |  * | 
 | 31 |  * Sorry! | 
 | 32 |  */ | 
 | 33 |  | 
 | 34 |  | 
 | 35 | /* ++roman: I've optimized Robert's original versions in some minor | 
 | 36 |  * aspects, e.g. moveq instead of movel, let gcc choose the registers, | 
 | 37 |  * use movem in some places... | 
 | 38 |  * For other modes than 1 plane, lots of more such assembler functions | 
 | 39 |  * were needed (e.g. the ones using movep or expanding color values). | 
 | 40 |  */ | 
 | 41 |  | 
 | 42 | /* ++andreas: more optimizations: | 
 | 43 |    subl #65536,d0 replaced by clrw d0; subql #1,d0 for dbcc | 
 | 44 |    addal is faster than addaw | 
 | 45 |    movep is rather expensive compared to ordinary move's | 
 | 46 |    some functions rewritten in C for clarity, no speed loss */ | 
 | 47 |  | 
 | 48 | static inline void *fb_memclear_small(void *s, size_t count) | 
 | 49 | { | 
 | 50 | 	if (!count) | 
 | 51 | 		return 0; | 
 | 52 |  | 
 | 53 | 	asm volatile ("\n" | 
 | 54 | 		"	lsr.l	#1,%1 ; jcc 1f ; move.b %2,-(%0)\n" | 
 | 55 | 		"1:	lsr.l	#1,%1 ; jcc 1f ; move.w %2,-(%0)\n" | 
 | 56 | 		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0)\n" | 
 | 57 | 		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n" | 
 | 58 | 		"1:" | 
 | 59 | 		: "=a" (s), "=d" (count) | 
 | 60 | 		: "d" (0), "0" ((char *)s + count), "1" (count)); | 
 | 61 | 	asm volatile ("\n" | 
 | 62 | 		"	subq.l  #1,%1\n" | 
 | 63 | 		"	jcs	3f\n" | 
 | 64 | 		"	move.l	%2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n" | 
 | 65 | 		"2:	movem.l	%2/%%d4/%%d5/%%d6,-(%0)\n" | 
 | 66 | 		"	dbra	%1,2b\n" | 
 | 67 | 		"3:" | 
 | 68 | 		: "=a" (s), "=d" (count) | 
 | 69 | 		: "d" (0), "0" (s), "1" (count) | 
 | 70 | 		: "d4", "d5", "d6" | 
 | 71 | 		); | 
 | 72 |  | 
 | 73 | 	return 0; | 
 | 74 | } | 
 | 75 |  | 
 | 76 |  | 
 | 77 | static inline void *fb_memclear(void *s, size_t count) | 
 | 78 | { | 
 | 79 | 	if (!count) | 
 | 80 | 		return 0; | 
 | 81 |  | 
 | 82 | 	if (count < 16) { | 
 | 83 | 		asm volatile ("\n" | 
 | 84 | 			"	lsr.l	#1,%1 ; jcc 1f ; clr.b (%0)+\n" | 
 | 85 | 			"1:	lsr.l	#1,%1 ; jcc 1f ; clr.w (%0)+\n" | 
 | 86 | 			"1:	lsr.l	#1,%1 ; jcc 1f ; clr.l (%0)+\n" | 
 | 87 | 			"1:	lsr.l	#1,%1 ; jcc 1f ; clr.l (%0)+ ; clr.l (%0)+\n" | 
 | 88 | 			"1:" | 
 | 89 | 			: "=a" (s), "=d" (count) | 
 | 90 | 			: "0" (s), "1" (count)); | 
 | 91 | 	} else { | 
 | 92 | 		long tmp; | 
 | 93 | 		asm volatile ("\n" | 
 | 94 | 			"	move.l	%1,%2\n" | 
 | 95 | 			"	lsr.l	#1,%2 ; jcc 1f ; clr.b (%0)+ ; subq.w #1,%1\n" | 
 | 96 | 			"	lsr.l	#1,%2 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/ | 
 | 97 | 			"	clr.w	(%0)+  ; subq.w  #2,%1 ; jra 2f\n" | 
 | 98 | 			"1:	lsr.l	#1,%2 ; jcc 2f\n" | 
 | 99 | 			"	clr.w	(%0)+  ; subq.w  #2,%1\n" | 
 | 100 | 			"2:	move.w	%1,%2; lsr.l #2,%1 ; jeq 6f\n" | 
 | 101 | 			"	lsr.l	#1,%1 ; jcc 3f ; clr.l (%0)+\n" | 
 | 102 | 			"3:	lsr.l	#1,%1 ; jcc 4f ; clr.l (%0)+ ; clr.l (%0)+\n" | 
 | 103 | 			"4:	subq.l	#1,%1 ; jcs 6f\n" | 
 | 104 | 			"5:	clr.l	(%0)+; clr.l (%0)+ ; clr.l (%0)+ ; clr.l (%0)+\n" | 
 | 105 | 			"	dbra	%1,5b ; clr.w %1; subq.l #1,%1; jcc 5b\n" | 
 | 106 | 			"6:	move.w	%2,%1; btst #1,%1 ; jeq 7f ; clr.w (%0)+\n" | 
 | 107 | 			"7:	btst	#0,%1 ; jeq 8f ; clr.b (%0)+\n" | 
 | 108 | 			"8:" | 
 | 109 | 			: "=a" (s), "=d" (count), "=d" (tmp) | 
 | 110 | 			: "0" (s), "1" (count)); | 
 | 111 | 	} | 
 | 112 |  | 
 | 113 | 	return 0; | 
 | 114 | } | 
 | 115 |  | 
 | 116 |  | 
 | 117 | static inline void *fb_memset255(void *s, size_t count) | 
 | 118 | { | 
 | 119 | 	if (!count) | 
 | 120 | 		return 0; | 
 | 121 |  | 
 | 122 | 	asm volatile ("\n" | 
 | 123 | 		"	lsr.l	#1,%1 ; jcc 1f ; move.b %2,-(%0)\n" | 
 | 124 | 		"1:	lsr.l	#1,%1 ; jcc 1f ; move.w %2,-(%0)\n" | 
 | 125 | 		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0)\n" | 
 | 126 | 		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n" | 
 | 127 | 		"1:" | 
 | 128 | 		: "=a" (s), "=d" (count) | 
 | 129 | 		: "d" (-1), "0" ((char *)s+count), "1" (count)); | 
 | 130 | 	asm volatile ("\n" | 
 | 131 | 		"	subq.l	#1,%1 ; jcs 3f\n" | 
 | 132 | 		"	move.l	%2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n" | 
 | 133 | 		"2:	movem.l	%2/%%d4/%%d5/%%d6,-(%0)\n" | 
 | 134 | 		"	dbra	%1,2b\n" | 
 | 135 | 		"3:" | 
 | 136 | 		: "=a" (s), "=d" (count) | 
 | 137 | 		: "d" (-1), "0" (s), "1" (count) | 
 | 138 | 		: "d4", "d5", "d6"); | 
 | 139 |  | 
 | 140 | 	return 0; | 
 | 141 | } | 
 | 142 |  | 
 | 143 |  | 
 | 144 | static inline void *fb_memmove(void *d, const void *s, size_t count) | 
 | 145 | { | 
 | 146 | 	if (d < s) { | 
 | 147 | 		if (count < 16) { | 
 | 148 | 			asm volatile ("\n" | 
 | 149 | 				"	lsr.l	#1,%2 ; jcc 1f ; move.b (%1)+,(%0)+\n" | 
 | 150 | 				"1:	lsr.l	#1,%2 ; jcc 1f ; move.w (%1)+,(%0)+\n" | 
 | 151 | 				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l (%1)+,(%0)+\n" | 
 | 152 | 				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n" | 
 | 153 | 				"1:" | 
 | 154 | 				: "=a" (d), "=a" (s), "=d" (count) | 
 | 155 | 				: "0" (d), "1" (s), "2" (count)); | 
 | 156 | 		} else { | 
 | 157 | 			long tmp; | 
 | 158 | 			asm volatile ("\n" | 
 | 159 | 				"	move.l	%0,%3\n" | 
 | 160 | 				"	lsr.l	#1,%3 ; jcc 1f ; move.b (%1)+,(%0)+ ; subqw #1,%2\n" | 
 | 161 | 				"	lsr.l	#1,%3 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/ | 
 | 162 | 				"	move.w	(%1)+,(%0)+  ; subqw  #2,%2 ; jra 2f\n" | 
 | 163 | 				"1:	lsr.l   #1,%3 ; jcc 2f\n" | 
 | 164 | 				"	move.w	(%1)+,(%0)+  ; subqw  #2,%2\n" | 
 | 165 | 				"2:	move.w	%2,%-; lsr.l #2,%2 ; jeq 6f\n" | 
 | 166 | 				"	lsr.l	#1,%2 ; jcc 3f ; move.l (%1)+,(%0)+\n" | 
 | 167 | 				"3:	lsr.l	#1,%2 ; jcc 4f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n" | 
 | 168 | 				"4:	subq.l	#1,%2 ; jcs 6f\n" | 
 | 169 | 				"5:	move.l	(%1)+,(%0)+; move.l (%1)+,(%0)+\n" | 
 | 170 | 				"	move.l	(%1)+,(%0)+; move.l (%1)+,(%0)+\n" | 
 | 171 | 				"	dbra	%2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n" | 
 | 172 | 				"6:	move.w	%+,%2; btst #1,%2 ; jeq 7f ; move.w (%1)+,(%0)+\n" | 
 | 173 | 				"7:	btst	#0,%2 ; jeq 8f ; move.b (%1)+,(%0)+\n" | 
 | 174 | 				"8:" | 
 | 175 | 				: "=a" (d), "=a" (s), "=d" (count), "=d" (tmp) | 
 | 176 | 				: "0" (d), "1" (s), "2" (count)); | 
 | 177 | 		} | 
 | 178 | 	} else { | 
 | 179 | 		if (count < 16) { | 
 | 180 | 			asm volatile ("\n" | 
 | 181 | 				"	lsr.l	#1,%2 ; jcc 1f ; move.b -(%1),-(%0)\n" | 
 | 182 | 				"1:	lsr.l	#1,%2 ; jcc 1f ; move.w -(%1),-(%0)\n" | 
 | 183 | 				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l -(%1),-(%0)\n" | 
 | 184 | 				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n" | 
 | 185 | 				"1:" | 
 | 186 | 				: "=a" (d), "=a" (s), "=d" (count) | 
 | 187 | 				: "0" ((char *) d + count), "1" ((char *) s + count), "2" (count)); | 
 | 188 | 		} else { | 
 | 189 | 			long tmp; | 
 | 190 |  | 
 | 191 | 			asm volatile ("\n" | 
 | 192 | 				"	move.l	%0,%3\n" | 
 | 193 | 				"	lsr.l	#1,%3 ; jcc 1f ; move.b -(%1),-(%0) ; subqw #1,%2\n" | 
 | 194 | 				"	lsr.l	#1,%3 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/ | 
 | 195 | 				"	move.w	-(%1),-(%0) ; subqw  #2,%2 ; jra 2f\n" | 
 | 196 | 				"1:	lsr.l	#1,%3 ; jcc 2f\n" | 
 | 197 | 				"	move.w	-(%1),-(%0) ; subqw  #2,%2\n" | 
 | 198 | 				"2:	move.w	%2,%-; lsr.l #2,%2 ; jeq 6f\n" | 
 | 199 | 				"	lsr.l	#1,%2 ; jcc 3f ; move.l -(%1),-(%0)\n" | 
 | 200 | 				"3:	lsr.l	#1,%2 ; jcc 4f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n" | 
 | 201 | 				"4:	subq.l	#1,%2 ; jcs 6f\n" | 
 | 202 | 				"5:	move.l	-(%1),-(%0); move.l -(%1),-(%0)\n" | 
 | 203 | 				"	move.l	-(%1),-(%0); move.l -(%1),-(%0)\n" | 
 | 204 | 				"	dbra	%2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n" | 
 | 205 | 				"6:	move.w	%+,%2; btst #1,%2 ; jeq 7f ; move.w -(%1),-(%0)\n" | 
 | 206 | 				"7:	btst	#0,%2 ; jeq 8f ; move.b -(%1),-(%0)\n" | 
 | 207 | 				"8:" | 
 | 208 | 				: "=a" (d), "=a" (s), "=d" (count), "=d" (tmp) | 
 | 209 | 				: "0" ((char *) d + count), "1" ((char *) s + count), "2" (count)); | 
 | 210 | 		} | 
 | 211 | 	} | 
 | 212 |  | 
 | 213 | 	return 0; | 
 | 214 | } | 
 | 215 |  | 
 | 216 |  | 
 | 217 | /* ++andreas: Simple and fast version of memmove, assumes size is | 
 | 218 |    divisible by 16, suitable for moving the whole screen bitplane */ | 
 | 219 | static inline void fast_memmove(char *dst, const char *src, size_t size) | 
 | 220 | { | 
 | 221 | 	if (!size) | 
 | 222 | 		return; | 
 | 223 | 	if (dst < src) | 
 | 224 | 		asm volatile ("\n" | 
 | 225 | 			"1:	movem.l	(%0)+,%%d0/%%d1/%%a0/%%a1\n" | 
 | 226 | 			"	movem.l	%%d0/%%d1/%%a0/%%a1,%1@\n" | 
 | 227 | 			"	addq.l	#8,%1; addq.l #8,%1\n" | 
 | 228 | 			"	dbra	%2,1b\n" | 
 | 229 | 			"	clr.w	%2; subq.l #1,%2\n" | 
 | 230 | 			"	jcc	1b" | 
 | 231 | 			: "=a" (src), "=a" (dst), "=d" (size) | 
 | 232 | 			: "0" (src), "1" (dst), "2" (size / 16 - 1) | 
 | 233 | 			: "d0", "d1", "a0", "a1", "memory"); | 
 | 234 | 	else | 
 | 235 | 		asm volatile ("\n" | 
 | 236 | 			"1:	subq.l	#8,%0; subq.l #8,%0\n" | 
 | 237 | 			"	movem.l	%0@,%%d0/%%d1/%%a0/%%a1\n" | 
 | 238 | 			"	movem.l	%%d0/%%d1/%%a0/%%a1,-(%1)\n" | 
 | 239 | 			"	dbra	%2,1b\n" | 
 | 240 | 			"	clr.w	%2; subq.l #1,%2\n" | 
 | 241 | 			"	jcc 1b" | 
 | 242 | 			: "=a" (src), "=a" (dst), "=d" (size) | 
 | 243 | 			: "0" (src + size), "1" (dst + size), "2" (size / 16 - 1) | 
 | 244 | 			: "d0", "d1", "a0", "a1", "memory"); | 
 | 245 | } | 
 | 246 |  | 
 | 247 | #ifdef BPL | 
 | 248 |  | 
 | 249 | /* | 
 | 250 |  * This expands a up to 8 bit color into two longs | 
 | 251 |  * for movel operations. | 
 | 252 |  */ | 
 | 253 | static const u32 four2long[] = { | 
 | 254 | 	0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, | 
 | 255 | 	0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, | 
 | 256 | 	0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, | 
 | 257 | 	0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, | 
 | 258 | }; | 
 | 259 |  | 
 | 260 | static inline void expand8_col2mask(u8 c, u32 m[]) | 
 | 261 | { | 
 | 262 | 	m[0] = four2long[c & 15]; | 
 | 263 | #if BPL > 4 | 
 | 264 | 	m[1] = four2long[c >> 4]; | 
 | 265 | #endif | 
 | 266 | } | 
 | 267 |  | 
 | 268 | static inline void expand8_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[]) | 
 | 269 | { | 
 | 270 | 	fgm[0] = four2long[fg & 15] ^ (bgm[0] = four2long[bg & 15]); | 
 | 271 | #if BPL > 4 | 
 | 272 | 	fgm[1] = four2long[fg >> 4] ^ (bgm[1] = four2long[bg >> 4]); | 
 | 273 | #endif | 
 | 274 | } | 
 | 275 |  | 
 | 276 | /* | 
 | 277 |  * set an 8bit value to a color | 
 | 278 |  */ | 
 | 279 | static inline void fill8_col(u8 *dst, u32 m[]) | 
 | 280 | { | 
 | 281 | 	u32 tmp = m[0]; | 
 | 282 | 	dst[0] = tmp; | 
 | 283 | 	dst[2] = (tmp >>= 8); | 
 | 284 | #if BPL > 2 | 
 | 285 | 	dst[4] = (tmp >>= 8); | 
 | 286 | 	dst[6] = tmp >> 8; | 
 | 287 | #endif | 
 | 288 | #if BPL > 4 | 
 | 289 | 	tmp = m[1]; | 
 | 290 | 	dst[8] = tmp; | 
 | 291 | 	dst[10] = (tmp >>= 8); | 
 | 292 | 	dst[12] = (tmp >>= 8); | 
 | 293 | 	dst[14] = tmp >> 8; | 
 | 294 | #endif | 
 | 295 | } | 
 | 296 |  | 
 | 297 | /* | 
 | 298 |  * set an 8bit value according to foreground/background color | 
 | 299 |  */ | 
 | 300 | static inline void fill8_2col(u8 *dst, u8 fg, u8 bg, u32 mask) | 
 | 301 | { | 
 | 302 | 	u32 fgm[2], bgm[2], tmp; | 
 | 303 |  | 
 | 304 | 	expand8_2col2mask(fg, bg, fgm, bgm); | 
 | 305 |  | 
 | 306 | 	mask |= mask << 8; | 
 | 307 | #if BPL > 2 | 
 | 308 | 	mask |= mask << 16; | 
 | 309 | #endif | 
 | 310 | 	tmp = (mask & fgm[0]) ^ bgm[0]; | 
 | 311 | 	dst[0] = tmp; | 
 | 312 | 	dst[2] = (tmp >>= 8); | 
 | 313 | #if BPL > 2 | 
 | 314 | 	dst[4] = (tmp >>= 8); | 
 | 315 | 	dst[6] = tmp >> 8; | 
 | 316 | #endif | 
 | 317 | #if BPL > 4 | 
 | 318 | 	tmp = (mask & fgm[1]) ^ bgm[1]; | 
 | 319 | 	dst[8] = tmp; | 
 | 320 | 	dst[10] = (tmp >>= 8); | 
 | 321 | 	dst[12] = (tmp >>= 8); | 
 | 322 | 	dst[14] = tmp >> 8; | 
 | 323 | #endif | 
 | 324 | } | 
 | 325 |  | 
 | 326 | static const u32 two2word[] = { | 
 | 327 | 	0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff | 
 | 328 | }; | 
 | 329 |  | 
 | 330 | static inline void expand16_col2mask(u8 c, u32 m[]) | 
 | 331 | { | 
 | 332 | 	m[0] = two2word[c & 3]; | 
 | 333 | #if BPL > 2 | 
 | 334 | 	m[1] = two2word[(c >> 2) & 3]; | 
 | 335 | #endif | 
 | 336 | #if BPL > 4 | 
 | 337 | 	m[2] = two2word[(c >> 4) & 3]; | 
 | 338 | 	m[3] = two2word[c >> 6]; | 
 | 339 | #endif | 
 | 340 | } | 
 | 341 |  | 
 | 342 | static inline void expand16_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[]) | 
 | 343 | { | 
 | 344 | 	bgm[0] = two2word[bg & 3]; | 
 | 345 | 	fgm[0] = two2word[fg & 3] ^ bgm[0]; | 
 | 346 | #if BPL > 2 | 
 | 347 | 	bgm[1] = two2word[(bg >> 2) & 3]; | 
 | 348 | 	fgm[1] = two2word[(fg >> 2) & 3] ^ bgm[1]; | 
 | 349 | #endif | 
 | 350 | #if BPL > 4 | 
 | 351 | 	bgm[2] = two2word[(bg >> 4) & 3]; | 
 | 352 | 	fgm[2] = two2word[(fg >> 4) & 3] ^ bgm[2]; | 
 | 353 | 	bgm[3] = two2word[bg >> 6]; | 
 | 354 | 	fgm[3] = two2word[fg >> 6] ^ bgm[3]; | 
 | 355 | #endif | 
 | 356 | } | 
 | 357 |  | 
 | 358 | static inline u32 *fill16_col(u32 *dst, int rows, u32 m[]) | 
 | 359 | { | 
 | 360 | 	while (rows) { | 
 | 361 | 		*dst++ = m[0]; | 
 | 362 | #if BPL > 2 | 
 | 363 | 		*dst++ = m[1]; | 
 | 364 | #endif | 
 | 365 | #if BPL > 4 | 
 | 366 | 		*dst++ = m[2]; | 
 | 367 | 		*dst++ = m[3]; | 
 | 368 | #endif | 
 | 369 | 		rows--; | 
 | 370 | 	} | 
 | 371 | 	return dst; | 
 | 372 | } | 
 | 373 |  | 
 | 374 | static inline void memmove32_col(void *dst, void *src, u32 mask, u32 h, u32 bytes) | 
 | 375 | { | 
 | 376 | 	u32 *s, *d, v; | 
 | 377 |  | 
 | 378 |         s = src; | 
 | 379 |         d = dst; | 
 | 380 |         do { | 
 | 381 |                 v = (*s++ & mask) | (*d  & ~mask); | 
 | 382 |                 *d++ = v; | 
 | 383 | #if BPL > 2 | 
 | 384 |                 v = (*s++ & mask) | (*d  & ~mask); | 
 | 385 |                 *d++ = v; | 
 | 386 | #endif | 
 | 387 | #if BPL > 4 | 
 | 388 |                 v = (*s++ & mask) | (*d  & ~mask); | 
 | 389 |                 *d++ = v; | 
 | 390 |                 v = (*s++ & mask) | (*d  & ~mask); | 
 | 391 |                 *d++ = v; | 
 | 392 | #endif | 
 | 393 |                 d = (u32 *)((u8 *)d + bytes); | 
 | 394 |                 s = (u32 *)((u8 *)s + bytes); | 
 | 395 |         } while (--h); | 
 | 396 | } | 
 | 397 |  | 
 | 398 | #endif | 
 | 399 |  | 
 | 400 | #endif /* _VIDEO_ATAFB_UTILS_H */ |