| Michael Schmitz | a100501 | 2007-05-01 22:32:39 +0200 | [diff] [blame] | 1 | #ifndef _VIDEO_ATAFB_UTILS_H | 
|  | 2 | #define _VIDEO_ATAFB_UTILS_H | 
|  | 3 |  | 
|  | 4 | /* ================================================================= */ | 
|  | 5 | /*                      Utility Assembler Functions                  */ | 
|  | 6 | /* ================================================================= */ | 
|  | 7 |  | 
|  | 8 | /* ====================================================================== */ | 
|  | 9 |  | 
/* Those of a delicate disposition might like to skip the next couple of
 * pages.
 *
 * These functions are drop-in replacements for memmove and
 * memset(_, 0, _). However, their five instances add at least a kilobyte
 * to the object file. You have been warned.
 *
 * I am not a great fan of assembler for the sake of it, but I think
 * that these routines are at least 10 times faster than their C
 * equivalents for large blits, and that's important to the lowest level of
 * a graphics driver. The question is whether some scheme with the blitter
 * would be faster. I suspect not for a simple text system - not much
 * asynchrony.
 *
 * The code is very simple, just gruesome expansion. The basic strategy is
 * to increase the data moved/cleared at each step to 16 bytes to reduce
 * the instruction-per-data-move overhead. movem might be faster still.
 * For more than 15 bytes, we try to align the write direction on a
 * longword boundary to get maximum speed. This is even more gruesome.
 * The unaligned reads/writes used require a 68020+ - think this is a
 * problem?
 *
 * Sorry!
 */
|  | 33 |  | 
|  | 34 |  | 
|  | 35 | /* ++roman: I've optimized Robert's original versions in some minor | 
|  | 36 | * aspects, e.g. moveq instead of movel, let gcc choose the registers, | 
|  | 37 | * use movem in some places... | 
|  | 38 | * For other modes than 1 plane, lots of more such assembler functions | 
|  | 39 | * were needed (e.g. the ones using movep or expanding color values). | 
|  | 40 | */ | 
|  | 41 |  | 
|  | 42 | /* ++andreas: more optimizations: | 
|  | 43 | subl #65536,d0 replaced by clrw d0; subql #1,d0 for dbcc | 
|  | 44 | addal is faster than addaw | 
|  | 45 | movep is rather expensive compared to ordinary move's | 
|  | 46 | some functions rewritten in C for clarity, no speed loss */ | 
|  | 47 |  | 
|  | 48 | static inline void *fb_memclear_small(void *s, size_t count) | 
|  | 49 | { | 
|  | 50 | if (!count) | 
|  | 51 | return 0; | 
|  | 52 |  | 
|  | 53 | asm volatile ("\n" | 
|  | 54 | "	lsr.l	#1,%1 ; jcc 1f ; move.b %2,-(%0)\n" | 
|  | 55 | "1:	lsr.l	#1,%1 ; jcc 1f ; move.w %2,-(%0)\n" | 
|  | 56 | "1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0)\n" | 
|  | 57 | "1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n" | 
|  | 58 | "1:" | 
|  | 59 | : "=a" (s), "=d" (count) | 
|  | 60 | : "d" (0), "0" ((char *)s + count), "1" (count)); | 
|  | 61 | asm volatile ("\n" | 
|  | 62 | "	subq.l  #1,%1\n" | 
|  | 63 | "	jcs	3f\n" | 
|  | 64 | "	move.l	%2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n" | 
|  | 65 | "2:	movem.l	%2/%%d4/%%d5/%%d6,-(%0)\n" | 
|  | 66 | "	dbra	%1,2b\n" | 
|  | 67 | "3:" | 
|  | 68 | : "=a" (s), "=d" (count) | 
|  | 69 | : "d" (0), "0" (s), "1" (count) | 
|  | 70 | : "d4", "d5", "d6" | 
|  | 71 | ); | 
|  | 72 |  | 
|  | 73 | return 0; | 
|  | 74 | } | 
|  | 75 |  | 
|  | 76 |  | 
|  | 77 | static inline void *fb_memclear(void *s, size_t count) | 
|  | 78 | { | 
|  | 79 | if (!count) | 
|  | 80 | return 0; | 
|  | 81 |  | 
|  | 82 | if (count < 16) { | 
|  | 83 | asm volatile ("\n" | 
|  | 84 | "	lsr.l	#1,%1 ; jcc 1f ; clr.b (%0)+\n" | 
|  | 85 | "1:	lsr.l	#1,%1 ; jcc 1f ; clr.w (%0)+\n" | 
|  | 86 | "1:	lsr.l	#1,%1 ; jcc 1f ; clr.l (%0)+\n" | 
|  | 87 | "1:	lsr.l	#1,%1 ; jcc 1f ; clr.l (%0)+ ; clr.l (%0)+\n" | 
|  | 88 | "1:" | 
|  | 89 | : "=a" (s), "=d" (count) | 
|  | 90 | : "0" (s), "1" (count)); | 
|  | 91 | } else { | 
|  | 92 | long tmp; | 
|  | 93 | asm volatile ("\n" | 
|  | 94 | "	move.l	%1,%2\n" | 
|  | 95 | "	lsr.l	#1,%2 ; jcc 1f ; clr.b (%0)+ ; subq.w #1,%1\n" | 
|  | 96 | "	lsr.l	#1,%2 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/ | 
|  | 97 | "	clr.w	(%0)+  ; subq.w  #2,%1 ; jra 2f\n" | 
|  | 98 | "1:	lsr.l	#1,%2 ; jcc 2f\n" | 
|  | 99 | "	clr.w	(%0)+  ; subq.w  #2,%1\n" | 
|  | 100 | "2:	move.w	%1,%2; lsr.l #2,%1 ; jeq 6f\n" | 
|  | 101 | "	lsr.l	#1,%1 ; jcc 3f ; clr.l (%0)+\n" | 
|  | 102 | "3:	lsr.l	#1,%1 ; jcc 4f ; clr.l (%0)+ ; clr.l (%0)+\n" | 
|  | 103 | "4:	subq.l	#1,%1 ; jcs 6f\n" | 
|  | 104 | "5:	clr.l	(%0)+; clr.l (%0)+ ; clr.l (%0)+ ; clr.l (%0)+\n" | 
|  | 105 | "	dbra	%1,5b ; clr.w %1; subq.l #1,%1; jcc 5b\n" | 
|  | 106 | "6:	move.w	%2,%1; btst #1,%1 ; jeq 7f ; clr.w (%0)+\n" | 
|  | 107 | "7:	btst	#0,%1 ; jeq 8f ; clr.b (%0)+\n" | 
|  | 108 | "8:" | 
|  | 109 | : "=a" (s), "=d" (count), "=d" (tmp) | 
|  | 110 | : "0" (s), "1" (count)); | 
|  | 111 | } | 
|  | 112 |  | 
|  | 113 | return 0; | 
|  | 114 | } | 
|  | 115 |  | 
|  | 116 |  | 
|  | 117 | static inline void *fb_memset255(void *s, size_t count) | 
|  | 118 | { | 
|  | 119 | if (!count) | 
|  | 120 | return 0; | 
|  | 121 |  | 
|  | 122 | asm volatile ("\n" | 
|  | 123 | "	lsr.l	#1,%1 ; jcc 1f ; move.b %2,-(%0)\n" | 
|  | 124 | "1:	lsr.l	#1,%1 ; jcc 1f ; move.w %2,-(%0)\n" | 
|  | 125 | "1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0)\n" | 
|  | 126 | "1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n" | 
|  | 127 | "1:" | 
|  | 128 | : "=a" (s), "=d" (count) | 
|  | 129 | : "d" (-1), "0" ((char *)s+count), "1" (count)); | 
|  | 130 | asm volatile ("\n" | 
|  | 131 | "	subq.l	#1,%1 ; jcs 3f\n" | 
|  | 132 | "	move.l	%2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n" | 
|  | 133 | "2:	movem.l	%2/%%d4/%%d5/%%d6,-(%0)\n" | 
|  | 134 | "	dbra	%1,2b\n" | 
|  | 135 | "3:" | 
|  | 136 | : "=a" (s), "=d" (count) | 
|  | 137 | : "d" (-1), "0" (s), "1" (count) | 
|  | 138 | : "d4", "d5", "d6"); | 
|  | 139 |  | 
|  | 140 | return 0; | 
|  | 141 | } | 
|  | 142 |  | 
|  | 143 |  | 
|  | 144 | static inline void *fb_memmove(void *d, const void *s, size_t count) | 
|  | 145 | { | 
|  | 146 | if (d < s) { | 
|  | 147 | if (count < 16) { | 
|  | 148 | asm volatile ("\n" | 
|  | 149 | "	lsr.l	#1,%2 ; jcc 1f ; move.b (%1)+,(%0)+\n" | 
|  | 150 | "1:	lsr.l	#1,%2 ; jcc 1f ; move.w (%1)+,(%0)+\n" | 
|  | 151 | "1:	lsr.l	#1,%2 ; jcc 1f ; move.l (%1)+,(%0)+\n" | 
|  | 152 | "1:	lsr.l	#1,%2 ; jcc 1f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n" | 
|  | 153 | "1:" | 
|  | 154 | : "=a" (d), "=a" (s), "=d" (count) | 
|  | 155 | : "0" (d), "1" (s), "2" (count)); | 
|  | 156 | } else { | 
|  | 157 | long tmp; | 
|  | 158 | asm volatile ("\n" | 
|  | 159 | "	move.l	%0,%3\n" | 
|  | 160 | "	lsr.l	#1,%3 ; jcc 1f ; move.b (%1)+,(%0)+ ; subqw #1,%2\n" | 
|  | 161 | "	lsr.l	#1,%3 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/ | 
|  | 162 | "	move.w	(%1)+,(%0)+  ; subqw  #2,%2 ; jra 2f\n" | 
|  | 163 | "1:	lsr.l   #1,%3 ; jcc 2f\n" | 
|  | 164 | "	move.w	(%1)+,(%0)+  ; subqw  #2,%2\n" | 
|  | 165 | "2:	move.w	%2,%-; lsr.l #2,%2 ; jeq 6f\n" | 
|  | 166 | "	lsr.l	#1,%2 ; jcc 3f ; move.l (%1)+,(%0)+\n" | 
|  | 167 | "3:	lsr.l	#1,%2 ; jcc 4f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n" | 
|  | 168 | "4:	subq.l	#1,%2 ; jcs 6f\n" | 
|  | 169 | "5:	move.l	(%1)+,(%0)+; move.l (%1)+,(%0)+\n" | 
|  | 170 | "	move.l	(%1)+,(%0)+; move.l (%1)+,(%0)+\n" | 
|  | 171 | "	dbra	%2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n" | 
|  | 172 | "6:	move.w	%+,%2; btst #1,%2 ; jeq 7f ; move.w (%1)+,(%0)+\n" | 
|  | 173 | "7:	btst	#0,%2 ; jeq 8f ; move.b (%1)+,(%0)+\n" | 
|  | 174 | "8:" | 
|  | 175 | : "=a" (d), "=a" (s), "=d" (count), "=d" (tmp) | 
|  | 176 | : "0" (d), "1" (s), "2" (count)); | 
|  | 177 | } | 
|  | 178 | } else { | 
|  | 179 | if (count < 16) { | 
|  | 180 | asm volatile ("\n" | 
|  | 181 | "	lsr.l	#1,%2 ; jcc 1f ; move.b -(%1),-(%0)\n" | 
|  | 182 | "1:	lsr.l	#1,%2 ; jcc 1f ; move.w -(%1),-(%0)\n" | 
|  | 183 | "1:	lsr.l	#1,%2 ; jcc 1f ; move.l -(%1),-(%0)\n" | 
|  | 184 | "1:	lsr.l	#1,%2 ; jcc 1f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n" | 
|  | 185 | "1:" | 
|  | 186 | : "=a" (d), "=a" (s), "=d" (count) | 
|  | 187 | : "0" ((char *) d + count), "1" ((char *) s + count), "2" (count)); | 
|  | 188 | } else { | 
|  | 189 | long tmp; | 
|  | 190 |  | 
|  | 191 | asm volatile ("\n" | 
|  | 192 | "	move.l	%0,%3\n" | 
|  | 193 | "	lsr.l	#1,%3 ; jcc 1f ; move.b -(%1),-(%0) ; subqw #1,%2\n" | 
|  | 194 | "	lsr.l	#1,%3 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/ | 
|  | 195 | "	move.w	-(%1),-(%0) ; subqw  #2,%2 ; jra 2f\n" | 
|  | 196 | "1:	lsr.l	#1,%3 ; jcc 2f\n" | 
|  | 197 | "	move.w	-(%1),-(%0) ; subqw  #2,%2\n" | 
|  | 198 | "2:	move.w	%2,%-; lsr.l #2,%2 ; jeq 6f\n" | 
|  | 199 | "	lsr.l	#1,%2 ; jcc 3f ; move.l -(%1),-(%0)\n" | 
|  | 200 | "3:	lsr.l	#1,%2 ; jcc 4f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n" | 
|  | 201 | "4:	subq.l	#1,%2 ; jcs 6f\n" | 
|  | 202 | "5:	move.l	-(%1),-(%0); move.l -(%1),-(%0)\n" | 
|  | 203 | "	move.l	-(%1),-(%0); move.l -(%1),-(%0)\n" | 
|  | 204 | "	dbra	%2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n" | 
|  | 205 | "6:	move.w	%+,%2; btst #1,%2 ; jeq 7f ; move.w -(%1),-(%0)\n" | 
|  | 206 | "7:	btst	#0,%2 ; jeq 8f ; move.b -(%1),-(%0)\n" | 
|  | 207 | "8:" | 
|  | 208 | : "=a" (d), "=a" (s), "=d" (count), "=d" (tmp) | 
|  | 209 | : "0" ((char *) d + count), "1" ((char *) s + count), "2" (count)); | 
|  | 210 | } | 
|  | 211 | } | 
|  | 212 |  | 
|  | 213 | return 0; | 
|  | 214 | } | 
|  | 215 |  | 
|  | 216 |  | 
|  | 217 | /* ++andreas: Simple and fast version of memmove, assumes size is | 
|  | 218 | divisible by 16, suitable for moving the whole screen bitplane */ | 
|  | 219 | static inline void fast_memmove(char *dst, const char *src, size_t size) | 
|  | 220 | { | 
|  | 221 | if (!size) | 
|  | 222 | return; | 
|  | 223 | if (dst < src) | 
|  | 224 | asm volatile ("\n" | 
|  | 225 | "1:	movem.l	(%0)+,%%d0/%%d1/%%a0/%%a1\n" | 
|  | 226 | "	movem.l	%%d0/%%d1/%%a0/%%a1,%1@\n" | 
|  | 227 | "	addq.l	#8,%1; addq.l #8,%1\n" | 
|  | 228 | "	dbra	%2,1b\n" | 
|  | 229 | "	clr.w	%2; subq.l #1,%2\n" | 
|  | 230 | "	jcc	1b" | 
|  | 231 | : "=a" (src), "=a" (dst), "=d" (size) | 
|  | 232 | : "0" (src), "1" (dst), "2" (size / 16 - 1) | 
|  | 233 | : "d0", "d1", "a0", "a1", "memory"); | 
|  | 234 | else | 
|  | 235 | asm volatile ("\n" | 
|  | 236 | "1:	subq.l	#8,%0; subq.l #8,%0\n" | 
|  | 237 | "	movem.l	%0@,%%d0/%%d1/%%a0/%%a1\n" | 
|  | 238 | "	movem.l	%%d0/%%d1/%%a0/%%a1,-(%1)\n" | 
|  | 239 | "	dbra	%2,1b\n" | 
|  | 240 | "	clr.w	%2; subq.l #1,%2\n" | 
|  | 241 | "	jcc 1b" | 
|  | 242 | : "=a" (src), "=a" (dst), "=d" (size) | 
|  | 243 | : "0" (src + size), "1" (dst + size), "2" (size / 16 - 1) | 
|  | 244 | : "d0", "d1", "a0", "a1", "memory"); | 
|  | 245 | } | 
|  | 246 |  | 
|  | 247 | #ifdef BPL | 
|  | 248 |  | 
/*
 * This expands a color of up to 8 bits into two longs
 * for movel operations.
 *
 * Lookup table indexed by a 4-bit value: every set bit of the index
 * becomes one 0xff byte of the result (bit 0 -> least significant byte,
 * bit 3 -> most significant byte).  An 8-bit color needs two lookups,
 * one per nibble, hence the two longs.
 */
static const u32 four2long[] = {
	0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
	0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
	0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
	0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
};
|  | 259 |  | 
|  | 260 | static inline void expand8_col2mask(u8 c, u32 m[]) | 
|  | 261 | { | 
|  | 262 | m[0] = four2long[c & 15]; | 
|  | 263 | #if BPL > 4 | 
|  | 264 | m[1] = four2long[c >> 4]; | 
|  | 265 | #endif | 
|  | 266 | } | 
|  | 267 |  | 
|  | 268 | static inline void expand8_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[]) | 
|  | 269 | { | 
|  | 270 | fgm[0] = four2long[fg & 15] ^ (bgm[0] = four2long[bg & 15]); | 
|  | 271 | #if BPL > 4 | 
|  | 272 | fgm[1] = four2long[fg >> 4] ^ (bgm[1] = four2long[bg >> 4]); | 
|  | 273 | #endif | 
|  | 274 | } | 
|  | 275 |  | 
/*
 * Set an 8-bit value to a color.
 *
 * The bytes of m[0] (and of m[1] when BPL > 4) are stored, least
 * significant byte first, at every second byte of @dst: offsets 0 and 2,
 * then 4 and 6 when BPL > 2, then 8..14 when BPL > 4.  The odd offsets
 * are left untouched.
 */
static inline void fill8_col(u8 *dst, u32 m[])
{
	u32 bits = m[0];

	dst[0] = (u8)bits;
	dst[2] = (u8)(bits >> 8);
#if BPL > 2
	dst[4] = (u8)(bits >> 16);
	dst[6] = (u8)(bits >> 24);
#endif
#if BPL > 4
	bits = m[1];
	dst[8] = (u8)bits;
	dst[10] = (u8)(bits >> 8);
	dst[12] = (u8)(bits >> 16);
	dst[14] = (u8)(bits >> 24);
#endif
}
|  | 296 |  | 
|  | 297 | /* | 
|  | 298 | * set an 8bit value according to foreground/background color | 
|  | 299 | */ | 
|  | 300 | static inline void fill8_2col(u8 *dst, u8 fg, u8 bg, u32 mask) | 
|  | 301 | { | 
|  | 302 | u32 fgm[2], bgm[2], tmp; | 
|  | 303 |  | 
|  | 304 | expand8_2col2mask(fg, bg, fgm, bgm); | 
|  | 305 |  | 
|  | 306 | mask |= mask << 8; | 
|  | 307 | #if BPL > 2 | 
|  | 308 | mask |= mask << 16; | 
|  | 309 | #endif | 
|  | 310 | tmp = (mask & fgm[0]) ^ bgm[0]; | 
|  | 311 | dst[0] = tmp; | 
|  | 312 | dst[2] = (tmp >>= 8); | 
|  | 313 | #if BPL > 2 | 
|  | 314 | dst[4] = (tmp >>= 8); | 
|  | 315 | dst[6] = tmp >> 8; | 
|  | 316 | #endif | 
|  | 317 | #if BPL > 4 | 
|  | 318 | tmp = (mask & fgm[1]) ^ bgm[1]; | 
|  | 319 | dst[8] = tmp; | 
|  | 320 | dst[10] = (tmp >>= 8); | 
|  | 321 | dst[12] = (tmp >>= 8); | 
|  | 322 | dst[14] = tmp >> 8; | 
|  | 323 | #endif | 
|  | 324 | } | 
|  | 325 |  | 
/*
 * Lookup table indexed by a 2-bit value: bit 0 of the index fills the
 * upper 16 bits of the result with ones, bit 1 fills the lower 16 bits.
 */
static const u32 two2word[] = {
	0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff
};
|  | 329 |  | 
|  | 330 | static inline void expand16_col2mask(u8 c, u32 m[]) | 
|  | 331 | { | 
|  | 332 | m[0] = two2word[c & 3]; | 
|  | 333 | #if BPL > 2 | 
|  | 334 | m[1] = two2word[(c >> 2) & 3]; | 
|  | 335 | #endif | 
|  | 336 | #if BPL > 4 | 
|  | 337 | m[2] = two2word[(c >> 4) & 3]; | 
|  | 338 | m[3] = two2word[c >> 6]; | 
|  | 339 | #endif | 
|  | 340 | } | 
|  | 341 |  | 
|  | 342 | static inline void expand16_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[]) | 
|  | 343 | { | 
|  | 344 | bgm[0] = two2word[bg & 3]; | 
|  | 345 | fgm[0] = two2word[fg & 3] ^ bgm[0]; | 
|  | 346 | #if BPL > 2 | 
|  | 347 | bgm[1] = two2word[(bg >> 2) & 3]; | 
|  | 348 | fgm[1] = two2word[(fg >> 2) & 3] ^ bgm[1]; | 
|  | 349 | #endif | 
|  | 350 | #if BPL > 4 | 
|  | 351 | bgm[2] = two2word[(bg >> 4) & 3]; | 
|  | 352 | fgm[2] = two2word[(fg >> 4) & 3] ^ bgm[2]; | 
|  | 353 | bgm[3] = two2word[bg >> 6]; | 
|  | 354 | fgm[3] = two2word[fg >> 6] ^ bgm[3]; | 
|  | 355 | #endif | 
|  | 356 | } | 
|  | 357 |  | 
/*
 * Write the plane masks in @m to @dst once per row, for @rows rows.
 * Each row stores m[0], plus m[1] when BPL > 2, plus m[2] and m[3] when
 * BPL > 4, in consecutive longs.
 *
 * Returns the position just past the last long written.
 */
static inline u32 *fill16_col(u32 *dst, int rows, u32 m[])
{
	u32 *out = dst;

	for (; rows != 0; rows--) {
		*out = m[0];
		out++;
#if BPL > 2
		*out = m[1];
		out++;
#endif
#if BPL > 4
		*out = m[2];
		out++;
		*out = m[3];
		out++;
#endif
	}

	return out;
}
|  | 373 |  | 
/*
 * Masked copy of a column of longs from @src to @dst.
 *
 * For each of @h rows, the bits selected by @mask are taken from the
 * source long and the remaining bits are kept from the destination long.
 * A row consists of one long, two when BPL > 2, four when BPL > 4.
 * After each row both pointers are advanced by a further @bytes bytes.
 *
 * The loop body runs before @h is tested, so @h is expected to be
 * non-zero.
 */
static inline void memmove32_col(void *dst, void *src, u32 mask, u32 h, u32 bytes)
{
	u32 *from = src;
	u32 *to = dst;

	for (;;) {
		/* to ^ ((to ^ from) & mask) == (from & mask) | (to & ~mask) */
		*to ^= (*to ^ *from) & mask;
		to++;
		from++;
#if BPL > 2
		*to ^= (*to ^ *from) & mask;
		to++;
		from++;
#endif
#if BPL > 4
		*to ^= (*to ^ *from) & mask;
		to++;
		from++;
		*to ^= (*to ^ *from) & mask;
		to++;
		from++;
#endif
		to = (u32 *)((u8 *)to + bytes);
		from = (u32 *)((u8 *)from + bytes);

		if (--h == 0)
			break;
	}
}
|  | 397 |  | 
|  | 398 | #endif | 
|  | 399 |  | 
|  | 400 | #endif /* _VIDEO_ATAFB_UTILS_H */ |