| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | #ifndef _I386_STRING_H_ | 
 | 2 | #define _I386_STRING_H_ | 
 | 3 |  | 
 | 4 | #ifdef __KERNEL__ | 
 | 5 | #include <linux/config.h> | 
 | 6 | /* | 
 | 7 |  * On a 486 or Pentium, we are better off not using the | 
 | 8 |  * byte string operations. But on a 386 or a PPro the | 
 | 9 |  * byte string ops are faster than doing it by hand | 
 | 10 |  * (MUCH faster on a Pentium). | 
 | 11 |  */ | 
 | 12 |  | 
 | 13 | /* | 
 | 14 |  * This string-include defines all string functions as inline | 
 | 15 |  * functions. Use gcc. It also assumes ds=es=data space, this should be | 
 | 16 |  * normal. Most of the string-functions are rather heavily hand-optimized, | 
 | 17 |  * see especially strsep,strstr,str[c]spn. They should work, but are not | 
 | 18 |  * very easy to understand. Everything is done entirely within the register | 
 | 19 |  * set, making the functions fast and clean. String instructions have been | 
 | 20 |  * used throughout, making for "slightly" unclear code :-) | 
 | 21 |  * | 
 | 22 |  *		NO Copyright (C) 1991, 1992 Linus Torvalds, | 
 | 23 |  *		consider these trivial functions to be PD. | 
 | 24 |  */ | 
 | 25 |  | 
 | 26 | /* AK: in fact I bet it would be better to move this stuff all out of line. | 
 | 27 |  */ | 
 | 28 |  | 
 | 29 | #define __HAVE_ARCH_STRCPY | 
 /*
  * strcpy - copy the NUL-terminated string src, terminator included, to dest.
  *
  * Each pass loads one byte from src with lodsb and stores it to dest with
  * stosb; the loop ends once the byte just stored was zero.  d0, d1 and d2
  * are dummy outputs whose only job is to tell the compiler that the
  * registers holding the source pointer, the destination pointer and the
  * accumulator are modified.  No bound is applied to the copy.
  * Returns dest.
  */
 | 30 | static inline char * strcpy(char * dest,const char *src) | 
 | 31 | { | 
 | 32 | int d0, d1, d2; | 
 | 33 | __asm__ __volatile__( | 
 | 34 | 	"1:\tlodsb\n\t" | 
 | 35 | 	"stosb\n\t" | 
 | 36 | 	"testb %%al,%%al\n\t" | 
 | 37 | 	"jne 1b" | 
 | 38 | 	: "=&S" (d0), "=&D" (d1), "=&a" (d2) | 
 | 39 | 	:"0" (src),"1" (dest) : "memory"); | 
 | 40 | return dest; | 
 | 41 | } | 
 | 42 |  | 
 | 43 | #define __HAVE_ARCH_STRNCPY | 
 /*
  * strncpy - copy at most count bytes from src to dest.
  *
  * count is decremented before each byte and the loop leaves through label 2
  * as soon as that decrement goes negative, so count == 0 copies nothing.
  * If a zero byte is copied first, "rep stosb" fills the rest of the count
  * bytes with zeros (al is zero at that point).  If count runs out before a
  * zero byte is seen, nothing further is written, so dest is not
  * NUL-terminated in that case.  Returns dest.
  */
 | 44 | static inline char * strncpy(char * dest,const char *src,size_t count) | 
 | 45 | { | 
 | 46 | int d0, d1, d2, d3; | 
 | 47 | __asm__ __volatile__( | 
 | 48 | 	"1:\tdecl %2\n\t" | 
 | 49 | 	"js 2f\n\t" | 
 | 50 | 	"lodsb\n\t" | 
 | 51 | 	"stosb\n\t" | 
 | 52 | 	"testb %%al,%%al\n\t" | 
 | 53 | 	"jne 1b\n\t" | 
 | 54 | 	"rep\n\t" | 
 | 55 | 	"stosb\n" | 
 | 56 | 	"2:" | 
 | 57 | 	: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) | 
 | 58 | 	:"0" (src),"1" (dest),"2" (count) : "memory"); | 
 | 59 | return dest; | 
 | 60 | } | 
 | 61 |  | 
 | 62 | #define __HAVE_ARCH_STRCAT | 
 /*
  * strcat - append the string src, terminator included, to the end of dest.
  *
  * "repne scasb" runs with al = 0 and the count register preset to
  * 0xffffffff, so it scans dest for its terminating zero byte; "decl %1"
  * then steps the destination pointer back onto that byte.  The loop that
  * follows copies src with lodsb/stosb until the byte just stored was zero.
  * Returns dest.
  */
 | 63 | static inline char * strcat(char * dest,const char * src) | 
 | 64 | { | 
 | 65 | int d0, d1, d2, d3; | 
 | 66 | __asm__ __volatile__( | 
 | 67 | 	"repne\n\t" | 
 | 68 | 	"scasb\n\t" | 
 | 69 | 	"decl %1\n" | 
 | 70 | 	"1:\tlodsb\n\t" | 
 | 71 | 	"stosb\n\t" | 
 | 72 | 	"testb %%al,%%al\n\t" | 
 | 73 | 	"jne 1b" | 
 | 74 | 	: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) | 
 | 75 | 	: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu):"memory"); | 
 | 76 | return dest; | 
 | 77 | } | 
 | 78 |  | 
 | 79 | #define __HAVE_ARCH_STRNCAT | 
 /*
  * strncat - append at most count bytes of src to dest, then terminate it.
  *
  * The end of dest is found with "repne scasb" (al = 0, count register
  * preset to 0xffffffff) followed by one step back.  "movl %8,%3" then loads
  * count (operand 8) into the count register, and the loop copies bytes
  * until count is exhausted or a zero byte has been copied.  Label 2 is
  * reached on both exits: it clears eax and stores one more zero byte, so
  * when the loop already copied src's terminator a second zero byte is
  * written directly after it.  Returns dest.
  */
 | 80 | static inline char * strncat(char * dest,const char * src,size_t count) | 
 | 81 | { | 
 | 82 | int d0, d1, d2, d3; | 
 | 83 | __asm__ __volatile__( | 
 | 84 | 	"repne\n\t" | 
 | 85 | 	"scasb\n\t" | 
 | 86 | 	"decl %1\n\t" | 
 | 87 | 	"movl %8,%3\n" | 
 | 88 | 	"1:\tdecl %3\n\t" | 
 | 89 | 	"js 2f\n\t" | 
 | 90 | 	"lodsb\n\t" | 
 | 91 | 	"stosb\n\t" | 
 | 92 | 	"testb %%al,%%al\n\t" | 
 | 93 | 	"jne 1b\n" | 
 | 94 | 	"2:\txorl %2,%2\n\t" | 
 | 95 | 	"stosb" | 
 | 96 | 	: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) | 
 | 97 | 	: "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count) | 
 | 98 | 	: "memory"); | 
 | 99 | return dest; | 
 | 100 | } | 
 | 101 |  | 
 | 102 | #define __HAVE_ARCH_STRCMP | 
 /*
  * strcmp - compare the strings cs and ct byte by byte.
  *
  * lodsb fetches a byte of cs into al and scasb compares it with the next
  * byte of ct.  If the strings agree up to and including the zero byte the
  * result is 0.  On the first mismatch "sbbl %%eax,%%eax" turns the carry
  * flag left by scasb into 0 or -1 and "orb $1,%%al" maps that to 1 or -1:
  * -1 when the cs byte is below the ct byte as an unsigned value, 1
  * otherwise.
  */
 | 103 | static inline int strcmp(const char * cs,const char * ct) | 
 | 104 | { | 
 | 105 | int d0, d1; | 
 | 106 | register int __res; | 
 | 107 | __asm__ __volatile__( | 
 | 108 | 	"1:\tlodsb\n\t" | 
 | 109 | 	"scasb\n\t" | 
 | 110 | 	"jne 2f\n\t" | 
 | 111 | 	"testb %%al,%%al\n\t" | 
 | 112 | 	"jne 1b\n\t" | 
 | 113 | 	"xorl %%eax,%%eax\n\t" | 
 | 114 | 	"jmp 3f\n" | 
 | 115 | 	"2:\tsbbl %%eax,%%eax\n\t" | 
 | 116 | 	"orb $1,%%al\n" | 
 | 117 | 	"3:" | 
 | 118 | 	:"=a" (__res), "=&S" (d0), "=&D" (d1) | 
| Linus Torvalds | 793ae77 | 2005-06-24 10:39:17 -0700 | [diff] [blame] | 119 | 	:"1" (cs),"2" (ct) | 
 | 120 | 	:"memory"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 121 | return __res; | 
 | 122 | } | 
 | 123 |  | 
 | 124 | #define __HAVE_ARCH_STRNCMP | 
 /*
  * strncmp - compare at most count bytes of the strings cs and ct.
  *
  * count is decremented before each byte pair; once that decrement goes
  * negative the loop leaves through label 2 and the result is 0, which is
  * also the result when both strings end together.  On the first mismatch
  * (label 3) the carry flag left by scasb is turned into -1 or 1 with
  * "sbbl" / "orb $1": -1 when the cs byte is below the ct byte as an
  * unsigned value, 1 otherwise.
  */
 | 125 | static inline int strncmp(const char * cs,const char * ct,size_t count) | 
 | 126 | { | 
 | 127 | register int __res; | 
 | 128 | int d0, d1, d2; | 
 | 129 | __asm__ __volatile__( | 
 | 130 | 	"1:\tdecl %3\n\t" | 
 | 131 | 	"js 2f\n\t" | 
 | 132 | 	"lodsb\n\t" | 
 | 133 | 	"scasb\n\t" | 
 | 134 | 	"jne 3f\n\t" | 
 | 135 | 	"testb %%al,%%al\n\t" | 
 | 136 | 	"jne 1b\n" | 
 | 137 | 	"2:\txorl %%eax,%%eax\n\t" | 
 | 138 | 	"jmp 4f\n" | 
 | 139 | 	"3:\tsbbl %%eax,%%eax\n\t" | 
 | 140 | 	"orb $1,%%al\n" | 
 | 141 | 	"4:" | 
| Linus Torvalds | 793ae77 | 2005-06-24 10:39:17 -0700 | [diff] [blame] | 142 | 	:"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) | 
 | 143 | 	:"1" (cs),"2" (ct),"3" (count) | 
 | 144 | 	:"memory"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 145 | return __res; | 
 | 146 | } | 
 | 147 |  | 
 | 148 | #define __HAVE_ARCH_STRCHR | 
 /*
  * strchr - find the first occurrence of the byte c in the string s.
  *
  * c arrives in eax (it is tied to operand 0) and its low byte is parked in
  * ah so that al is free for lodsb.  The match test comes before the
  * end-of-string test, so c == 0 finds the terminator itself.  lodsb has
  * already advanced the source pointer past the byte it loaded, hence the
  * final "decl %0".  When the terminator is reached without a match the
  * pointer is replaced by 1, so the same decrement produces 0 (NULL).
  */
 | 149 | static inline char * strchr(const char * s, int c) | 
 | 150 | { | 
 | 151 | int d0; | 
 | 152 | register char * __res; | 
 | 153 | __asm__ __volatile__( | 
 | 154 | 	"movb %%al,%%ah\n" | 
 | 155 | 	"1:\tlodsb\n\t" | 
 | 156 | 	"cmpb %%ah,%%al\n\t" | 
 | 157 | 	"je 2f\n\t" | 
 | 158 | 	"testb %%al,%%al\n\t" | 
 | 159 | 	"jne 1b\n\t" | 
 | 160 | 	"movl $1,%1\n" | 
 | 161 | 	"2:\tmovl %1,%0\n\t" | 
 | 162 | 	"decl %0" | 
| Linus Torvalds | 793ae77 | 2005-06-24 10:39:17 -0700 | [diff] [blame] | 163 | 	:"=a" (__res), "=&S" (d0) | 
 | 164 | 	:"1" (s),"0" (c) | 
 | 165 | 	:"memory"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 166 | return __res; | 
 | 167 | } | 
 | 168 |  | 
 | 169 | #define __HAVE_ARCH_STRRCHR | 
 /*
  * strrchr - find the last occurrence of the byte c in the string s.
  *
  * The result starts out as 0 (NULL).  Every time the byte just loaded
  * equals c, the result is overwritten with that byte's address, esi - 1
  * (lodsb has already stepped esi past it).  The loop only stops after the
  * terminating zero byte has itself been compared, so c == 0 yields the
  * address of the terminator.
  *
  * NOTE(review): operand 0 uses the "=g" constraint, which also permits a
  * memory operand, while "leal" needs a register destination -- "=r" looks
  * like the intended constraint; confirm before changing.
  */
 | 170 | static inline char * strrchr(const char * s, int c) | 
 | 171 | { | 
 | 172 | int d0, d1; | 
 | 173 | register char * __res; | 
 | 174 | __asm__ __volatile__( | 
 | 175 | 	"movb %%al,%%ah\n" | 
 | 176 | 	"1:\tlodsb\n\t" | 
 | 177 | 	"cmpb %%ah,%%al\n\t" | 
 | 178 | 	"jne 2f\n\t" | 
 | 179 | 	"leal -1(%%esi),%0\n" | 
 | 180 | 	"2:\ttestb %%al,%%al\n\t" | 
 | 181 | 	"jne 1b" | 
| Linus Torvalds | 793ae77 | 2005-06-24 10:39:17 -0700 | [diff] [blame] | 182 | 	:"=g" (__res), "=&S" (d0), "=&a" (d1) | 
 | 183 | 	:"0" (0),"1" (s),"2" (c) | 
 | 184 | 	:"memory"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 185 | return __res; | 
 | 186 | } | 
 | 187 |  | 
 | 188 | #define __HAVE_ARCH_STRLEN | 
 /*
  * strlen - return the number of bytes in s before the terminating zero.
  *
  * "repne scasb" searches for al = 0 with the count register preset to
  * 0xffffffff, decrementing it once per byte examined (terminator
  * included).  Complementing the leftover count gives the number of bytes
  * examined, and the final decrement drops the terminator from it.
  */
 | 189 | static inline size_t strlen(const char * s) | 
 | 190 | { | 
 | 191 | int d0; | 
 | 192 | register int __res; | 
 | 193 | __asm__ __volatile__( | 
 | 194 | 	"repne\n\t" | 
 | 195 | 	"scasb\n\t" | 
 | 196 | 	"notl %0\n\t" | 
 | 197 | 	"decl %0" | 
| Linus Torvalds | 793ae77 | 2005-06-24 10:39:17 -0700 | [diff] [blame] | 198 | 	:"=c" (__res), "=&D" (d0) | 
 | 199 | 	:"1" (s),"a" (0), "0" (0xffffffffu) | 
 | 200 | 	:"memory"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 201 | return __res; | 
 | 202 | } | 
 | 203 |  | 
 /*
  * __memcpy - copy n bytes from "from" to "to"; the memcpy() macro uses this
  * path when n is not a compile-time constant.
  *
  * n/4 dwords are moved with "rep movsl"; n (operand 4) is then reloaded
  * into ecx and masked with 3 to get the byte remainder, which is moved
  * with "rep movsb".  The "jz 1f" skips that second rep when the remainder
  * is zero.  Returns to.
  */
 | 204 | static inline void * __memcpy(void * to, const void * from, size_t n) | 
 | 205 | { | 
 | 206 | int d0, d1, d2; | 
 | 207 | __asm__ __volatile__( | 
 | 208 | 	"rep ; movsl\n\t" | 
| Denis Vlasenko | d5b63d7 | 2005-05-01 08:58:48 -0700 | [diff] [blame] | 209 | 	"movl %4,%%ecx\n\t" | 
 | 210 | 	"andl $3,%%ecx\n\t" | 
 | 211 | #if 1	/* want to pay 2 byte penalty for a chance to skip microcoded rep? */ | 
 | 212 | 	"jz 1f\n\t" | 
 | 213 | #endif | 
 | 214 | 	"rep ; movsb\n\t" | 
 | 215 | 	"1:" | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 216 | 	: "=&c" (d0), "=&D" (d1), "=&S" (d2) | 
| Denis Vlasenko | d5b63d7 | 2005-05-01 08:58:48 -0700 | [diff] [blame] | 217 | 	: "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 218 | 	: "memory"); | 
 | 219 | return (to); | 
 | 220 | } | 
 | 221 |  | 
 | 222 | /* | 
| Denis Vlasenko | d5b63d7 | 2005-05-01 08:58:48 -0700 | [diff] [blame] | 223 |  * This looks ugly, but the compiler can optimize it totally, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 224 |  * as the count is constant. | 
 | 225 |  */ | 
 /*
  * __constant_memcpy - copy n bytes from "from" to "to"; reached from the
  * memcpy() macro when n is a compile-time constant.
  *
  * n == 0 returns immediately.  Sizes 1, 2, 3, 4, 5, 6 and 8 are done with
  * one or two plain loads and stores.  Otherwise the dword part is copied
  * with "rep movsl" when n >= 20 and with up to four single "movsl"
  * instructions when n is smaller, and the last n % 4 bytes are copied with
  * movsb and/or movsw.  esi and edi carry the running source and
  * destination addresses from one asm statement to the next.  Returns to.
  */
 | 226 | static inline void * __constant_memcpy(void * to, const void * from, size_t n) | 
 | 227 | { | 
| Denis Vlasenko | d5b63d7 | 2005-05-01 08:58:48 -0700 | [diff] [blame] | 228 | 	long esi, edi; | 
 | 229 | 	if (!n) return to; | 
 | 230 | #if 1	/* want to do small copies with non-string ops? */ | 
 | 231 | 	switch (n) { | 
 | 232 | 		case 1: *(char*)to = *(char*)from; return to; | 
 | 233 | 		case 2: *(short*)to = *(short*)from; return to; | 
 | 234 | 		case 4: *(int*)to = *(int*)from; return to; | 
 | 235 | #if 1	/* including those doable with two moves? */ | 
 | 236 | 		case 3: *(short*)to = *(short*)from; | 
 | 237 | 			*((char*)to+2) = *((char*)from+2); return to; | 
 | 238 | 		case 5: *(int*)to = *(int*)from; | 
 | 239 | 			*((char*)to+4) = *((char*)from+4); return to; | 
 | 240 | 		case 6: *(int*)to = *(int*)from; | 
 | 241 | 			*((short*)to+2) = *((short*)from+2); return to; | 
 | 242 | 		case 8: *(int*)to = *(int*)from; | 
 | 243 | 			*((int*)to+1) = *((int*)from+1); return to; | 
 | 244 | #endif | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 245 | 	} | 
| Denis Vlasenko | d5b63d7 | 2005-05-01 08:58:48 -0700 | [diff] [blame] | 246 | #endif | 
 | 247 | 	esi = (long) from; | 
 | 248 | 	edi = (long) to; | 
 | 249 | 	if (n >= 5*4) { | 
 | 250 | 		/* large block: use rep prefix */ | 
 | 251 | 		int ecx; | 
 | 252 | 		__asm__ __volatile__( | 
 | 253 | 			"rep ; movsl" | 
 | 254 | 			: "=&c" (ecx), "=&D" (edi), "=&S" (esi) | 
 | 255 | 			: "0" (n/4), "1" (edi),"2" (esi) | 
 | 256 | 			: "memory" | 
 | 257 | 		); | 
 | 258 | 	} else { | 
 | 259 | 		/* small block: don't clobber ecx + smaller code */ | 
 | 260 | 		if (n >= 4*4) __asm__ __volatile__("movsl" | 
 | 261 | 			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 
 | 262 | 		if (n >= 3*4) __asm__ __volatile__("movsl" | 
 | 263 | 			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 
 | 264 | 		if (n >= 2*4) __asm__ __volatile__("movsl" | 
 | 265 | 			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 
 | 266 | 		if (n >= 1*4) __asm__ __volatile__("movsl" | 
 | 267 | 			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 
 | 268 | 	} | 
 | 269 | 	switch (n % 4) { | 
 | 270 | 		/* tail */ | 
 | 271 | 		case 0: return to; | 
 | 272 | 		case 1: __asm__ __volatile__("movsb" | 
 | 273 | 			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 
 | 274 | 			return to; | 
 | 275 | 		case 2: __asm__ __volatile__("movsw" | 
 | 276 | 			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 
 | 277 | 			return to; | 
 | 278 | 		default: __asm__ __volatile__("movsw\n\tmovsb" | 
 | 279 | 			:"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 
 | 280 | 			return to; | 
 | 281 | 	} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 282 | } | 
 | 283 |  | 
 | 284 | #define __HAVE_ARCH_MEMCPY | 
 | 285 |  | 
 | 286 | #ifdef CONFIG_X86_USE_3DNOW | 
 | 287 |  | 
 | 288 | #include <asm/mmx.h> | 
 | 289 |  | 
 | 290 | /* | 
 | 291 |  *	This CPU favours 3DNow strongly (eg AMD Athlon) | 
 | 292 |  */ | 
 | 293 |  | 
 /*
  * Constant-length copy for the 3DNow! configuration: lengths below 512
  * bytes go to __constant_memcpy(), everything else to _mmx_memcpy()
  * (not defined in this file; presumably declared by <asm/mmx.h>).
  */
 | 294 | static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) | 
 | 295 | { | 
 | 296 | 	if (len < 512) | 
 | 297 | 		return __constant_memcpy(to, from, len); | 
 | 298 | 	return _mmx_memcpy(to, from, len); | 
 | 299 | } | 
 | 300 |  | 
 /*
  * Variable-length copy for the 3DNow! configuration: lengths below 512
  * bytes go to __memcpy(), everything else to _mmx_memcpy()
  * (not defined in this file; presumably declared by <asm/mmx.h>).
  */
 | 301 | static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) | 
 | 302 | { | 
 | 303 | 	if (len < 512) | 
 | 304 | 		return __memcpy(to, from, len); | 
 | 305 | 	return _mmx_memcpy(to, from, len); | 
 | 306 | } | 
 | 307 |  | 
 /*
  * memcpy() for the 3DNow! configuration: picks the constant-length or the
  * variable-length helper depending on whether n is a compile-time constant.
  */
 | 308 | #define memcpy(t, f, n) \ | 
 | 309 | (__builtin_constant_p(n) ? \ | 
 | 310 |  __constant_memcpy3d((t),(f),(n)) : \ | 
 | 311 |  __memcpy3d((t),(f),(n))) | 
 | 312 |  | 
 | 313 | #else | 
 | 314 |  | 
 | 315 | /* | 
 | 316 |  *	No 3D Now! | 
 | 317 |  */ | 
 | 318 |   | 
 /*
  * memcpy() without 3DNow!: __constant_memcpy() when n is a compile-time
  * constant, __memcpy() otherwise.
  */
 | 319 | #define memcpy(t, f, n) \ | 
 | 320 | (__builtin_constant_p(n) ? \ | 
 | 321 |  __constant_memcpy((t),(f),(n)) : \ | 
 | 322 |  __memcpy((t),(f),(n))) | 
 | 323 |  | 
 | 324 | #endif | 
 | 325 |  | 
 | 326 | #define __HAVE_ARCH_MEMMOVE | 
 /* memmove() is only declared here; its definition is outside this file. */
 | 327 | void *memmove(void * dest,const void * src, size_t n); | 
 | 328 |  | 
 /* memcmp() is mapped straight onto the compiler builtin. */
 | 329 | #define memcmp __builtin_memcmp | 
 | 330 |  | 
 | 331 | #define __HAVE_ARCH_MEMCHR | 
 /*
  * memchr - find the first byte equal to c in the count bytes at cs.
  *
  * Returns NULL at once when count is 0.  Otherwise "repne scasb" scans for
  * al = c; on a hit edi has already moved one past the matching byte, so
  * the final "decl %0" gives its address.  On a miss edi is replaced by 1
  * and the same decrement produces 0 (NULL).
  */
 | 332 | static inline void * memchr(const void * cs,int c,size_t count) | 
 | 333 | { | 
 | 334 | int d0; | 
 | 335 | register void * __res; | 
 | 336 | if (!count) | 
 | 337 | 	return NULL; | 
 | 338 | __asm__ __volatile__( | 
 | 339 | 	"repne\n\t" | 
 | 340 | 	"scasb\n\t" | 
 | 341 | 	"je 1f\n\t" | 
 | 342 | 	"movl $1,%0\n" | 
 | 343 | 	"1:\tdecl %0" | 
| Linus Torvalds | 793ae77 | 2005-06-24 10:39:17 -0700 | [diff] [blame] | 344 | 	:"=D" (__res), "=&c" (d0) | 
 | 345 | 	:"a" (c),"0" (cs),"1" (count) | 
 | 346 | 	:"memory"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 347 | return __res; | 
 | 348 | } | 
 | 349 |  | 
 /*
  * __memset_generic - store the byte c into count bytes starting at s with
  * a single "rep stosb".  Returns s.
  */
 | 350 | static inline void * __memset_generic(void * s, char c,size_t count) | 
 | 351 | { | 
 | 352 | int d0, d1; | 
 | 353 | __asm__ __volatile__( | 
 | 354 | 	"rep\n\t" | 
 | 355 | 	"stosb" | 
 | 356 | 	: "=&c" (d0), "=&D" (d1) | 
 | 357 | 	:"a" (c),"1" (s),"0" (count) | 
 | 358 | 	:"memory"); | 
 | 359 | return s; | 
 | 360 | } | 
 | 361 |  | 
 | 362 | /* we might want to write optimized versions of these later */ | 
 /* Constant count, non-constant fill byte: currently just the byte-wise fill. */
 | 363 | #define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count)) | 
 | 364 |  | 
 | 365 | /* | 
 | 366 |  * memset(x,0,y) is a reasonably common thing to do, so we want to fill | 
 | 367 |  * things 32 bits at a time even when we don't know the size of the | 
 | 368 |  * area at compile-time.. | 
 | 369 |  */ | 
 /*
  * __constant_c_memset - fill count bytes at s with the 32-bit pattern c.
  *
  * The memset() macro passes c as the fill byte replicated into all four
  * bytes.  count/4 dwords are stored with "rep stosl"; bits 1 and 0 of
  * count (operand 3, kept in a byte-addressable register by the "q"
  * constraint) then decide whether a trailing stosw and/or stosb is
  * needed.  Returns s.
  */
 | 370 | static inline void * __constant_c_memset(void * s, unsigned long c, size_t count) | 
 | 371 | { | 
 | 372 | int d0, d1; | 
 | 373 | __asm__ __volatile__( | 
 | 374 | 	"rep ; stosl\n\t" | 
 | 375 | 	"testb $2,%b3\n\t" | 
 | 376 | 	"je 1f\n\t" | 
 | 377 | 	"stosw\n" | 
 | 378 | 	"1:\ttestb $1,%b3\n\t" | 
 | 379 | 	"je 2f\n\t" | 
 | 380 | 	"stosb\n" | 
 | 381 | 	"2:" | 
| Linus Torvalds | 793ae77 | 2005-06-24 10:39:17 -0700 | [diff] [blame] | 382 | 	:"=&c" (d0), "=&D" (d1) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 383 | 	:"a" (c), "q" (count), "0" (count/4), "1" ((long) s) | 
 | 384 | 	:"memory"); | 
 | 385 | return (s);	 | 
 | 386 | } | 
 | 387 |  | 
 | 388 | /* Added by Gertjan van Wingerde to make minix and sysv module work */ | 
 | 389 | #define __HAVE_ARCH_STRNLEN | 
 /*
  * strnlen - length of the string s, looking at no more than count bytes.
  *
  * The result register starts as a copy of s and is advanced one byte at a
  * time.  Before each byte is examined count is decremented, and the loop
  * ends once that decrement reaches -1 (count exhausted) or the byte is
  * zero.  Subtracting s at label 3 turns the pointer into a length.
  */
 | 390 | static inline size_t strnlen(const char * s, size_t count) | 
 | 391 | { | 
 | 392 | int d0; | 
 | 393 | register int __res; | 
 | 394 | __asm__ __volatile__( | 
 | 395 | 	"movl %2,%0\n\t" | 
 | 396 | 	"jmp 2f\n" | 
 | 397 | 	"1:\tcmpb $0,(%0)\n\t" | 
 | 398 | 	"je 3f\n\t" | 
 | 399 | 	"incl %0\n" | 
 | 400 | 	"2:\tdecl %1\n\t" | 
 | 401 | 	"cmpl $-1,%1\n\t" | 
 | 402 | 	"jne 1b\n" | 
 | 403 | 	"3:\tsubl %2,%0" | 
 | 404 | 	:"=a" (__res), "=&d" (d0) | 
| Linus Torvalds | 793ae77 | 2005-06-24 10:39:17 -0700 | [diff] [blame] | 405 | 	:"c" (s),"1" (count) | 
 | 406 | 	:"memory"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 407 | return __res; | 
 | 408 | } | 
 | 409 | /* end of additional stuff */ | 
 | 410 |  | 
 | 411 | #define __HAVE_ARCH_STRSTR | 
 | 412 |  | 
 /* strstr() is only declared here; its definition is outside this file. */
 | 413 | extern char *strstr(const char *cs, const char *ct); | 
 | 414 |  | 
 | 415 | /* | 
 | 416 |  * This looks horribly ugly, but the compiler can optimize it totally, | 
 | 417 |  * as we by now know that both pattern and count are constant. | 
 | 418 |  */ | 
 /*
  * __constant_c_and_count_memset - fill count bytes at s with "pattern".
  *
  * Counts 0 to 4 are handled with at most two plain stores.  Larger counts
  * use the local COMMON() macro: "rep stosl" for count/4 dwords followed by
  * the stosw/stosb combination that matches count % 4.  The memset() macro
  * passes the fill byte replicated into all four bytes of pattern.
  * Returns s.
  */
 | 419 | static inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count) | 
 | 420 | { | 
 | 421 | 	switch (count) { | 
 | 422 | 		case 0: | 
 | 423 | 			return s; | 
 | 424 | 		case 1: | 
 | 425 | 			*(unsigned char *)s = pattern; | 
 | 426 | 			return s; | 
 | 427 | 		case 2: | 
 | 428 | 			*(unsigned short *)s = pattern; | 
 | 429 | 			return s; | 
 | 430 | 		case 3: | 
 | 431 | 			*(unsigned short *)s = pattern; | 
 | 432 | 			*(2+(unsigned char *)s) = pattern; | 
 | 433 | 			return s; | 
 | 434 | 		case 4: | 
 | 435 | 			*(unsigned long *)s = pattern; | 
 | 436 | 			return s; | 
 | 437 | 	} | 
 | 438 | #define COMMON(x) \ | 
 | 439 | __asm__  __volatile__( \ | 
 | 440 | 	"rep ; stosl" \ | 
 | 441 | 	x \ | 
 | 442 | 	: "=&c" (d0), "=&D" (d1) \ | 
 | 443 | 	: "a" (pattern),"0" (count/4),"1" ((long) s) \ | 
 | 444 | 	: "memory") | 
 | 445 | { | 
 | 446 | 	int d0, d1; | 
 | 447 | 	switch (count % 4) { | 
 | 448 | 		case 0: COMMON(""); return s; | 
 | 449 | 		case 1: COMMON("\n\tstosb"); return s; | 
 | 450 | 		case 2: COMMON("\n\tstosw"); return s; | 
 | 451 | 		default: COMMON("\n\tstosw\n\tstosb"); return s; | 
 | 452 | 	} | 
 | 453 | } | 
 | 454 |    | 
 | 455 | #undef COMMON | 
 | 456 | } | 
 | 457 |  | 
 /* Fill value is a compile-time constant: choose by whether count is one too. */
 | 458 | #define __constant_c_x_memset(s, c, count) \ | 
 | 459 | (__builtin_constant_p(count) ? \ | 
 | 460 |  __constant_c_and_count_memset((s),(c),(count)) : \ | 
 | 461 |  __constant_c_memset((s),(c),(count))) | 
 | 462 |  | 
 /* Fill value is not a compile-time constant: choose by whether count is. */
 | 463 | #define __memset(s, c, count) \ | 
 | 464 | (__builtin_constant_p(count) ? \ | 
 | 465 |  __constant_count_memset((s),(c),(count)) : \ | 
 | 466 |  __memset_generic((s),(c),(count))) | 
 | 467 |  | 
 | 468 | #define __HAVE_ARCH_MEMSET | 
 /*
  * memset() entry point.  A compile-time constant c is truncated to a byte,
  * replicated into all four bytes of a 32-bit pattern and sent down the
  * dword-wide path; any other c takes the __memset() path.
  */
 | 469 | #define memset(s, c, count) \ | 
 | 470 | (__builtin_constant_p(c) ? \ | 
 | 471 |  __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \ | 
 | 472 |  __memset((s),(c),(count))) | 
 | 473 |  | 
 | 474 | /* | 
 | 475 |  * find the first occurrence of byte 'c', or 1 past the area if none | 
 | 476 |  */ | 
 | 477 | #define __HAVE_ARCH_MEMSCAN | 
 /*
  * memscan - scan size bytes at addr for the byte c.
  *
  * Returns addr unchanged when size is 0.  After "repnz; scasb" edi is one
  * past the last byte examined: on a hit it is stepped back onto the
  * matching byte, on a miss it is left alone and so equals addr + size.
  */
 | 478 | static inline void * memscan(void * addr, int c, size_t size) | 
 | 479 | { | 
 | 480 | 	if (!size) | 
 | 481 | 		return addr; | 
 | 482 | 	__asm__("repnz; scasb\n\t" | 
 | 483 | 		"jnz 1f\n\t" | 
 | 484 | 		"dec %%edi\n" | 
 | 485 | 		"1:" | 
 | 486 | 		: "=D" (addr), "=c" (size) | 
| Linus Torvalds | 793ae77 | 2005-06-24 10:39:17 -0700 | [diff] [blame] | 487 | 		: "0" (addr), "1" (size), "a" (c) | 
 | 488 | 		: "memory"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 489 | 	return addr; | 
 | 490 | } | 
 | 491 |  | 
 | 492 | #endif /* __KERNEL__ */ | 
 | 493 |  | 
 | 494 | #endif |