| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | #include <linux/string.h> | 
 | 2 | #include <linux/module.h> | 
 | 3 |  | 
 | 4 | #undef memcpy | 
 | 5 | #undef memset | 
 | 6 |  | 
 | 7 | void *memcpy(void *to, const void *from, size_t n) | 
 | 8 | { | 
 | 9 | #ifdef CONFIG_X86_USE_3DNOW | 
 | 10 | 	return __memcpy3d(to, from, n); | 
 | 11 | #else | 
 | 12 | 	return __memcpy(to, from, n); | 
 | 13 | #endif | 
 | 14 | } | 
 | 15 | EXPORT_SYMBOL(memcpy); | 
 | 16 |  | 
 | 17 | void *memset(void *s, int c, size_t count) | 
 | 18 | { | 
 | 19 | 	return __memset(s, c, count); | 
 | 20 | } | 
 | 21 | EXPORT_SYMBOL(memset); | 
 | 22 |  | 
 | 23 | void *memmove(void *dest, const void *src, size_t n) | 
 | 24 | { | 
| Ma Ling | 3b4b682 | 2010-09-17 03:12:40 +0800 | [diff] [blame] | 25 | 	int d0,d1,d2,d3,d4,d5; | 
 | 26 | 	char *ret = dest; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 27 |  | 
| Ma Ling | 3b4b682 | 2010-09-17 03:12:40 +0800 | [diff] [blame] | 28 | 	__asm__ __volatile__( | 
| Andy Shevchenko | d50ba36 | 2013-04-15 12:06:10 +0300 | [diff] [blame] | 29 | 		/* Handle more 16 bytes in loop */ | 
| Ma Ling | 3b4b682 | 2010-09-17 03:12:40 +0800 | [diff] [blame] | 30 | 		"cmp $0x10, %0\n\t" | 
 | 31 | 		"jb	1f\n\t" | 
| Ma, Ling | fdf4289 | 2010-08-23 14:11:12 -0700 | [diff] [blame] | 32 |  | 
| Ma Ling | 3b4b682 | 2010-09-17 03:12:40 +0800 | [diff] [blame] | 33 | 		/* Decide forward/backward copy mode */ | 
 | 34 | 		"cmp %2, %1\n\t" | 
 | 35 | 		"jb	2f\n\t" | 
 | 36 |  | 
 | 37 | 		/* | 
 | 38 | 		 * movs instruction have many startup latency | 
 | 39 | 		 * so we handle small size by general register. | 
 | 40 | 		 */ | 
 | 41 | 		"cmp  $680, %0\n\t" | 
 | 42 | 		"jb 3f\n\t" | 
 | 43 | 		/* | 
 | 44 | 		 * movs instruction is only good for aligned case. | 
 | 45 | 		 */ | 
 | 46 | 		"mov %1, %3\n\t" | 
 | 47 | 		"xor %2, %3\n\t" | 
 | 48 | 		"and $0xff, %3\n\t" | 
 | 49 | 		"jz 4f\n\t" | 
 | 50 | 		"3:\n\t" | 
 | 51 | 		"sub $0x10, %0\n\t" | 
 | 52 |  | 
 | 53 | 		/* | 
| Andy Shevchenko | bb916ff | 2013-04-15 12:06:09 +0300 | [diff] [blame] | 54 | 		 * We gobble 16 bytes forward in each loop. | 
| Ma Ling | 3b4b682 | 2010-09-17 03:12:40 +0800 | [diff] [blame] | 55 | 		 */ | 
 | 56 | 		"3:\n\t" | 
 | 57 | 		"sub $0x10, %0\n\t" | 
 | 58 | 		"mov 0*4(%1), %3\n\t" | 
 | 59 | 		"mov 1*4(%1), %4\n\t" | 
 | 60 | 		"mov  %3, 0*4(%2)\n\t" | 
 | 61 | 		"mov  %4, 1*4(%2)\n\t" | 
 | 62 | 		"mov 2*4(%1), %3\n\t" | 
 | 63 | 		"mov 3*4(%1), %4\n\t" | 
 | 64 | 		"mov  %3, 2*4(%2)\n\t" | 
 | 65 | 		"mov  %4, 3*4(%2)\n\t" | 
 | 66 | 		"lea  0x10(%1), %1\n\t" | 
 | 67 | 		"lea  0x10(%2), %2\n\t" | 
 | 68 | 		"jae 3b\n\t" | 
 | 69 | 		"add $0x10, %0\n\t" | 
 | 70 | 		"jmp 1f\n\t" | 
 | 71 |  | 
 | 72 | 		/* | 
 | 73 | 		 * Handle data forward by movs. | 
 | 74 | 		 */ | 
 | 75 | 		".p2align 4\n\t" | 
 | 76 | 		"4:\n\t" | 
 | 77 | 		"mov -4(%1, %0), %3\n\t" | 
 | 78 | 		"lea -4(%2, %0), %4\n\t" | 
 | 79 | 		"shr $2, %0\n\t" | 
 | 80 | 		"rep movsl\n\t" | 
 | 81 | 		"mov %3, (%4)\n\t" | 
 | 82 | 		"jmp 11f\n\t" | 
 | 83 | 		/* | 
 | 84 | 		 * Handle data backward by movs. | 
 | 85 | 		 */ | 
 | 86 | 		".p2align 4\n\t" | 
 | 87 | 		"6:\n\t" | 
 | 88 | 		"mov (%1), %3\n\t" | 
 | 89 | 		"mov %2, %4\n\t" | 
 | 90 | 		"lea -4(%1, %0), %1\n\t" | 
 | 91 | 		"lea -4(%2, %0), %2\n\t" | 
 | 92 | 		"shr $2, %0\n\t" | 
 | 93 | 		"std\n\t" | 
 | 94 | 		"rep movsl\n\t" | 
 | 95 | 		"mov %3,(%4)\n\t" | 
 | 96 | 		"cld\n\t" | 
 | 97 | 		"jmp 11f\n\t" | 
 | 98 |  | 
 | 99 | 		/* | 
 | 100 | 		 * Start to prepare for backward copy. | 
 | 101 | 		 */ | 
 | 102 | 		".p2align 4\n\t" | 
 | 103 | 		"2:\n\t" | 
 | 104 | 		"cmp  $680, %0\n\t" | 
 | 105 | 		"jb 5f\n\t" | 
 | 106 | 		"mov %1, %3\n\t" | 
 | 107 | 		"xor %2, %3\n\t" | 
 | 108 | 		"and $0xff, %3\n\t" | 
 | 109 | 		"jz 6b\n\t" | 
 | 110 |  | 
 | 111 | 		/* | 
 | 112 | 		 * Calculate copy position to tail. | 
 | 113 | 		 */ | 
 | 114 | 		"5:\n\t" | 
 | 115 | 		"add %0, %1\n\t" | 
 | 116 | 		"add %0, %2\n\t" | 
 | 117 | 		"sub $0x10, %0\n\t" | 
 | 118 |  | 
 | 119 | 		/* | 
| Andy Shevchenko | bb916ff | 2013-04-15 12:06:09 +0300 | [diff] [blame] | 120 | 		 * We gobble 16 bytes backward in each loop. | 
| Ma Ling | 3b4b682 | 2010-09-17 03:12:40 +0800 | [diff] [blame] | 121 | 		 */ | 
 | 122 | 		"7:\n\t" | 
 | 123 | 		"sub $0x10, %0\n\t" | 
 | 124 |  | 
 | 125 | 		"mov -1*4(%1), %3\n\t" | 
 | 126 | 		"mov -2*4(%1), %4\n\t" | 
 | 127 | 		"mov  %3, -1*4(%2)\n\t" | 
 | 128 | 		"mov  %4, -2*4(%2)\n\t" | 
 | 129 | 		"mov -3*4(%1), %3\n\t" | 
 | 130 | 		"mov -4*4(%1), %4\n\t" | 
 | 131 | 		"mov  %3, -3*4(%2)\n\t" | 
 | 132 | 		"mov  %4, -4*4(%2)\n\t" | 
 | 133 | 		"lea  -0x10(%1), %1\n\t" | 
 | 134 | 		"lea  -0x10(%2), %2\n\t" | 
 | 135 | 		"jae 7b\n\t" | 
 | 136 | 		/* | 
 | 137 | 		 * Calculate copy position to head. | 
 | 138 | 		 */ | 
 | 139 | 		"add $0x10, %0\n\t" | 
 | 140 | 		"sub %0, %1\n\t" | 
 | 141 | 		"sub %0, %2\n\t" | 
 | 142 |  | 
 | 143 | 		/* | 
 | 144 | 		 * Move data from 8 bytes to 15 bytes. | 
 | 145 | 		 */ | 
 | 146 | 		".p2align 4\n\t" | 
 | 147 | 		"1:\n\t" | 
 | 148 | 		"cmp $8, %0\n\t" | 
 | 149 | 		"jb 8f\n\t" | 
 | 150 | 		"mov 0*4(%1), %3\n\t" | 
 | 151 | 		"mov 1*4(%1), %4\n\t" | 
 | 152 | 		"mov -2*4(%1, %0), %5\n\t" | 
 | 153 | 		"mov -1*4(%1, %0), %1\n\t" | 
 | 154 |  | 
 | 155 | 		"mov  %3, 0*4(%2)\n\t" | 
 | 156 | 		"mov  %4, 1*4(%2)\n\t" | 
 | 157 | 		"mov  %5, -2*4(%2, %0)\n\t" | 
 | 158 | 		"mov  %1, -1*4(%2, %0)\n\t" | 
 | 159 | 		"jmp 11f\n\t" | 
 | 160 |  | 
 | 161 | 		/* | 
 | 162 | 		 * Move data from 4 bytes to 7 bytes. | 
 | 163 | 		 */ | 
 | 164 | 		".p2align 4\n\t" | 
 | 165 | 		"8:\n\t" | 
 | 166 | 		"cmp $4, %0\n\t" | 
 | 167 | 		"jb 9f\n\t" | 
 | 168 | 		"mov 0*4(%1), %3\n\t" | 
 | 169 | 		"mov -1*4(%1, %0), %4\n\t" | 
 | 170 | 		"mov  %3, 0*4(%2)\n\t" | 
 | 171 | 		"mov  %4, -1*4(%2, %0)\n\t" | 
 | 172 | 		"jmp 11f\n\t" | 
 | 173 |  | 
 | 174 | 		/* | 
 | 175 | 		 * Move data from 2 bytes to 3 bytes. | 
 | 176 | 		 */ | 
 | 177 | 		".p2align 4\n\t" | 
 | 178 | 		"9:\n\t" | 
 | 179 | 		"cmp $2, %0\n\t" | 
 | 180 | 		"jb 10f\n\t" | 
 | 181 | 		"movw 0*2(%1), %%dx\n\t" | 
 | 182 | 		"movw -1*2(%1, %0), %%bx\n\t" | 
 | 183 | 		"movw %%dx, 0*2(%2)\n\t" | 
 | 184 | 		"movw %%bx, -1*2(%2, %0)\n\t" | 
 | 185 | 		"jmp 11f\n\t" | 
 | 186 |  | 
 | 187 | 		/* | 
 | 188 | 		 * Move data for 1 byte. | 
 | 189 | 		 */ | 
 | 190 | 		".p2align 4\n\t" | 
 | 191 | 		"10:\n\t" | 
 | 192 | 		"cmp $1, %0\n\t" | 
 | 193 | 		"jb 11f\n\t" | 
 | 194 | 		"movb (%1), %%cl\n\t" | 
 | 195 | 		"movb %%cl, (%2)\n\t" | 
 | 196 | 		".p2align 4\n\t" | 
 | 197 | 		"11:" | 
 | 198 | 		: "=&c" (d0), "=&S" (d1), "=&D" (d2), | 
 | 199 | 		  "=r" (d3),"=r" (d4), "=r"(d5) | 
 | 200 | 		:"0" (n), | 
 | 201 | 		 "1" (src), | 
 | 202 | 		 "2" (dest) | 
 | 203 | 		:"memory"); | 
 | 204 |  | 
 | 205 | 	return ret; | 
 | 206 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 207 | } | 
 | 208 | EXPORT_SYMBOL(memmove); |