| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | #include <linux/string.h> | 
|  | 2 | #include <linux/module.h> | 
|  | 3 |  | 
|  | 4 | #undef memcpy | 
|  | 5 | #undef memset | 
|  | 6 |  | 
/*
 * Out-of-line memcpy(): copies @n bytes from @from to @to by delegating
 * to one of two helpers chosen at build time, and returns whatever that
 * helper returns.
 */
void *memcpy(void *to, const void *from, size_t n)
{
	void *result;

#if defined(CONFIG_X86_USE_3DNOW)
	/* 3DNow! build: use the 3DNow! copy helper. */
	result = __memcpy3d(to, from, n);
#else
	result = __memcpy(to, from, n);
#endif

	return result;
}
EXPORT_SYMBOL(memcpy);
|  | 16 |  | 
/*
 * Out-of-line memset(): forwards @s, @c and @count unchanged to
 * __memset() and returns its result.
 */
void *memset(void *s, int c, size_t count)
{
	void *filled = __memset(s, c, count);

	return filled;
}
EXPORT_SYMBOL(memset);
|  | 22 |  | 
/*
 * memmove - copy @n bytes from @src to @dest; the two regions may overlap.
 *
 * Returns @dest.
 *
 * Strategy (32-bit x86 inline assembly):
 *   - n < 16: handled entirely by the tail code (labels 1, 8, 9, 10), which
 *     loads the head and the tail of the region into registers before
 *     storing anything, so overlap is harmless.
 *   - src >= dest (unsigned compare): copy forward.
 *   - src <  dest: copy backward.
 *   - In either direction "rep movsl" is used only when n >= 680 and the low
 *     8 bits of src and dest are equal; otherwise a 16-bytes-per-iteration
 *     register loop is used and the tail code finishes the last < 16 bytes.
 *
 * Operand map: %0 = count (ecx), %1 = src (esi), %2 = dest (edi),
 * %3/%4/%5 = compiler-chosen scratch registers.
 */
void *memmove(void *dest, const void *src, size_t n)
{
	int d0, d1, d2, d3, d4, d5;
	char *ret = dest;

	__asm__ __volatile__(
		/* Fewer than 16 bytes: go straight to the tail code. */
		"cmp $0x10, %0\n\t"
		"jb	1f\n\t"

		/* Pick the direction: src < dest means copy backward. */
		"cmp %2, %1\n\t"
		"jb	2f\n\t"

		/*
		 * The movs instruction has a high startup latency,
		 * so small sizes are copied through general registers.
		 */
		"cmp  $680, %0\n\t"
		"jb 3f\n\t"
		/*
		 * movs is only worthwhile when src and dest share the same
		 * low 8 address bits.
		 */
		"mov %1, %3\n\t"
		"xor %2, %3\n\t"
		"and $0xff, %3\n\t"
		"jz 4f\n\t"
		/*
		 * Entry to the forward register loop (target of "jb 3f").
		 * Pre-bias the count by 16 so the loop's own subtraction
		 * borrows exactly on the last full 16-byte block.
		 */
		"3:\n\t"
		"sub $0x10, %0\n\t"

		/*
		 * Copy 16 bytes forward per iteration. This second "3:" is the
		 * loop head: "jae 3b" binds to the nearest preceding
		 * definition. mov and lea leave the flags alone, so jae still
		 * tests the carry produced by the sub at the top.
		 */
		"3:\n\t"
		"sub $0x10, %0\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov 1*4(%1), %4\n\t"
		"mov  %3, 0*4(%2)\n\t"
		"mov  %4, 1*4(%2)\n\t"
		"mov 2*4(%1), %3\n\t"
		"mov 3*4(%1), %4\n\t"
		"mov  %3, 2*4(%2)\n\t"
		"mov  %4, 3*4(%2)\n\t"
		"lea  0x10(%1), %1\n\t"
		"lea  0x10(%2), %2\n\t"
		"jae 3b\n\t"
		/* Undo the final over-subtraction: %0 = bytes left (0..15). */
		"add $0x10, %0\n\t"
		"jmp 1f\n\t"

		/*
		 * Forward copy with movs. The last 4 source bytes are loaded
		 * up front and stored after the string copy, which covers the
		 * 0..3 bytes that "rep movsl" (count / 4 dwords) leaves behind.
		 */
		".p2align 4\n\t"
		"4:\n\t"
		"mov -4(%1, %0), %3\n\t"
		"lea -4(%2, %0), %4\n\t"
		"shr $2, %0\n\t"
		"rep movsl\n\t"
		"mov %3, (%4)\n\t"
		"jmp 11f\n\t"
		/*
		 * Backward copy with movs. The first 4 source bytes are saved,
		 * both pointers are moved to the last dword, and the direction
		 * flag is set for the string copy and cleared again afterwards.
		 */
		".p2align 4\n\t"
		"6:\n\t"
		"mov (%1), %3\n\t"
		"mov %2, %4\n\t"
		"lea -4(%1, %0), %1\n\t"
		"lea -4(%2, %0), %2\n\t"
		"shr $2, %0\n\t"
		"std\n\t"
		"rep movsl\n\t"
		"mov %3,(%4)\n\t"
		"cld\n\t"
		"jmp 11f\n\t"

		/*
		 * Backward copy: apply the same size/alignment test as the
		 * forward path to choose between movs (6) and the loop (5).
		 */
		".p2align 4\n\t"
		"2:\n\t"
		"cmp  $680, %0\n\t"
		"jb 5f\n\t"
		"mov %1, %3\n\t"
		"xor %2, %3\n\t"
		"and $0xff, %3\n\t"
		"jz 6b\n\t"

		/*
		 * Move both pointers one past the end of their regions and
		 * pre-bias the count, mirroring the forward loop.
		 */
		"5:\n\t"
		"add %0, %1\n\t"
		"add %0, %2\n\t"
		"sub $0x10, %0\n\t"

		/*
		 * Copy 16 bytes backward per iteration.
		 */
		"7:\n\t"
		"sub $0x10, %0\n\t"

		"mov -1*4(%1), %3\n\t"
		"mov -2*4(%1), %4\n\t"
		"mov  %3, -1*4(%2)\n\t"
		"mov  %4, -2*4(%2)\n\t"
		"mov -3*4(%1), %3\n\t"
		"mov -4*4(%1), %4\n\t"
		"mov  %3, -3*4(%2)\n\t"
		"mov  %4, -4*4(%2)\n\t"
		"lea  -0x10(%1), %1\n\t"
		"lea  -0x10(%2), %2\n\t"
		"jae 7b\n\t"
		/*
		 * Restore the residual count and step both pointers back to
		 * the head of the region so the tail code can finish the job.
		 */
		"add $0x10, %0\n\t"
		"sub %0, %1\n\t"
		"sub %0, %2\n\t"

		/*
		 * Tail, 8 to 15 bytes: load the first 8 and the last 8 bytes
		 * (they may overlap each other), then store them. All loads
		 * happen before any store. %1 is reused as a scratch register
		 * for the final dword, so the source pointer is gone after it.
		 */
		".p2align 4\n\t"
		"1:\n\t"
		"cmp $8, %0\n\t"
		"jb 8f\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov 1*4(%1), %4\n\t"
		"mov -2*4(%1, %0), %5\n\t"
		"mov -1*4(%1, %0), %1\n\t"

		"mov  %3, 0*4(%2)\n\t"
		"mov  %4, 1*4(%2)\n\t"
		"mov  %5, -2*4(%2, %0)\n\t"
		"mov  %1, -1*4(%2, %0)\n\t"
		"jmp 11f\n\t"

		/*
		 * Tail, 4 to 7 bytes: first and last dword.
		 */
		".p2align 4\n\t"
		"8:\n\t"
		"cmp $4, %0\n\t"
		"jb 9f\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov -1*4(%1, %0), %4\n\t"
		"mov  %3, 0*4(%2)\n\t"
		"mov  %4, -1*4(%2, %0)\n\t"
		"jmp 11f\n\t"

		/*
		 * Tail, 2 to 3 bytes: first and last 16-bit word.
		 *
		 * The 16-bit views of the scratch operands %3 and %4 are used
		 * here rather than fixed registers: a hard-coded %dx/%bx is
		 * neither an operand nor a declared clobber, so the compiler
		 * would be free to keep a live value in it across the asm.
		 */
		".p2align 4\n\t"
		"9:\n\t"
		"cmp $2, %0\n\t"
		"jb 10f\n\t"
		"movw 0*2(%1), %w3\n\t"
		"movw -1*2(%1, %0), %w4\n\t"
		"movw %w3, 0*2(%2)\n\t"
		"movw %w4, -1*2(%2, %0)\n\t"
		"jmp 11f\n\t"

		/*
		 * Tail, 0 or 1 byte. %cl is the low byte of the count operand
		 * (%0 is pinned to ecx), whose value is no longer needed once
		 * the compare has been done.
		 */
		".p2align 4\n\t"
		"10:\n\t"
		"cmp $1, %0\n\t"
		"jb 11f\n\t"
		"movb (%1), %%cl\n\t"
		"movb %%cl, (%2)\n\t"
		".p2align 4\n\t"
		"11:"
		: "=&c" (d0), "=&S" (d1), "=&D" (d2),
		  "=r" (d3), "=r" (d4), "=r" (d5)
		: "0" (n),
		  "1" (src),
		  "2" (dest)
		: "memory");

	return ret;
}
EXPORT_SYMBOL(memmove);