| The Android Open Source Project | 1dc9e47 | 2009-03-03 19:28:35 -0800 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright (C) 2008 The Android Open Source Project | 
 | 3 |  * All rights reserved. | 
 | 4 |  * | 
 | 5 |  * Redistribution and use in source and binary forms, with or without | 
 | 6 |  * modification, are permitted provided that the following conditions | 
 | 7 |  * are met: | 
 | 8 |  *  * Redistributions of source code must retain the above copyright | 
 | 9 |  *    notice, this list of conditions and the following disclaimer. | 
 | 10 |  *  * Redistributions in binary form must reproduce the above copyright | 
 | 11 |  *    notice, this list of conditions and the following disclaimer in | 
 | 12 |  *    the documentation and/or other materials provided with the | 
 | 13 |  *    distribution. | 
 | 14 |  * | 
 | 15 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 | 16 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 | 17 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | 
 | 18 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | 
 | 19 |  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | 
 | 20 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | 
 | 21 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS | 
 | 22 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | 
 | 23 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | 
 | 24 |  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | 
 | 25 |  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 
 | 26 |  * SUCH DAMAGE. | 
 | 27 |  */ | 
 | 28 |  | 
 | 29 | #include <machine/cpu-features.h> | 
| Elliott Hughes | 851e68a | 2014-02-19 16:53:20 -0800 | [diff] [blame] | 30 | #include <private/bionic_asm.h> | 
| The Android Open Source Project | 1dc9e47 | 2009-03-03 19:28:35 -0800 | [diff] [blame] | 31 |  | 
| Henrik Smiding | 3ebd31c | 2010-11-05 15:09:37 +0100 | [diff] [blame] | 32 |  | 
 | 33 | #ifdef HAVE_32_BYTE_CACHE_LINE | 
 | 34 | #define CACHE_LINE_SIZE     32 | 
 | 35 | #else | 
 | 36 | #define CACHE_LINE_SIZE     64 | 
 | 37 | #endif | 
 | 38 |  | 
| The Android Open Source Project | 1dc9e47 | 2009-03-03 19:28:35 -0800 | [diff] [blame] | 39 | /* | 
| Henrik Smiding | 3ebd31c | 2010-11-05 15:09:37 +0100 | [diff] [blame] | 40 |  * Optimized memcmp() for Cortex-A9. | 
| The Android Open Source Project | 1dc9e47 | 2009-03-03 19:28:35 -0800 | [diff] [blame] | 41 |  */ | 
 | 42 |  | 
| Chih-Hung Hsieh | 33f3351 | 2015-05-11 11:21:19 -0700 | [diff] [blame^] | 43 | .syntax unified | 
 | 44 |  | 
/*
 * int memcmp(const void *lhs, const void *rhs, size_t n)
 *
 * In:   r0 = lhs, r1 = rhs, r2 = n (byte count)
 * Out:  r0 = 0 when the compared bytes are all equal, otherwise
 *            (lhs byte - rhs byte) for the first differing pair, both
 *            bytes being zero-extended by ldrb before the subtraction.
 *
 * Register roles once the word-wise path is entered:
 *   r4      = lhs cursor (r0 is reused as scratch / result)
 *   r1      = rhs cursor
 *   r2      = remaining byte count (biased while inside the loops)
 *   r3      = number of leading bytes needed to word-align lhs
 *   ip, lr  = scratch / read-ahead words
 *   r5-r7   = shift amounts and read-ahead word (rhs offset 1 or 3 only)
 *
 * Stack: {r4, lr} is pushed for the word-wise path, plus {r5, r6, r7}
 * for the offset-1/3 path.  Inputs shorter than 12 bytes use no stack.
 * Every push and pop is mirrored by CFI directives so the unwind
 * information matches the stack at each instruction.
 */
ENTRY(memcmp)
        pld         [r0, #(CACHE_LINE_SIZE * 0)]
        pld         [r0, #(CACHE_LINE_SIZE * 1)]

        /* take care of the case where both buffers are the same;
         * a zero length is handled further down, at label 10
         */
        cmp         r0, r1
        moveq       r0, #0
        bxeq        lr

        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        pld         [r1, #(CACHE_LINE_SIZE * 1)]

        /* make sure we have at least 8+4 bytes, this simplifies things
         * below and avoids some overhead for small blocks.
         * (bmi tests the sign of r2 - 12, so a count with bit 31 set
         * after the subtraction also takes the byte-wise path at 10.)
         */
        cmp        r2, #(8+4)
        bmi        10f
/*
 * Neon optimization
 * Comparing 32 bytes at a time
 */
#if defined(__ARM_NEON__) && defined(NEON_UNALIGNED_ACCESS)
        subs        r2, r2, #32
        blo         3f

        /* preload all the cache lines we need. */
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

1:      /* The main loop compares 32 bytes at a time */
        vld1.8      {d0 - d3}, [r0]!
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        vld1.8      {d4 - d7}, [r1]!
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

        /* Start subtracting the values and merge results */
        vsub.i8     q0, q2
        vsub.i8     q1, q3
        vorr        q2, q0, q1
        vorr        d4, d5
        vmov        r3, ip, d4
        /* Check if there are any differences among the 32 bytes */
        orrs        r3, ip
        bne         2f
        subs        r2, r2, #32
        bhs         1b
        b           3f
2:
        /* Check if the difference was in the first or last 16 bytes */
        sub         r0, #32
        vorr        d0, d1
        sub         r1, #32
        vmov        r3, ip, d0
        orrs        r3, ip
        /* if the first 16 bytes are equal, we only have to rewind 16 bytes */
        ittt        eq
        subeq       r2, #16
        addeq       r0, #16
        addeq       r1, #16

3:      /* fix-up the remaining count (undo the -32 bias) */
        add         r2, r2, #32

        cmp        r2, #(8+4)
        bmi        10f
#endif

        /* save registers */
        stmfd       sp!, {r4, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4

        /* since r0 holds the result, move the first source
         * pointer somewhere else
         */
         mov        r4, r0

        /* align first pointer to word boundary
         * offset = -src & 3
         */
        rsb         r3, r4, #0
        ands        r3, r3, #3
        beq         0f

        /* align first pointer, comparing one byte at a time */
        sub         r2, r2, r3
1:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        subs        r0, r0, ip
        bne         9f
        subs        r3, r3, #1
        bne         1b


0:      /* here the first pointer is aligned, and we have at least 4 bytes
         * to process.
         */

        /* see if the pointers are congruent */
        eor         r0, r4, r1
        ands        r0, r0, #3
        bne         5f

        /* congruent case, 32 bytes per iteration
         * We need to make sure there are at least 32+4 bytes left
         * because we effectively read ahead one word, and we could
         * read past the buffer (and segfault) if we are not careful.
         */

        ldr         ip, [r1]
        subs        r2, r2, #(32 + 4)
        bmi         1f

        /* ip and lr alternate as the read-ahead rhs word; each conditional
         * group only executes while every previous word compared equal.
         */
0:      pld         [r4, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]
        ldr         r0, [r4], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        bne         2f
        subs        r2, r2, #32
        bhs         0b

        /* do we have at least 4 bytes left? */
1:      adds        r2, r2, #(32 - 4 + 4)
        bmi         4f

        /* finish off 4 bytes at a time */
3:      ldr         r0, [r4], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #4
        bhs         3b

        /* are we done? */
4:      adds        r2, r2, #4
        moveq       r0, #0
        beq         9f

        /* finish off the remaining bytes */
        b           8f

2:      /* the last 4 bytes are different, restart them */
        sub         r4, r4, #4
        sub         r1, r1, #4
        mov         r2, #4

        /* process the last few bytes; r2 is non-zero on entry */
8:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        /* stall */
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* restore registers and return */
        .cfi_remember_state             /* keep the {r4, lr} frame rules for label 5 onwards */
        ldmfd       sp!, {r4, lr}
        .cfi_def_cfa_offset 0
        .cfi_restore r4
        .cfi_restore lr
        bx          lr

10:     /* process less than 12 bytes; nothing is on the stack here */
        cmp         r2, #0
        moveq       r0, #0
        bxeq        lr
        mov         r3, r0
11:
        ldrb        r0, [r3], #1
        ldrb        ip, [r1], #1
        subs        r0, ip
        bxne        lr
        subs        r2, r2, #1
        bne         11b
        bx          lr

        /* the code below runs with {r4, lr} still pushed */
        .cfi_restore_state
5:      /*************** non-congruent case ***************/
        and         r0, r1, #3
        cmp         r0, #2
        bne         4f

        /* here, offset is 2 (16-bits aligned, special cased) */

        /* make sure we have at least 16 bytes to process */
        subs        r2, r2, #16
        addmi       r2, r2, #16
        bmi         8b

        /* align the unaligned pointer */
        bic         r1, r1, #3
        ldr         lr, [r1], #4

        /* each rhs word is rebuilt from the top half of the previous
         * aligned word and the bottom half of the next one
         */
6:      pld         [r1, #(CACHE_LINE_SIZE * 2)]
        pld         [r4, #(CACHE_LINE_SIZE * 2)]
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        bne         7f
        subs        r2, r2, #16
        bhs         6b
        /* r1 is one aligned word ahead; step back to the next unread byte */
        sub         r1, r1, #2
        /* are we done? */
        adds        r2, r2, #16
        moveq       r0, #0
        beq         9b
        /* finish off the remaining bytes */
        b           8b

7:      /* fix up the 2 pointers and fallthrough... */
        sub         r1, r1, #(4+2)
        sub         r4, r4, #4
        mov         r2, #4
        b           8b


4:      /*************** offset is 1 or 3 (less optimized) ***************/

        stmfd       sp!, {r5, r6, r7}
        .cfi_adjust_cfa_offset 12
        .cfi_rel_offset r5, 0
        .cfi_rel_offset r6, 4
        .cfi_rel_offset r7, 8

        /* r5 = right shift amount (rhs offset * 8)
         * r6 = left shift amount (32 - r5)
         * r7 = read-ahead aligned rhs word
         */

        mov         r5, r0, lsl #3      /* r5 = right shift */
        rsb         r6, r5, #32         /* r6 = left shift */

        /* align the unaligned pointer */
        bic         r1, r1, #3
        ldr         r7, [r1], #4
        sub         r2, r2, #8

6:      mov         ip, r7, lsr r5
        ldr         r7, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, r7, lsl r6
        eors        r0, r0, ip
        moveq       ip, r7, lsr r5
        ldreq       r7, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, r7, lsl r6
        eorseq      r0, r0, ip
        bne         7f
        subs        r2, r2, #8
        bhs         6b

        /* undo the read-ahead: r6 / 8 is the number of bytes over-advanced */
        sub         r1, r1, r6, lsr #3
        .cfi_remember_state             /* keep the r5-r7 frame rules for label 7 */
        ldmfd       sp!, {r5, r6, r7}
        .cfi_adjust_cfa_offset -12
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7

        /* are we done? */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b

        /* finish off the remaining bytes */
        b           8b

        /* label 7 is only reached from the loop above, r5-r7 still pushed */
        .cfi_restore_state
7:      /* fix up the 2 pointers and fallthrough... */
        sub         r1, r1, #4
        sub         r1, r1, r6, lsr #3
        sub         r4, r4, #4
        mov         r2, #4
        ldmfd       sp!, {r5, r6, r7}
        .cfi_adjust_cfa_offset -12
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7
        b           8b
END(memcmp)