Add libc optimizations to bionic for MIPS
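
Rewrite memcmp() to compare a word (or dword) at a time once the
second pointer is aligned, using unaligned load sequences or native
hardware unaligned support for the first pointer depending on the ISA
revision, and prefetch ahead of both streams.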
Test: Ran the bionic tests available under bionic/tests.
Tested on mips32r1/mips32r2/mips64r6 emulators.
Change-Id: I589415ddc496df3f6067ae34cb33ca58b3a1f276
Signed-off-by: Prashant Patil <prashant.patil@imgtec.com>
diff --git a/libc/arch-mips/string/memcmp.c b/libc/arch-mips/string/memcmp.c
index 8640954..eb4ad07 100644
--- a/libc/arch-mips/string/memcmp.c
+++ b/libc/arch-mips/string/memcmp.c
@@ -1,51 +1,352 @@
/*
- * Copyright (C) 2008 The Android Open Source Project
+ * Copyright (c) 2017 Imagination Technologies.
+ *
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ * * Neither the name of Imagination Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
#include <string.h>
+#include <stdint.h>
-int memcmp(const void *s1, const void *s2, size_t n)
+#define ENABLE_PREFETCH 1
+
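+/* 'pref 0, off(reg)' issues a "load" prefetch hint; STRNG turns the
+   numeric offset into the immediate inside the instruction string. */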
+#define STRNG(X) #X
+#define PREFETCH(src_ptr, offset) \
+ asm("pref 0, " STRNG(offset) "(%[src]) \n\t" : : [src] "r" (src_ptr));
+
+#if !defined(UNALIGNED_INSTR_SUPPORT)
+/* Does target have unaligned lw/ld/ualw/uald instructions? */
+#define UNALIGNED_INSTR_SUPPORT 0
+#if __mips_isa_rev < 6 && !__mips1
+#undef UNALIGNED_INSTR_SUPPORT
+#define UNALIGNED_INSTR_SUPPORT 1
+#endif
+#endif
+
+#if !defined(HW_UNALIGNED_SUPPORT)
+/* Does target have hardware support for unaligned accesses? */
+#define HW_UNALIGNED_SUPPORT 0
+#if __mips_isa_rev >= 6
+#undef HW_UNALIGNED_SUPPORT
+#define HW_UNALIGNED_SUPPORT 1
+#endif
+#endif
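+
+/* Note: R6 removed the lwl/lwr family but is expected to handle
+   unaligned lw/ld in hardware, so the two flags never both hold. */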
+
+#define SIZEOF_reg_t 4
+#if _MIPS_SIM == _ABIO32
+typedef unsigned long reg_t;
+typedef struct bits
{
- const unsigned char* p1 = s1;
- const unsigned char* end1 = p1 + n;
- const unsigned char* p2 = s2;
- int d = 0;
+ reg_t B0:8, B1:8, B2:8, B3:8;
+} bits_t;
+#else
+#undef SIZEOF_reg_t
+#define SIZEOF_reg_t 8
+typedef unsigned long long reg_t;
+typedef struct bits
+{
+ reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
+} bits_t;
+#endif
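+
+/* reg_t matches the natural load width: 32 bits for o32, 64 bits for
+   n32/n64. */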
- for (;;) {
- if (d || p1 >= end1) break;
- d = (int)*p1++ - (int)*p2++;
+/* This union assumes that small structures can live in registers. If
+   not, memory accesses will be used - not optimal, but still correct. */
+typedef union
+{
+ reg_t v;
+ bits_t b;
+} bitfields_t;
- if (d || p1 >= end1) break;
- d = (int)*p1++ - (int)*p2++;
+#define do_bitfield(__i) \
+ if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i;
- if (d || p1 >= end1) break;
- d = (int)*p1++ - (int)*p2++;
+/* pull apart the words to find the first differing unsigned byte. */
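+/* Bit-fields are allocated in memory order on both endiannesses, so B0
+   is always the byte at the lowest address and the first mismatching
+   field gives the correct memcmp sign. */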
+static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b)
+{
+ bitfields_t x, y;
+ x.v = a;
+ y.v = b;
+ do_bitfield (0);
+ do_bitfield (1);
+ do_bitfield (2);
+#if SIZEOF_reg_t == 4
+ return x.b.B3 - y.b.B3;
+#else
+ do_bitfield (3);
+ do_bitfield (4);
+ do_bitfield (5);
+ do_bitfield (6);
+ return x.b.B7 - y.b.B7;
+#endif
+}
- if (d || p1 >= end1) break;
- d = (int)*p1++ - (int)*p2++;
- }
- return d;
+/* This code is used when aligning a pointer, when there are remaining
+   bytes after the word compares, or when the architecture lacks any
+   form of unaligned support. */
+static inline int __attribute__ ((always_inline))
+do_bytes (const void *a, const void *b, unsigned long len)
+{
+ unsigned char *x = (unsigned char *) a;
+ unsigned char *y = (unsigned char *) b;
+ unsigned long i;
+
+ /* 'len' might be zero here, so preloading the first two values
+ before the loop may access unallocated memory. */
+ for (i = 0; i < len; i++) {
+ if (*x != *y)
+ return *x - *y;
+ x++;
+ y++;
+ }
+ return 0;
+}
+
+#if !HW_UNALIGNED_SUPPORT
+#if UNALIGNED_INSTR_SUPPORT
+/* MIPS GCC has no unaligned builtins, so this packed struct forces the
+   compiler to treat the pointer access as unaligned. */
+struct ulw
+{
+ reg_t uli;
+} __attribute__ ((packed));
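+
+/* On pre-R6 targets GCC typically expands this access to lwl/lwr (ulw)
+   pairs instead of a trapping aligned load. */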
+
+/* The first pointer is unaligned while the second pointer is aligned. */
+static int unaligned_words (const struct ulw *a, const reg_t *b,
+ unsigned long words, unsigned long bytes)
+{
+#if ENABLE_PREFETCH
+  /* Round the prefetch pointers up to the next 32-byte boundary. */
+ const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
+ const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
+#endif
+ for (; words >= 16; words -= 8) {
+#if ENABLE_PREFETCH
+ pref_ptr += 8;
+ PREFETCH(pref_ptr, 0);
+ PREFETCH(pref_ptr, 32);
+
+ pref_ptr_a += 8;
+ PREFETCH(pref_ptr_a, 0);
+ PREFETCH(pref_ptr_a, 32);
+#endif
+ reg_t x0 = a[0].uli, x1 = a[1].uli;
+ reg_t x2 = a[2].uli, x3 = a[3].uli;
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ x0 = a[4].uli; x1 = a[5].uli;
+ x2 = a[6].uli; x3 = a[7].uli;
+ y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ a += 8;
+ b += 8;
+ }
+
+ for (; words >= 4; words -= 4) {
+ reg_t x0 = a[0].uli, x1 = a[1].uli;
+ reg_t x2 = a[2].uli, x3 = a[3].uli;
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+ a += 4;
+ b += 4;
+ }
+
+  /* Do the remaining words one at a time. */
+ while (words--) {
+ reg_t x0 = a->uli;
+ reg_t y0 = *b;
+ a += 1;
+ b += 1;
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ }
+
+ /* mop up any remaining bytes. */
+ return do_bytes (a, b, bytes);
+}
+#else
+/* Neither HW unaligned support nor unaligned lw/ld/ualw/uald
+   instructions: fall back to byte-by-byte compares. */
+static int unaligned_words (const reg_t *a, const reg_t *b,
+ unsigned long words, unsigned long bytes)
+{
+ return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
+}
+#endif /* UNALIGNED_INSTR_SUPPORT */
+#endif /* HW_UNALIGNED_SUPPORT */
+
+/* Both pointers are aligned, or the first is unaligned and the hardware
+   supports unaligned accesses. */
+static int aligned_words (const reg_t *a, const reg_t *b,
+ unsigned long words, unsigned long bytes)
+{
+#if ENABLE_PREFETCH
+  /* Round the prefetch pointers up to the next 32-byte boundary. */
+ const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
+ const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
+#endif
+
+ for (; words >= 24; words -= 12) {
+#if ENABLE_PREFETCH
+ pref_ptr += 12;
+ PREFETCH(pref_ptr, 0);
+ PREFETCH(pref_ptr, 32);
+ PREFETCH(pref_ptr, 64);
+
+ pref_ptr_a += 12;
+ PREFETCH(pref_ptr_a, 0);
+ PREFETCH(pref_ptr_a, 32);
+ PREFETCH(pref_ptr_a, 64);
+#endif
+ reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
+ y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
+ y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+
+ a += 12;
+ b += 12;
+ }
+
+ for (; words >= 4; words -= 4) {
+ reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
+ reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ if (x1 != y1)
+ return do_by_bitfields (x1, y1);
+ if (x2 != y2)
+ return do_by_bitfields (x2, y2);
+ if (x3 != y3)
+ return do_by_bitfields (x3, y3);
+ a += 4;
+ b += 4;
+ }
+
+  /* Do the remaining words one at a time. */
+ while (words--) {
+ reg_t x0 = *a;
+ reg_t y0 = *b;
+ a += 1;
+ b += 1;
+ if (x0 != y0)
+ return do_by_bitfields (x0, y0);
+ }
+
+ /* mop up any remaining bytes. */
+ return do_bytes (a, b, bytes);
+}
+
+int memcmp (const void *a, const void *b, size_t len)
+{
+ unsigned long bytes, words;
+
+  /* Small lengths should be rare; just compare bytes. */
+ if (len < sizeof (reg_t) * 4) {
+ return do_bytes (a, b, len);
+ }
+
+  /* Align the second pointer to word/dword alignment. Note that the
+     pointer is only 32 bits wide for the o32/n32 ABIs; for n32, loads
+     are 64-bit while addresses remain 32-bit. */
+ bytes = ((unsigned long) b) % sizeof (reg_t);
+ if (bytes) {
+ int res;
+ bytes = sizeof (reg_t) - bytes;
+ if (bytes > len)
+ bytes = len;
+ res = do_bytes (a, b, bytes);
+ if (res || len == bytes)
+ return res;
+ len -= bytes;
+ a = (const void *) (((unsigned char *) a) + bytes);
+ b = (const void *) (((unsigned char *) b) + bytes);
+ }
+
+ /* Second pointer now aligned. */
+ words = len / sizeof (reg_t);
+ bytes = len % sizeof (reg_t);
+
+#if HW_UNALIGNED_SUPPORT
+  /* Treat a possibly unaligned first pointer as aligned. */
+ return aligned_words (a, b, words, bytes);
+#else
+ if (((unsigned long) a) % sizeof (reg_t) == 0) {
+ return aligned_words (a, b, words, bytes);
+ }
+  /* Need to use unaligned loads on the first pointer. */
+ return unaligned_words (a, b, words, bytes);
+#endif
}