sha1: Use bswap* to optimize byte order
bionic libc already makes use of the ARMv6+ rev/rev16 instructions for
endian conversion. This patch rewrites parts of the SHA1 implementation
with the swap32 and swap64 routines, which is known to bring
performance improvements.
The reference sha1bench on Nexus S:
[before]
Rounds: 100000, size: 6250K, time: 1.183s, speed: 5.16 MB/s
Change-Id: Id04c0fa1467b3006b5a8736cbdd95855ed7c13e4
[after]
Rounds: 100000, size: 6250K, time: 1.025s, speed: 5.957 MB/s
diff --git a/libc/bionic/sha1.c b/libc/bionic/sha1.c
index 7384812..a4fbd67 100644
--- a/libc/bionic/sha1.c
+++ b/libc/bionic/sha1.c
@@ -22,6 +22,7 @@
#include <assert.h>
#include <sha1.h>
#include <string.h>
+#include <endian.h>
#if !HAVE_SHA1_H
@@ -32,8 +33,7 @@
* I got the idea of expanding during the round function from SSLeay
*/
#if BYTE_ORDER == LITTLE_ENDIAN
-# define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
- |(rol(block->l[i],8)&0x00FF00FF))
+# define blk0(i) swap32(block->l[i])
#else
# define blk0(i) block->l[i]
#endif
@@ -50,14 +50,15 @@
#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
typedef union {
- uint8_t c[64];
- uint32_t l[16];
+ uint8_t c[SHA1_BLOCK_SIZE];
+ uint32_t l[SHA1_BLOCK_SIZE/4];
} CHAR64LONG16;
/*
* Hash a single 512-bit block. This is the core of the algorithm.
*/
-void SHA1Transform(uint32_t state[5], const uint8_t buffer[64])
+void SHA1Transform(uint32_t state[SHA1_DIGEST_LENGTH/4],
+ const uint8_t buffer[SHA1_BLOCK_SIZE])
{
uint32_t a, b, c, d, e;
CHAR64LONG16 *block;
@@ -71,7 +72,7 @@
#ifdef SHA1HANDSOFF
block = &workspace;
- (void)memcpy(block, buffer, 64);
+ (void)memcpy(block, buffer, SHA1_BLOCK_SIZE);
#else
block = (CHAR64LONG16 *)(void *)buffer;
#endif
@@ -125,12 +126,16 @@
assert(context != 0);
/* SHA1 initialization constants */
- context->state[0] = 0x67452301;
- context->state[1] = 0xEFCDAB89;
- context->state[2] = 0x98BADCFE;
- context->state[3] = 0x10325476;
- context->state[4] = 0xC3D2E1F0;
- context->count[0] = context->count[1] = 0;
+ *context = (SHA1_CTX) {
+ .state = {
+ 0x67452301,
+ 0xEFCDAB89,
+ 0x98BADCFE,
+ 0x10325476,
+ 0xC3D2E1F0,
+ },
+ .count = 0,
+ };
}
@@ -140,51 +145,67 @@
void SHA1Update(SHA1_CTX *context, const uint8_t *data, unsigned int len)
{
unsigned int i, j;
+ unsigned int partial, done;
+ const uint8_t *src;
assert(context != 0);
assert(data != 0);
- j = context->count[0];
- if ((context->count[0] += len << 3) < j)
- context->count[1] += (len>>29)+1;
- j = (j >> 3) & 63;
- if ((j + len) > 63) {
- (void)memcpy(&context->buffer[j], data, (i = 64-j));
- SHA1Transform(context->state, context->buffer);
- for ( ; i + 63 < len; i += 64)
- SHA1Transform(context->state, &data[i]);
- j = 0;
- } else {
- i = 0;
+ partial = context->count % SHA1_BLOCK_SIZE;
+ context->count += len;
+ done = 0;
+ src = data;
+
+ if ((partial + len) >= SHA1_BLOCK_SIZE) {
+ if (partial) {
+ done = -partial;
+ memcpy(context->buffer + partial, data, done + SHA1_BLOCK_SIZE);
+ src = context->buffer;
+ }
+ do {
+ SHA1Transform(context->state, src);
+ done += SHA1_BLOCK_SIZE;
+ src = data + done;
+ } while (done + SHA1_BLOCK_SIZE <= len);
+ partial = 0;
}
- (void)memcpy(&context->buffer[j], &data[i], len - i);
+ memcpy(context->buffer + partial, src, len - done);
}
/*
* Add padding and return the message digest.
*/
-void SHA1Final(uint8_t digest[20], SHA1_CTX *context)
+void SHA1Final(uint8_t digest[SHA1_DIGEST_LENGTH], SHA1_CTX *context)
{
- unsigned int i;
- uint8_t finalcount[8];
+ uint32_t i, index, pad_len;
+ uint64_t bits;
+ static const uint8_t padding[SHA1_BLOCK_SIZE] = { 0x80, };
assert(digest != 0);
assert(context != 0);
- for (i = 0; i < 8; i++) {
- finalcount[i] = (uint8_t)((context->count[(i >= 4 ? 0 : 1)]
- >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
- }
- SHA1Update(context, (const uint8_t *)"\200", 1);
- while ((context->count[0] & 504) != 448)
- SHA1Update(context, (const uint8_t *)"\0", 1);
- SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ bits = swap64(context->count << 3);
+#else
+ bits = context->count << 3;
+#endif
+
+ /* Pad out to 56 mod 64 */
+ index = context->count & 0x3f;
+ pad_len = (index < 56) ? (56 - index) : ((64 + 56) - index);
+ SHA1Update(context, padding, pad_len);
+
+ /* Append length */
+ SHA1Update(context, (const uint8_t *)&bits, sizeof(bits));
if (digest) {
- for (i = 0; i < 20; i++)
- digest[i] = (uint8_t)
- ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
+ for (i = 0; i < SHA1_DIGEST_LENGTH/4; i++)
+#if BYTE_ORDER == LITTLE_ENDIAN
+ ((uint32_t *)digest)[i] = swap32(context->state[i]);
+#else
+ ((uint32_t *)digest)[i] = context->state[i];
+#endif
}
}