sha1: Use bswap* to optimize byte order
bionic libc already makes use of the ARMv6+ rev/rev16 instructions for
endian conversion. This patch rewrites parts of the SHA1
implementation to use the swap32 and swap64 routines, which are known to
bring performance improvements.
The reference sha1bench on Nexus S:
[before]
Rounds: 100000, size: 6250K, time: 1.183s, speed: 5.16 MB/s
Change-Id: Id04c0fa1467b3006b5a8736cbdd95855ed7c13e4
[after]
Rounds: 100000, size: 6250K, time: 1.025s, speed: 5.957 MB/s
diff --git a/libc/bionic/sha1.c b/libc/bionic/sha1.c
index 7384812..a4fbd67 100644
--- a/libc/bionic/sha1.c
+++ b/libc/bionic/sha1.c
@@ -22,6 +22,7 @@
#include <assert.h>
#include <sha1.h>
#include <string.h>
+#include <endian.h>
#if !HAVE_SHA1_H
@@ -32,8 +33,7 @@
* I got the idea of expanding during the round function from SSLeay
*/
#if BYTE_ORDER == LITTLE_ENDIAN
-# define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
- |(rol(block->l[i],8)&0x00FF00FF))
+# define blk0(i) swap32(block->l[i])
#else
# define blk0(i) block->l[i]
#endif
@@ -50,14 +50,15 @@
#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
typedef union {
- uint8_t c[64];
- uint32_t l[16];
+ uint8_t c[SHA1_BLOCK_SIZE];
+ uint32_t l[SHA1_BLOCK_SIZE/4];
} CHAR64LONG16;
/*
* Hash a single 512-bit block. This is the core of the algorithm.
*/
-void SHA1Transform(uint32_t state[5], const uint8_t buffer[64])
+void SHA1Transform(uint32_t state[SHA1_DIGEST_LENGTH/4],
+ const uint8_t buffer[SHA1_BLOCK_SIZE])
{
uint32_t a, b, c, d, e;
CHAR64LONG16 *block;
@@ -71,7 +72,7 @@
#ifdef SHA1HANDSOFF
block = &workspace;
- (void)memcpy(block, buffer, 64);
+ (void)memcpy(block, buffer, SHA1_BLOCK_SIZE);
#else
block = (CHAR64LONG16 *)(void *)buffer;
#endif
@@ -125,12 +126,16 @@
assert(context != 0);
/* SHA1 initialization constants */
- context->state[0] = 0x67452301;
- context->state[1] = 0xEFCDAB89;
- context->state[2] = 0x98BADCFE;
- context->state[3] = 0x10325476;
- context->state[4] = 0xC3D2E1F0;
- context->count[0] = context->count[1] = 0;
+ *context = (SHA1_CTX) {
+ .state = {
+ 0x67452301,
+ 0xEFCDAB89,
+ 0x98BADCFE,
+ 0x10325476,
+ 0xC3D2E1F0,
+ },
+ .count = 0,
+ };
}
@@ -140,51 +145,67 @@
void SHA1Update(SHA1_CTX *context, const uint8_t *data, unsigned int len)
{
unsigned int i, j;
+ unsigned int partial, done;
+ const uint8_t *src;
assert(context != 0);
assert(data != 0);
- j = context->count[0];
- if ((context->count[0] += len << 3) < j)
- context->count[1] += (len>>29)+1;
- j = (j >> 3) & 63;
- if ((j + len) > 63) {
- (void)memcpy(&context->buffer[j], data, (i = 64-j));
- SHA1Transform(context->state, context->buffer);
- for ( ; i + 63 < len; i += 64)
- SHA1Transform(context->state, &data[i]);
- j = 0;
- } else {
- i = 0;
+ partial = context->count % SHA1_BLOCK_SIZE;
+ context->count += len;
+ done = 0;
+ src = data;
+
+ if ((partial + len) >= SHA1_BLOCK_SIZE) {
+ if (partial) {
+ done = -partial;
+ memcpy(context->buffer + partial, data, done + SHA1_BLOCK_SIZE);
+ src = context->buffer;
+ }
+ do {
+ SHA1Transform(context->state, src);
+ done += SHA1_BLOCK_SIZE;
+ src = data + done;
+ } while (done + SHA1_BLOCK_SIZE <= len);
+ partial = 0;
}
- (void)memcpy(&context->buffer[j], &data[i], len - i);
+ memcpy(context->buffer + partial, src, len - done);
}
/*
* Add padding and return the message digest.
*/
-void SHA1Final(uint8_t digest[20], SHA1_CTX *context)
+void SHA1Final(uint8_t digest[SHA1_DIGEST_LENGTH], SHA1_CTX *context)
{
- unsigned int i;
- uint8_t finalcount[8];
+ uint32_t i, index, pad_len;
+ uint64_t bits;
+ static const uint8_t padding[SHA1_BLOCK_SIZE] = { 0x80, };
assert(digest != 0);
assert(context != 0);
- for (i = 0; i < 8; i++) {
- finalcount[i] = (uint8_t)((context->count[(i >= 4 ? 0 : 1)]
- >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
- }
- SHA1Update(context, (const uint8_t *)"\200", 1);
- while ((context->count[0] & 504) != 448)
- SHA1Update(context, (const uint8_t *)"\0", 1);
- SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ bits = swap64(context->count << 3);
+#else
+ bits = context->count << 3;
+#endif
+
+ /* Pad out to 56 mod 64 */
+ index = context->count & 0x3f;
+ pad_len = (index < 56) ? (56 - index) : ((64 + 56) - index);
+ SHA1Update(context, padding, pad_len);
+
+ /* Append length */
+ SHA1Update(context, (const uint8_t *)&bits, sizeof(bits));
if (digest) {
- for (i = 0; i < 20; i++)
- digest[i] = (uint8_t)
- ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
+ for (i = 0; i < SHA1_DIGEST_LENGTH/4; i++)
+#if BYTE_ORDER == LITTLE_ENDIAN
+ ((uint32_t *)digest)[i] = swap32(context->state[i]);
+#else
+ ((uint32_t *)digest)[i] = context->state[i];
+#endif
}
}
diff --git a/libc/include/sha1.h b/libc/include/sha1.h
index adfa1fc..bc51ac0 100644
--- a/libc/include/sha1.h
+++ b/libc/include/sha1.h
@@ -13,16 +13,17 @@
#include <sys/types.h>
#define SHA1_DIGEST_LENGTH 20
-#define SHA1_DIGEST_STRING_LENGTH 41
+#define SHA1_BLOCK_SIZE 64
typedef struct {
- uint32_t state[5];
- uint32_t count[2];
- uint8_t buffer[64];
+ uint64_t count;
+ uint32_t state[SHA1_DIGEST_LENGTH / 4];
+ uint8_t buffer[SHA1_BLOCK_SIZE];
} SHA1_CTX;
__BEGIN_DECLS
-void SHA1Transform(uint32_t[5], const uint8_t[64]);
+void SHA1Transform(uint32_t[SHA1_DIGEST_LENGTH/4],
+ const uint8_t[SHA1_BLOCK_SIZE]);
void SHA1Init(SHA1_CTX *);
void SHA1Update(SHA1_CTX *, const uint8_t *, unsigned int);
void SHA1Final(uint8_t[SHA1_DIGEST_LENGTH], SHA1_CTX *);