From c0e5d9280dd156631bd398463dbb6a65aff37d19 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Thu, 15 Aug 2024 17:10:10 +0200 Subject: Implement SHA-1 --- c/sha1/.gitignore | 2 + c/sha1/Makefile | 15 +++ c/sha1/main.c | 37 +++++++ "c/sha1/sha1-na\303\257ve.c" | 140 ++++++++++++++++++++++++ c/sha1/sha1-x86.c | 255 +++++++++++++++++++++++++++++++++++++++++++ c/sha1/sha1.h | 21 ++++ 6 files changed, 470 insertions(+) create mode 100644 c/sha1/.gitignore create mode 100644 c/sha1/Makefile create mode 100644 c/sha1/main.c create mode 100644 "c/sha1/sha1-na\303\257ve.c" create mode 100644 c/sha1/sha1-x86.c create mode 100644 c/sha1/sha1.h diff --git a/c/sha1/.gitignore b/c/sha1/.gitignore new file mode 100644 index 0000000..d6e6ddc --- /dev/null +++ b/c/sha1/.gitignore @@ -0,0 +1,2 @@ +sha1-* +!sha1-*.c diff --git a/c/sha1/Makefile b/c/sha1/Makefile new file mode 100644 index 0000000..4012c1d --- /dev/null +++ b/c/sha1/Makefile @@ -0,0 +1,15 @@ +all: sha1-naïve sha1-x86 + +sha1-naïve: main.c sha1-naïve.c + cc -flto -O3 -o $@ main.c $@.c + +sha1-x86: main.c sha1-x86.c + cc -flto -O3 -msha -msse4.1 -o $@ main.c $@.c + +expected: + { yes 'a' | tr -d '\n' | head -c1000000; \ + yes 'b' | tr -d '\n' | head -c1000000; } \ + | sha1sum | cut -d' ' -f1 + +clean: + rm -f sha1-naïve sha1-x86 diff --git a/c/sha1/main.c b/c/sha1/main.c new file mode 100644 index 0000000..9ea4aab --- /dev/null +++ b/c/sha1/main.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include + +#include "sha1.h" + +static uint8_t bigbuf1[1000000]; +static uint8_t bigbuf2[1000000]; + +int +main(int argc, char **argv) +{ + (void)argc; + + int e = 0; + sha1_t s; + uint8_t dgst[SHA1DGSTSZ]; + + memset(bigbuf1, 'a', sizeof(bigbuf1)); + memset(bigbuf2, 'b', sizeof(bigbuf2)); + + sha1init(&s); + e |= sha1hash(&s, bigbuf1, sizeof(bigbuf1)); + e |= sha1hash(&s, bigbuf2, sizeof(bigbuf2)); + sha1end(&s, dgst); + + if (e != 0) { + fprintf(stderr, "%s: %s\n", argv[0], strerror(e)); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < sizeof(dgst); i++) + printf("%02" PRIx8, dgst[i]); + putchar('\n'); + return EXIT_SUCCESS; +} diff --git "a/c/sha1/sha1-na\303\257ve.c" "b/c/sha1/sha1-na\303\257ve.c" new file mode 100644 index 0000000..6f1f257 --- /dev/null +++ "b/c/sha1/sha1-na\303\257ve.c" @@ -0,0 +1,140 @@ +#include +#include +#include + +#include "sha1.h" + +#define lengthof(xs) (sizeof(xs) / sizeof(*(xs))) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +static void sha1hashblk(sha1_t *, const uint8_t *); + +static const uint32_t K[] = { + 0x5A827999, + 0x6ED9EBA1, + 0x8F1BBCDC, + 0xCA62C1D6, +}; + +static inline uint32_t +rotl32(uint32_t x, uint8_t bits) +{ +#if (__GNUC__ || __TINYC__) && __x86_64__ + asm ("roll %1, %0" : "+r" (x) : "c" (bits) : "cc"); + return x; +#else + return (x << bits) | (x >> (32 - bits)); +#endif +} + +void +sha1init(sha1_t *s) +{ + static const uint32_t H[] = { + 0x67452301, + 0xEFCDAB89, + 0x98BADCFE, + 0x10325476, + 0xC3D2E1F0, + }; + memcpy(s->dgst, H, sizeof(H)); + s->msgsz = s->bufsz = 0; +} + +int +sha1hash(sha1_t *s, const uint8_t *msg, size_t msgsz) +{ + if (s->msgsz + (msgsz * 8) < s->msgsz) + return EOVERFLOW; + + s->msgsz += msgsz * 8; + + while (msgsz != 0) { + size_t free_space = SHA1BLKSZ - s->bufsz; + size_t ncpy = MIN(msgsz, free_space); + memcpy(s->buf + s->bufsz, msg, ncpy); + s->bufsz += ncpy; + msg += ncpy; + msgsz -= ncpy; + + if (s->bufsz == SHA1BLKSZ) { + sha1hashblk(s, s->buf); + s->bufsz = 0; + } + } + + return 0; +} + +void +sha1end(sha1_t *s, uint8_t dgst[SHA1DGSTSZ]) +{ + s->buf[s->bufsz++] = 0x80; + + if (s->bufsz > SHA1BLKSZ - sizeof(uint64_t)) { + while (s->bufsz < SHA1BLKSZ) + s->buf[s->bufsz++] = 0; + sha1hashblk(s, s->buf); + s->bufsz = 0; + } + + while (s->bufsz < 56) + s->buf[s->bufsz++] = 0; + ((uint64_t *)s->buf)[SHA1BLKSZ/8 - 1] = htobe64(s->msgsz); + + sha1hashblk(s, s->buf); + + for (int i = 0; i < lengthof(s->dgst); i++) + ((uint32_t *)dgst)[i] = htobe32(s->dgst[i]); +} + +static void +sha1hashblk(sha1_t *s, const uint8_t *blk) +{ + uint32_t w[80]; + uint32_t a, b, c, d, e, tmp; + + for (int i = 0; i < 16; i++) + w[i] = htobe32(((uint32_t *)blk)[i]); + for (int i = 16; i < 32; i++) + w[i] = rotl32(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1); + for (int i = 32; i < 80; i++) + w[i] = rotl32(w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32], 2); + + a = s->dgst[0]; + b = s->dgst[1]; + c = s->dgst[2]; + d = s->dgst[3]; + e = s->dgst[4]; + + for (int i = 0; i < 80; i++) { + uint32_t f, k; + + if (i < 20) { + f = b&c | ~b&d; + k = K[0]; + } else if (i < 40) { + f = b ^ c ^ d; + k = K[1]; + } else if (i < 60) { + f = b&c | b&d | c&d; + k = K[2]; + } else { + f = b ^ c ^ d; + k = K[3]; + } + + tmp = rotl32(a, 5) + f + e + w[i] + k; + e = d; + d = c; + c = rotl32(b, 30); + b = a; + a = tmp; + } + + s->dgst[0] += a; + s->dgst[1] += b; + s->dgst[2] += c; + s->dgst[3] += d; + s->dgst[4] += e; +} diff --git a/c/sha1/sha1-x86.c b/c/sha1/sha1-x86.c new file mode 100644 index 0000000..b7cf000 --- /dev/null +++ b/c/sha1/sha1-x86.c @@ -0,0 +1,255 @@ +#include +#include +#include +#include + +#include "sha1.h" + +#define lengthof(xs) (sizeof(xs) / sizeof(*(xs))) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +static void sha1hashblk(sha1_t *, const uint8_t *); + +void +sha1init(sha1_t *s) +{ + static const uint32_t H[] = { + 0x67452301, + 0xEFCDAB89, + 0x98BADCFE, + 0x10325476, + 0xC3D2E1F0, + }; + memcpy(s->dgst, H, sizeof(H)); + s->msgsz = s->bufsz = 0; +} + +int +sha1hash(sha1_t *s, const uint8_t *msg, size_t msgsz) +{ + if (s->msgsz + (msgsz * 8) < s->msgsz) + return EOVERFLOW; + + s->msgsz += msgsz * 8; + + while (msgsz != 0) { + size_t free_space = SHA1BLKSZ - s->bufsz; + size_t ncpy = MIN(msgsz, free_space); + memcpy(s->buf + s->bufsz, msg, ncpy); + s->bufsz += ncpy; + msg += ncpy; + msgsz -= ncpy; + + if (s->bufsz == SHA1BLKSZ) { + sha1hashblk(s, s->buf); + s->bufsz = 0; + } + } + + return 0; +} + +void +sha1end(sha1_t *s, uint8_t dgst[SHA1DGSTSZ]) +{ + s->buf[s->bufsz++] = 0x80; + + if (s->bufsz > SHA1BLKSZ - sizeof(uint64_t)) { + while (s->bufsz < SHA1BLKSZ) + s->buf[s->bufsz++] = 0; + sha1hashblk(s, s->buf); + s->bufsz = 0; + } + + while (s->bufsz < 56) + s->buf[s->bufsz++] = 0; + ((uint64_t *)s->buf)[SHA1BLKSZ/8 - 1] = htobe64(s->msgsz); + + sha1hashblk(s, s->buf); + + for (int i = 0; i < lengthof(s->dgst); i++) + ((uint32_t *)dgst)[i] = htobe32(s->dgst[i]); +} + +static void +sha1hashblk(sha1_t *s, const uint8_t *blk) +{ + __m128i abcd, e0, e1; + __m128i abcd_save, e_save; + __m128i msg0, msg1, msg2, msg3; + + /* Masks for swapping endianness. We make BSWAPDMSK a macro to + please the compiler (it wants immediate values). */ +#define bswapdmsk 0x1B /* 0b00'01'10'11 */ + const __m128i bswapbmsk = _mm_set_epi64x( + 0x0001020304050607ULL, + 0x08090a0b0c0d0e0fULL + ); + + const __m128i *blkx = (const __m128i *)blk; + + abcd = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *)s->dgst), bswapdmsk); + e0 = _mm_set_epi32(s->dgst[4], 0, 0, 0); + + abcd_save = abcd; + e_save = e0; + + /* Rounds 0–3 */ + msg0 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 0), bswapbmsk); + e0 = _mm_add_epi32(e0, msg0); + e1 = abcd; + abcd = _mm_sha1rnds4_epu32(abcd, e0, 0); + + /* Rounds 4–7 */ + msg1 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 1), bswapbmsk); + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + abcd = _mm_sha1rnds4_epu32(abcd, e1, 0); + msg0 = _mm_sha1msg1_epu32(msg0, msg1); + + /* Rounds 8–11 */ + msg2 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 2), bswapbmsk); + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + abcd = _mm_sha1rnds4_epu32(abcd, e0, 0); + msg1 = _mm_sha1msg1_epu32(msg1, msg2); + msg0 = _mm_xor_si128(msg0, msg2); + + /* Rounds 12–15 */ + msg3 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 3), bswapbmsk); + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + msg0 = _mm_sha1msg2_epu32(msg0, msg3); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 0); + msg2 = _mm_sha1msg1_epu32(msg2, msg3); + msg1 = _mm_xor_si128(msg1, msg3); + + /* Rounds 16–19 */ + e0 = _mm_sha1nexte_epu32(e0, msg0); + e1 = abcd; + msg1 = _mm_sha1msg2_epu32(msg1, msg0); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 0); + msg3 = _mm_sha1msg1_epu32(msg3, msg0); + msg2 = _mm_xor_si128(msg2, msg0); + + /* Rounds 20–23 */ + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + msg2 = _mm_sha1msg2_epu32(msg2, msg1); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 1); + msg0 = _mm_sha1msg1_epu32(msg0, msg1); + msg3 = _mm_xor_si128(msg3, msg1); + + /* Rounds 24–27 */ + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + msg3 = _mm_sha1msg2_epu32(msg3, msg2); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 1); + msg1 = _mm_sha1msg1_epu32(msg1, msg2); + msg0 = _mm_xor_si128(msg0, msg2); + + /* Rounds 28–31 */ + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + msg0 = _mm_sha1msg2_epu32(msg0, msg3); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 1); + msg2 = _mm_sha1msg1_epu32(msg2, msg3); + msg1 = _mm_xor_si128(msg1, msg3); + + /* Rounds 32–35 */ + e0 = _mm_sha1nexte_epu32(e0, msg0); + e1 = abcd; + msg1 = _mm_sha1msg2_epu32(msg1, msg0); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 1); + msg3 = _mm_sha1msg1_epu32(msg3, msg0); + msg2 = _mm_xor_si128(msg2, msg0); + + /* Rounds 36–39 */ + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + msg2 = _mm_sha1msg2_epu32(msg2, msg1); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 1); + msg0 = _mm_sha1msg1_epu32(msg0, msg1); + msg3 = _mm_xor_si128(msg3, msg1); + + /* Rounds 40–43 */ + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + msg3 = _mm_sha1msg2_epu32(msg3, msg2); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 2); + msg1 = _mm_sha1msg1_epu32(msg1, msg2); + msg0 = _mm_xor_si128(msg0, msg2); + + /* Rounds 44–47 */ + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + msg0 = _mm_sha1msg2_epu32(msg0, msg3); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 2); + msg2 = _mm_sha1msg1_epu32(msg2, msg3); + msg1 = _mm_xor_si128(msg1, msg3); + + /* Rounds 48–51 */ + e0 = _mm_sha1nexte_epu32(e0, msg0); + e1 = abcd; + msg1 = _mm_sha1msg2_epu32(msg1, msg0); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 2); + msg3 = _mm_sha1msg1_epu32(msg3, msg0); + msg2 = _mm_xor_si128(msg2, msg0); + + /* Rounds 52–55 */ + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + msg2 = _mm_sha1msg2_epu32(msg2, msg1); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 2); + msg0 = _mm_sha1msg1_epu32(msg0, msg1); + msg3 = _mm_xor_si128(msg3, msg1); + + /* Rounds 56–59 */ + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + msg3 = _mm_sha1msg2_epu32(msg3, msg2); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 2); + msg1 = _mm_sha1msg1_epu32(msg1, msg2); + msg0 = _mm_xor_si128(msg0, msg2); + + /* Rounds 60–63 */ + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + msg0 = _mm_sha1msg2_epu32(msg0, msg3); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 3); + msg2 = _mm_sha1msg1_epu32(msg2, msg3); + msg1 = _mm_xor_si128(msg1, msg3); + + /* Rounds 64–67 */ + e0 = _mm_sha1nexte_epu32(e0, msg0); + e1 = abcd; + msg1 = _mm_sha1msg2_epu32(msg1, msg0); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 3); + msg3 = _mm_sha1msg1_epu32(msg3, msg0); + msg2 = _mm_xor_si128(msg2, msg0); + + /* Rounds 68–71 */ + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + msg2 = _mm_sha1msg2_epu32(msg2, msg1); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 3); + msg3 = _mm_xor_si128(msg3, msg1); + + /* Rounds 72–75 */ + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + msg3 = _mm_sha1msg2_epu32(msg3, msg2); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 3); + + /* Rounds 76–79 */ + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + abcd = _mm_sha1rnds4_epu32(abcd, e1, 3); + + e0 = _mm_sha1nexte_epu32(e0, e_save); + abcd = _mm_add_epi32(abcd, abcd_save); + + _mm_storeu_si128((__m128i *)s->dgst, _mm_shuffle_epi32(abcd, bswapdmsk)); + s->dgst[4] = _mm_extract_epi32(e0, 3); +#undef bswapdmsk +} diff --git a/c/sha1/sha1.h b/c/sha1/sha1.h new file mode 100644 index 0000000..a01d116 --- /dev/null +++ b/c/sha1/sha1.h @@ -0,0 +1,21 @@ +#ifndef SHA1_SHA1_H +#define SHA1_SHA1_H + +#include +#include + +#define SHA1DGSTSZ 20 +#define SHA1BLKSZ 64 + +typedef struct { + uint32_t dgst[5]; + uint64_t msgsz; + uint8_t buf[64]; + size_t bufsz; +} sha1_t; + +void sha1init(sha1_t *); +int sha1hash(sha1_t *, const uint8_t *, size_t); +void sha1end(sha1_t *, uint8_t[SHA1DGSTSZ]); + +#endif /* !SHA1_SHA1_H */ -- cgit v1.2.3