aboutsummaryrefslogtreecommitdiff
path: root/c
diff options
context:
space:
mode:
Diffstat (limited to 'c')
-rw-r--r--c/sha1/.gitignore2
-rw-r--r--c/sha1/Makefile15
-rw-r--r--c/sha1/main.c37
-rw-r--r--c/sha1/sha1-naïve.c140
-rw-r--r--c/sha1/sha1-x86.c255
-rw-r--r--c/sha1/sha1.h21
6 files changed, 470 insertions, 0 deletions
diff --git a/c/sha1/.gitignore b/c/sha1/.gitignore
new file mode 100644
index 0000000..d6e6ddc
--- /dev/null
+++ b/c/sha1/.gitignore
@@ -0,0 +1,2 @@
+sha1-*
+!sha1-*.c
diff --git a/c/sha1/Makefile b/c/sha1/Makefile
new file mode 100644
index 0000000..4012c1d
--- /dev/null
+++ b/c/sha1/Makefile
@@ -0,0 +1,15 @@
+all: sha1-naïve sha1-x86
+
+sha1-naïve: main.c sha1-naïve.c
+ cc -flto -O3 -o $@ main.c $@.c
+
+sha1-x86: main.c sha1-x86.c
+ cc -flto -O3 -msha -msse4.1 -o $@ main.c $@.c
+
+expected:
+ { yes 'a' | tr -d '\n' | head -c1000000; \
+ yes 'b' | tr -d '\n' | head -c1000000; } \
+ | sha1sum | cut -d' ' -f1
+
+clean:
+ rm -f sha1-naïve sha1-x86
diff --git a/c/sha1/main.c b/c/sha1/main.c
new file mode 100644
index 0000000..9ea4aab
--- /dev/null
+++ b/c/sha1/main.c
@@ -0,0 +1,37 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sha1.h"
+
+static uint8_t bigbuf1[1000000];
+static uint8_t bigbuf2[1000000];
+
+int
+main(int argc, char **argv)
+{
+ (void)argc;
+
+ int e = 0;
+ sha1_t s;
+ uint8_t dgst[SHA1DGSTSZ];
+
+ memset(bigbuf1, 'a', sizeof(bigbuf1));
+ memset(bigbuf2, 'b', sizeof(bigbuf2));
+
+ sha1init(&s);
+ e |= sha1hash(&s, bigbuf1, sizeof(bigbuf1));
+ e |= sha1hash(&s, bigbuf2, sizeof(bigbuf2));
+ sha1end(&s, dgst);
+
+ if (e != 0) {
+ fprintf(stderr, "%s: %s\n", argv[0], strerror(e));
+ exit(EXIT_FAILURE);
+ }
+
+ for (int i = 0; i < sizeof(dgst); i++)
+ printf("%02" PRIx8, dgst[i]);
+ putchar('\n');
+ return EXIT_SUCCESS;
+}
diff --git a/c/sha1/sha1-naïve.c b/c/sha1/sha1-naïve.c
new file mode 100644
index 0000000..6f1f257
--- /dev/null
+++ b/c/sha1/sha1-naïve.c
@@ -0,0 +1,140 @@
+#include <endian.h>
+#include <errno.h>
+#include <string.h>
+
+#include "sha1.h"
+
+#define lengthof(xs) (sizeof(xs) / sizeof(*(xs)))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+static void sha1hashblk(sha1_t *, const uint8_t *);
+
+static const uint32_t K[] = {
+ 0x5A827999,
+ 0x6ED9EBA1,
+ 0x8F1BBCDC,
+ 0xCA62C1D6,
+};
+
+static inline uint32_t
+rotl32(uint32_t x, uint8_t bits)
+{
+#if (__GNUC__ || __TINYC__) && __x86_64__
+ asm ("roll %1, %0" : "+r" (x) : "c" (bits) : "cc");
+ return x;
+#else
+ return (x << bits) | (x >> (32 - bits));
+#endif
+}
+
+void
+sha1init(sha1_t *s)
+{
+ static const uint32_t H[] = {
+ 0x67452301,
+ 0xEFCDAB89,
+ 0x98BADCFE,
+ 0x10325476,
+ 0xC3D2E1F0,
+ };
+ memcpy(s->dgst, H, sizeof(H));
+ s->msgsz = s->bufsz = 0;
+}
+
+int
+sha1hash(sha1_t *s, const uint8_t *msg, size_t msgsz)
+{
+ if (s->msgsz + (msgsz * 8) < s->msgsz)
+ return EOVERFLOW;
+
+ s->msgsz += msgsz * 8;
+
+ while (msgsz != 0) {
+ size_t free_space = SHA1BLKSZ - s->bufsz;
+ size_t ncpy = MIN(msgsz, free_space);
+ memcpy(s->buf + s->bufsz, msg, ncpy);
+ s->bufsz += ncpy;
+ msg += ncpy;
+ msgsz -= ncpy;
+
+ if (s->bufsz == SHA1BLKSZ) {
+ sha1hashblk(s, s->buf);
+ s->bufsz = 0;
+ }
+ }
+
+ return 0;
+}
+
+void
+sha1end(sha1_t *s, uint8_t dgst[SHA1DGSTSZ])
+{
+ s->buf[s->bufsz++] = 0x80;
+
+ if (s->bufsz > SHA1BLKSZ - sizeof(uint64_t)) {
+ while (s->bufsz < SHA1BLKSZ)
+ s->buf[s->bufsz++] = 0;
+ sha1hashblk(s, s->buf);
+ s->bufsz = 0;
+ }
+
+ while (s->bufsz < 56)
+ s->buf[s->bufsz++] = 0;
+ ((uint64_t *)s->buf)[SHA1BLKSZ/8 - 1] = htobe64(s->msgsz);
+
+ sha1hashblk(s, s->buf);
+
+ for (int i = 0; i < lengthof(s->dgst); i++)
+ ((uint32_t *)dgst)[i] = htobe32(s->dgst[i]);
+}
+
+static void
+sha1hashblk(sha1_t *s, const uint8_t *blk)
+{
+ uint32_t w[80];
+ uint32_t a, b, c, d, e, tmp;
+
+ for (int i = 0; i < 16; i++)
+ w[i] = htobe32(((uint32_t *)blk)[i]);
+ for (int i = 16; i < 32; i++)
+ w[i] = rotl32(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);
+ for (int i = 32; i < 80; i++)
+ w[i] = rotl32(w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32], 2);
+
+ a = s->dgst[0];
+ b = s->dgst[1];
+ c = s->dgst[2];
+ d = s->dgst[3];
+ e = s->dgst[4];
+
+ for (int i = 0; i < 80; i++) {
+ uint32_t f, k;
+
+ if (i < 20) {
+ f = b&c | ~b&d;
+ k = K[0];
+ } else if (i < 40) {
+ f = b ^ c ^ d;
+ k = K[1];
+ } else if (i < 60) {
+ f = b&c | b&d | c&d;
+ k = K[2];
+ } else {
+ f = b ^ c ^ d;
+ k = K[3];
+ }
+
+ tmp = rotl32(a, 5) + f + e + w[i] + k;
+ e = d;
+ d = c;
+ c = rotl32(b, 30);
+ b = a;
+ a = tmp;
+ }
+
+ s->dgst[0] += a;
+ s->dgst[1] += b;
+ s->dgst[2] += c;
+ s->dgst[3] += d;
+ s->dgst[4] += e;
+}
diff --git a/c/sha1/sha1-x86.c b/c/sha1/sha1-x86.c
new file mode 100644
index 0000000..b7cf000
--- /dev/null
+++ b/c/sha1/sha1-x86.c
@@ -0,0 +1,255 @@
+#include <endian.h>
+#include <errno.h>
+#include <immintrin.h>
+#include <string.h>
+
+#include "sha1.h"
+
+#define lengthof(xs) (sizeof(xs) / sizeof(*(xs)))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+static void sha1hashblk(sha1_t *, const uint8_t *);
+
+void
+sha1init(sha1_t *s)
+{
+ static const uint32_t H[] = {
+ 0x67452301,
+ 0xEFCDAB89,
+ 0x98BADCFE,
+ 0x10325476,
+ 0xC3D2E1F0,
+ };
+ memcpy(s->dgst, H, sizeof(H));
+ s->msgsz = s->bufsz = 0;
+}
+
+int
+sha1hash(sha1_t *s, const uint8_t *msg, size_t msgsz)
+{
+ if (s->msgsz + (msgsz * 8) < s->msgsz)
+ return EOVERFLOW;
+
+ s->msgsz += msgsz * 8;
+
+ while (msgsz != 0) {
+ size_t free_space = SHA1BLKSZ - s->bufsz;
+ size_t ncpy = MIN(msgsz, free_space);
+ memcpy(s->buf + s->bufsz, msg, ncpy);
+ s->bufsz += ncpy;
+ msg += ncpy;
+ msgsz -= ncpy;
+
+ if (s->bufsz == SHA1BLKSZ) {
+ sha1hashblk(s, s->buf);
+ s->bufsz = 0;
+ }
+ }
+
+ return 0;
+}
+
+void
+sha1end(sha1_t *s, uint8_t dgst[SHA1DGSTSZ])
+{
+ s->buf[s->bufsz++] = 0x80;
+
+ if (s->bufsz > SHA1BLKSZ - sizeof(uint64_t)) {
+ while (s->bufsz < SHA1BLKSZ)
+ s->buf[s->bufsz++] = 0;
+ sha1hashblk(s, s->buf);
+ s->bufsz = 0;
+ }
+
+ while (s->bufsz < 56)
+ s->buf[s->bufsz++] = 0;
+ ((uint64_t *)s->buf)[SHA1BLKSZ/8 - 1] = htobe64(s->msgsz);
+
+ sha1hashblk(s, s->buf);
+
+ for (int i = 0; i < lengthof(s->dgst); i++)
+ ((uint32_t *)dgst)[i] = htobe32(s->dgst[i]);
+}
+
+static void
+sha1hashblk(sha1_t *s, const uint8_t *blk)
+{
+ __m128i abcd, e0, e1;
+ __m128i abcd_save, e_save;
+ __m128i msg0, msg1, msg2, msg3;
+
+ /* Masks for swapping endianness. We make BSWAPDMSK a macro to
+ please the compiler (it wants immediate values). */
+#define bswapdmsk 0x1B /* 0b00'01'10'11 */
+ const __m128i bswapbmsk = _mm_set_epi64x(
+ 0x0001020304050607ULL,
+ 0x08090a0b0c0d0e0fULL
+ );
+
+ const __m128i *blkx = (const __m128i *)blk;
+
+ abcd = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *)s->dgst), bswapdmsk);
+ e0 = _mm_set_epi32(s->dgst[4], 0, 0, 0);
+
+ abcd_save = abcd;
+ e_save = e0;
+
+ /* Rounds 0–3 */
+ msg0 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 0), bswapbmsk);
+ e0 = _mm_add_epi32(e0, msg0);
+ e1 = abcd;
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 0);
+
+ /* Rounds 4–7 */
+ msg1 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 1), bswapbmsk);
+ e1 = _mm_sha1nexte_epu32(e1, msg1);
+ e0 = abcd;
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 0);
+ msg0 = _mm_sha1msg1_epu32(msg0, msg1);
+
+ /* Rounds 8–11 */
+ msg2 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 2), bswapbmsk);
+ e0 = _mm_sha1nexte_epu32(e0, msg2);
+ e1 = abcd;
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 0);
+ msg1 = _mm_sha1msg1_epu32(msg1, msg2);
+ msg0 = _mm_xor_si128(msg0, msg2);
+
+ /* Rounds 12–15 */
+ msg3 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 3), bswapbmsk);
+ e1 = _mm_sha1nexte_epu32(e1, msg3);
+ e0 = abcd;
+ msg0 = _mm_sha1msg2_epu32(msg0, msg3);
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 0);
+ msg2 = _mm_sha1msg1_epu32(msg2, msg3);
+ msg1 = _mm_xor_si128(msg1, msg3);
+
+ /* Rounds 16–19 */
+ e0 = _mm_sha1nexte_epu32(e0, msg0);
+ e1 = abcd;
+ msg1 = _mm_sha1msg2_epu32(msg1, msg0);
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 0);
+ msg3 = _mm_sha1msg1_epu32(msg3, msg0);
+ msg2 = _mm_xor_si128(msg2, msg0);
+
+ /* Rounds 20–23 */
+ e1 = _mm_sha1nexte_epu32(e1, msg1);
+ e0 = abcd;
+ msg2 = _mm_sha1msg2_epu32(msg2, msg1);
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 1);
+ msg0 = _mm_sha1msg1_epu32(msg0, msg1);
+ msg3 = _mm_xor_si128(msg3, msg1);
+
+ /* Rounds 24–27 */
+ e0 = _mm_sha1nexte_epu32(e0, msg2);
+ e1 = abcd;
+ msg3 = _mm_sha1msg2_epu32(msg3, msg2);
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 1);
+ msg1 = _mm_sha1msg1_epu32(msg1, msg2);
+ msg0 = _mm_xor_si128(msg0, msg2);
+
+ /* Rounds 28–31 */
+ e1 = _mm_sha1nexte_epu32(e1, msg3);
+ e0 = abcd;
+ msg0 = _mm_sha1msg2_epu32(msg0, msg3);
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 1);
+ msg2 = _mm_sha1msg1_epu32(msg2, msg3);
+ msg1 = _mm_xor_si128(msg1, msg3);
+
+ /* Rounds 32–35 */
+ e0 = _mm_sha1nexte_epu32(e0, msg0);
+ e1 = abcd;
+ msg1 = _mm_sha1msg2_epu32(msg1, msg0);
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 1);
+ msg3 = _mm_sha1msg1_epu32(msg3, msg0);
+ msg2 = _mm_xor_si128(msg2, msg0);
+
+ /* Rounds 36–39 */
+ e1 = _mm_sha1nexte_epu32(e1, msg1);
+ e0 = abcd;
+ msg2 = _mm_sha1msg2_epu32(msg2, msg1);
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 1);
+ msg0 = _mm_sha1msg1_epu32(msg0, msg1);
+ msg3 = _mm_xor_si128(msg3, msg1);
+
+ /* Rounds 40–43 */
+ e0 = _mm_sha1nexte_epu32(e0, msg2);
+ e1 = abcd;
+ msg3 = _mm_sha1msg2_epu32(msg3, msg2);
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 2);
+ msg1 = _mm_sha1msg1_epu32(msg1, msg2);
+ msg0 = _mm_xor_si128(msg0, msg2);
+
+ /* Rounds 44–47 */
+ e1 = _mm_sha1nexte_epu32(e1, msg3);
+ e0 = abcd;
+ msg0 = _mm_sha1msg2_epu32(msg0, msg3);
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 2);
+ msg2 = _mm_sha1msg1_epu32(msg2, msg3);
+ msg1 = _mm_xor_si128(msg1, msg3);
+
+ /* Rounds 48–51 */
+ e0 = _mm_sha1nexte_epu32(e0, msg0);
+ e1 = abcd;
+ msg1 = _mm_sha1msg2_epu32(msg1, msg0);
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 2);
+ msg3 = _mm_sha1msg1_epu32(msg3, msg0);
+ msg2 = _mm_xor_si128(msg2, msg0);
+
+ /* Rounds 52–55 */
+ e1 = _mm_sha1nexte_epu32(e1, msg1);
+ e0 = abcd;
+ msg2 = _mm_sha1msg2_epu32(msg2, msg1);
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 2);
+ msg0 = _mm_sha1msg1_epu32(msg0, msg1);
+ msg3 = _mm_xor_si128(msg3, msg1);
+
+ /* Rounds 56–59 */
+ e0 = _mm_sha1nexte_epu32(e0, msg2);
+ e1 = abcd;
+ msg3 = _mm_sha1msg2_epu32(msg3, msg2);
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 2);
+ msg1 = _mm_sha1msg1_epu32(msg1, msg2);
+ msg0 = _mm_xor_si128(msg0, msg2);
+
+ /* Rounds 60–63 */
+ e1 = _mm_sha1nexte_epu32(e1, msg3);
+ e0 = abcd;
+ msg0 = _mm_sha1msg2_epu32(msg0, msg3);
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 3);
+ msg2 = _mm_sha1msg1_epu32(msg2, msg3);
+ msg1 = _mm_xor_si128(msg1, msg3);
+
+ /* Rounds 64–67 */
+ e0 = _mm_sha1nexte_epu32(e0, msg0);
+ e1 = abcd;
+ msg1 = _mm_sha1msg2_epu32(msg1, msg0);
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 3);
+ msg3 = _mm_sha1msg1_epu32(msg3, msg0);
+ msg2 = _mm_xor_si128(msg2, msg0);
+
+ /* Rounds 68–71 */
+ e1 = _mm_sha1nexte_epu32(e1, msg1);
+ e0 = abcd;
+ msg2 = _mm_sha1msg2_epu32(msg2, msg1);
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 3);
+ msg3 = _mm_xor_si128(msg3, msg1);
+
+ /* Rounds 72–75 */
+ e0 = _mm_sha1nexte_epu32(e0, msg2);
+ e1 = abcd;
+ msg3 = _mm_sha1msg2_epu32(msg3, msg2);
+ abcd = _mm_sha1rnds4_epu32(abcd, e0, 3);
+
+ /* Rounds 76–79 */
+ e1 = _mm_sha1nexte_epu32(e1, msg3);
+ e0 = abcd;
+ abcd = _mm_sha1rnds4_epu32(abcd, e1, 3);
+
+ e0 = _mm_sha1nexte_epu32(e0, e_save);
+ abcd = _mm_add_epi32(abcd, abcd_save);
+
+ _mm_storeu_si128((__m128i *)s->dgst, _mm_shuffle_epi32(abcd, bswapdmsk));
+ s->dgst[4] = _mm_extract_epi32(e0, 3);
+#undef bswapdmsk
+}
diff --git a/c/sha1/sha1.h b/c/sha1/sha1.h
new file mode 100644
index 0000000..a01d116
--- /dev/null
+++ b/c/sha1/sha1.h
@@ -0,0 +1,21 @@
+#ifndef SHA1_SHA1_H
+#define SHA1_SHA1_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define SHA1DGSTSZ 20
+#define SHA1BLKSZ 64
+
+typedef struct {
+ uint32_t dgst[5];
+ uint64_t msgsz;
+ uint8_t buf[64];
+ size_t bufsz;
+} sha1_t;
+
+void sha1init(sha1_t *);
+int sha1hash(sha1_t *, const uint8_t *, size_t);
+void sha1end(sha1_t *, uint8_t[SHA1DGSTSZ]);
+
+#endif /* !SHA1_SHA1_H */