From c0e5d9280dd156631bd398463dbb6a65aff37d19 Mon Sep 17 00:00:00 2001
From: Thomas Voss <mail@thomasvoss.com>
Date: Thu, 15 Aug 2024 17:10:10 +0200
Subject: Implement SHA-1

---
 c/sha1/.gitignore            |   2 +
 c/sha1/Makefile              |  15 +++
 c/sha1/main.c                |  37 +++++++
 "c/sha1/sha1-na\303\257ve.c" | 140 ++++++++++++++++++++++++
 c/sha1/sha1-x86.c            | 255 +++++++++++++++++++++++++++++++++++++++++++
 c/sha1/sha1.h                |  21 ++++
 6 files changed, 470 insertions(+)
 create mode 100644 c/sha1/.gitignore
 create mode 100644 c/sha1/Makefile
 create mode 100644 c/sha1/main.c
 create mode 100644 "c/sha1/sha1-na\303\257ve.c"
 create mode 100644 c/sha1/sha1-x86.c
 create mode 100644 c/sha1/sha1.h

(limited to 'c')

diff --git a/c/sha1/.gitignore b/c/sha1/.gitignore
new file mode 100644
index 0000000..d6e6ddc
--- /dev/null
+++ b/c/sha1/.gitignore
@@ -0,0 +1,2 @@
+sha1-*
+!sha1-*.c
diff --git a/c/sha1/Makefile b/c/sha1/Makefile
new file mode 100644
index 0000000..4012c1d
--- /dev/null
+++ b/c/sha1/Makefile
@@ -0,0 +1,15 @@
+all: sha1-naïve sha1-x86
+
+sha1-naïve: main.c sha1-naïve.c
+	cc -flto -O3 -o $@ main.c $@.c
+
+sha1-x86: main.c sha1-x86.c
+	cc -flto -O3 -msha -msse4.1 -o $@ main.c $@.c
+
+expected:
+	{ yes 'a' | tr -d '\n' | head -c1000000;   \
+	  yes 'b' | tr -d '\n' | head -c1000000; } \
+	| sha1sum | cut -d' ' -f1
+
+clean:
+	rm -f sha1-naïve sha1-x86
diff --git a/c/sha1/main.c b/c/sha1/main.c
new file mode 100644
index 0000000..9ea4aab
--- /dev/null
+++ b/c/sha1/main.c
@@ -0,0 +1,37 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sha1.h"
+
+static uint8_t bigbuf1[1000000];
+static uint8_t bigbuf2[1000000];
+
+int
+main(int argc, char **argv)
+{
+	(void)argc;
+
+	int e = 0;
+	sha1_t s;
+	uint8_t dgst[SHA1DGSTSZ];
+
+	memset(bigbuf1, 'a', sizeof(bigbuf1));
+	memset(bigbuf2, 'b', sizeof(bigbuf2));
+
+	sha1init(&s);
+	e |= sha1hash(&s, bigbuf1, sizeof(bigbuf1));
+	e |= sha1hash(&s, bigbuf2, sizeof(bigbuf2));
+	sha1end(&s, dgst);
+
+	if (e != 0) {
+		fprintf(stderr, "%s: %s\n", argv[0], strerror(e));
+		exit(EXIT_FAILURE);
+	}
+
+	for (int i = 0; i < sizeof(dgst); i++)
+		printf("%02" PRIx8, dgst[i]);
+	putchar('\n');
+	return EXIT_SUCCESS;
+}
diff --git "a/c/sha1/sha1-na\303\257ve.c" "b/c/sha1/sha1-na\303\257ve.c"
new file mode 100644
index 0000000..6f1f257
--- /dev/null
+++ "b/c/sha1/sha1-na\303\257ve.c"
@@ -0,0 +1,140 @@
+#include <endian.h>
+#include <errno.h>
+#include <string.h>
+
+#include "sha1.h"
+
+#define lengthof(xs) (sizeof(xs) / sizeof(*(xs)))
+#define MIN(x, y)    ((x) < (y) ? (x) : (y))
+
+static void sha1hashblk(sha1_t *, const uint8_t *);
+
+static const uint32_t K[] = {
+	0x5A827999,
+	0x6ED9EBA1,
+	0x8F1BBCDC,
+	0xCA62C1D6,
+};
+
+static inline uint32_t
+rotl32(uint32_t x, uint8_t bits)
+{
+#if (__GNUC__ || __TINYC__) && __x86_64__
+	asm ("roll %1, %0" : "+r" (x) : "c" (bits) : "cc");
+	return x;
+#else
+	return (x << bits) | (x >> (32 - bits));
+#endif
+}
+
+void
+sha1init(sha1_t *s)
+{
+	static const uint32_t H[] = {
+		0x67452301,
+		0xEFCDAB89,
+		0x98BADCFE,
+		0x10325476,
+		0xC3D2E1F0,
+	};
+	memcpy(s->dgst, H, sizeof(H));
+	s->msgsz = s->bufsz = 0;
+}
+
+int
+sha1hash(sha1_t *s, const uint8_t *msg, size_t msgsz)
+{
+	if (s->msgsz + (msgsz * 8) < s->msgsz)
+		return EOVERFLOW;
+
+	s->msgsz += msgsz * 8;
+
+	while (msgsz != 0) {
+		size_t free_space = SHA1BLKSZ - s->bufsz;
+		size_t ncpy = MIN(msgsz, free_space);
+		memcpy(s->buf + s->bufsz, msg, ncpy);
+		s->bufsz += ncpy;
+		msg += ncpy;
+		msgsz -= ncpy;
+
+		if (s->bufsz == SHA1BLKSZ) {
+			sha1hashblk(s, s->buf);
+			s->bufsz = 0;
+		}
+	}
+
+	return 0;
+}
+
+void
+sha1end(sha1_t *s, uint8_t dgst[SHA1DGSTSZ])
+{
+	s->buf[s->bufsz++] = 0x80;
+
+	if (s->bufsz > SHA1BLKSZ - sizeof(uint64_t)) {
+		while (s->bufsz < SHA1BLKSZ)
+			s->buf[s->bufsz++] = 0;
+		sha1hashblk(s, s->buf);
+		s->bufsz = 0;
+	}
+
+	while (s->bufsz < 56)
+		s->buf[s->bufsz++] = 0;
+	((uint64_t *)s->buf)[SHA1BLKSZ/8 - 1] = htobe64(s->msgsz);
+
+	sha1hashblk(s, s->buf);
+
+	for (int i = 0; i < lengthof(s->dgst); i++)
+		((uint32_t *)dgst)[i] = htobe32(s->dgst[i]);
+}
+
+static void
+sha1hashblk(sha1_t *s, const uint8_t *blk)
+{
+	uint32_t w[80];
+	uint32_t a, b, c, d, e, tmp;
+
+	for (int i = 0; i < 16; i++)
+		w[i] = htobe32(((uint32_t *)blk)[i]);
+	for (int i = 16; i < 32; i++)
+		w[i] = rotl32(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);
+	for (int i = 32; i < 80; i++)
+		w[i] = rotl32(w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32], 2);
+
+	a = s->dgst[0];
+	b = s->dgst[1];
+	c = s->dgst[2];
+	d = s->dgst[3];
+	e = s->dgst[4];
+
+	for (int i = 0; i < 80; i++) {
+		uint32_t f, k;
+
+		if (i < 20) {
+			f = b&c | ~b&d;
+			k = K[0];
+		} else if (i < 40) {
+			f = b ^ c ^ d;
+			k = K[1];
+		} else if (i < 60) {
+			f = b&c | b&d | c&d;
+			k = K[2];
+		} else {
+			f = b ^ c ^ d;
+			k = K[3];
+		}
+
+		tmp = rotl32(a, 5) + f + e + w[i] + k;
+		e = d;
+		d = c;
+		c = rotl32(b, 30);
+		b = a;
+		a = tmp;
+	}
+
+	s->dgst[0] += a;
+	s->dgst[1] += b;
+	s->dgst[2] += c;
+	s->dgst[3] += d;
+	s->dgst[4] += e;
+}
diff --git a/c/sha1/sha1-x86.c b/c/sha1/sha1-x86.c
new file mode 100644
index 0000000..b7cf000
--- /dev/null
+++ b/c/sha1/sha1-x86.c
@@ -0,0 +1,255 @@
+#include <endian.h>
+#include <errno.h>
+#include <immintrin.h>
+#include <string.h>
+
+#include "sha1.h"
+
+#define lengthof(xs) (sizeof(xs) / sizeof(*(xs)))
+#define MIN(x, y)    ((x) < (y) ? (x) : (y))
+
+static void sha1hashblk(sha1_t *, const uint8_t *);
+
+void
+sha1init(sha1_t *s)
+{
+	static const uint32_t H[] = {
+		0x67452301,
+		0xEFCDAB89,
+		0x98BADCFE,
+		0x10325476,
+		0xC3D2E1F0,
+	};
+	memcpy(s->dgst, H, sizeof(H));
+	s->msgsz = s->bufsz = 0;
+}
+
+int
+sha1hash(sha1_t *s, const uint8_t *msg, size_t msgsz)
+{
+	if (s->msgsz + (msgsz * 8) < s->msgsz)
+		return EOVERFLOW;
+
+	s->msgsz += msgsz * 8;
+
+	while (msgsz != 0) {
+		size_t free_space = SHA1BLKSZ - s->bufsz;
+		size_t ncpy = MIN(msgsz, free_space);
+		memcpy(s->buf + s->bufsz, msg, ncpy);
+		s->bufsz += ncpy;
+		msg += ncpy;
+		msgsz -= ncpy;
+
+		if (s->bufsz == SHA1BLKSZ) {
+			sha1hashblk(s, s->buf);
+			s->bufsz = 0;
+		}
+	}
+
+	return 0;
+}
+
+void
+sha1end(sha1_t *s, uint8_t dgst[SHA1DGSTSZ])
+{
+	s->buf[s->bufsz++] = 0x80;
+
+	if (s->bufsz > SHA1BLKSZ - sizeof(uint64_t)) {
+		while (s->bufsz < SHA1BLKSZ)
+			s->buf[s->bufsz++] = 0;
+		sha1hashblk(s, s->buf);
+		s->bufsz = 0;
+	}
+
+	while (s->bufsz < 56)
+		s->buf[s->bufsz++] = 0;
+	((uint64_t *)s->buf)[SHA1BLKSZ/8 - 1] = htobe64(s->msgsz);
+
+	sha1hashblk(s, s->buf);
+
+	for (int i = 0; i < lengthof(s->dgst); i++)
+		((uint32_t *)dgst)[i] = htobe32(s->dgst[i]);
+}
+
+static void
+sha1hashblk(sha1_t *s, const uint8_t *blk)
+{
+    __m128i abcd, e0, e1;
+	__m128i abcd_save, e_save;
+    __m128i msg0, msg1, msg2, msg3;
+
+	/* Masks for swapping endianness.  We make BSWAPDMSK a macro to
+	   please the compiler (it wants immediate values). */
+#define bswapdmsk 0x1B  /* 0b00'01'10'11 */
+    const __m128i bswapbmsk = _mm_set_epi64x(
+		0x0001020304050607ULL,
+		0x08090a0b0c0d0e0fULL
+	);
+
+	const __m128i *blkx = (const __m128i *)blk;
+
+	abcd = _mm_shuffle_epi32(_mm_loadu_si128((__m128i *)s->dgst), bswapdmsk);
+    e0 = _mm_set_epi32(s->dgst[4], 0, 0, 0);
+
+	abcd_save = abcd;
+	e_save = e0;
+
+	/* Rounds 0–3 */
+	msg0 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 0), bswapbmsk);
+	e0 = _mm_add_epi32(e0, msg0);
+	e1 = abcd;
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 0);
+
+	/* Rounds 4–7 */
+	msg1 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 1), bswapbmsk);
+	e1 = _mm_sha1nexte_epu32(e1, msg1);
+	e0 = abcd;
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 0);
+	msg0 = _mm_sha1msg1_epu32(msg0, msg1);
+
+	/* Rounds 8–11 */
+	msg2 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 2), bswapbmsk);
+	e0 = _mm_sha1nexte_epu32(e0, msg2);
+	e1 = abcd;
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 0);
+	msg1 = _mm_sha1msg1_epu32(msg1, msg2);
+	msg0 = _mm_xor_si128(msg0, msg2);
+
+	/* Rounds 12–15 */
+	msg3 = _mm_shuffle_epi8(_mm_loadu_si128(blkx + 3), bswapbmsk);
+	e1 = _mm_sha1nexte_epu32(e1, msg3);
+	e0 = abcd;
+	msg0 = _mm_sha1msg2_epu32(msg0, msg3);
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 0);
+	msg2 = _mm_sha1msg1_epu32(msg2, msg3);
+	msg1 = _mm_xor_si128(msg1, msg3);
+
+	/* Rounds 16–19 */
+	e0 = _mm_sha1nexte_epu32(e0, msg0);
+	e1 = abcd;
+	msg1 = _mm_sha1msg2_epu32(msg1, msg0);
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 0);
+	msg3 = _mm_sha1msg1_epu32(msg3, msg0);
+	msg2 = _mm_xor_si128(msg2, msg0);
+
+	/* Rounds 20–23 */
+	e1 = _mm_sha1nexte_epu32(e1, msg1);
+	e0 = abcd;
+	msg2 = _mm_sha1msg2_epu32(msg2, msg1);
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 1);
+	msg0 = _mm_sha1msg1_epu32(msg0, msg1);
+	msg3 = _mm_xor_si128(msg3, msg1);
+
+	/* Rounds 24–27 */
+	e0 = _mm_sha1nexte_epu32(e0, msg2);
+	e1 = abcd;
+	msg3 = _mm_sha1msg2_epu32(msg3, msg2);
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 1);
+	msg1 = _mm_sha1msg1_epu32(msg1, msg2);
+	msg0 = _mm_xor_si128(msg0, msg2);
+
+	/* Rounds 28–31 */
+	e1 = _mm_sha1nexte_epu32(e1, msg3);
+	e0 = abcd;
+	msg0 = _mm_sha1msg2_epu32(msg0, msg3);
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 1);
+	msg2 = _mm_sha1msg1_epu32(msg2, msg3);
+	msg1 = _mm_xor_si128(msg1, msg3);
+
+	/* Rounds 32–35 */
+	e0 = _mm_sha1nexte_epu32(e0, msg0);
+	e1 = abcd;
+	msg1 = _mm_sha1msg2_epu32(msg1, msg0);
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 1);
+	msg3 = _mm_sha1msg1_epu32(msg3, msg0);
+	msg2 = _mm_xor_si128(msg2, msg0);
+
+	/* Rounds 36–39 */
+	e1 = _mm_sha1nexte_epu32(e1, msg1);
+	e0 = abcd;
+	msg2 = _mm_sha1msg2_epu32(msg2, msg1);
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 1);
+	msg0 = _mm_sha1msg1_epu32(msg0, msg1);
+	msg3 = _mm_xor_si128(msg3, msg1);
+
+	/* Rounds 40–43 */
+	e0 = _mm_sha1nexte_epu32(e0, msg2);
+	e1 = abcd;
+	msg3 = _mm_sha1msg2_epu32(msg3, msg2);
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 2);
+	msg1 = _mm_sha1msg1_epu32(msg1, msg2);
+	msg0 = _mm_xor_si128(msg0, msg2);
+
+	/* Rounds 44–47 */
+	e1 = _mm_sha1nexte_epu32(e1, msg3);
+	e0 = abcd;
+	msg0 = _mm_sha1msg2_epu32(msg0, msg3);
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 2);
+	msg2 = _mm_sha1msg1_epu32(msg2, msg3);
+	msg1 = _mm_xor_si128(msg1, msg3);
+
+	/* Rounds 48–51 */
+	e0 = _mm_sha1nexte_epu32(e0, msg0);
+	e1 = abcd;
+	msg1 = _mm_sha1msg2_epu32(msg1, msg0);
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 2);
+	msg3 = _mm_sha1msg1_epu32(msg3, msg0);
+	msg2 = _mm_xor_si128(msg2, msg0);
+
+	/* Rounds 52–55 */
+	e1 = _mm_sha1nexte_epu32(e1, msg1);
+	e0 = abcd;
+	msg2 = _mm_sha1msg2_epu32(msg2, msg1);
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 2);
+	msg0 = _mm_sha1msg1_epu32(msg0, msg1);
+	msg3 = _mm_xor_si128(msg3, msg1);
+
+	/* Rounds 56–59 */
+	e0 = _mm_sha1nexte_epu32(e0, msg2);
+	e1 = abcd;
+	msg3 = _mm_sha1msg2_epu32(msg3, msg2);
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 2);
+	msg1 = _mm_sha1msg1_epu32(msg1, msg2);
+	msg0 = _mm_xor_si128(msg0, msg2);
+
+	/* Rounds 60–63 */
+	e1 = _mm_sha1nexte_epu32(e1, msg3);
+	e0 = abcd;
+	msg0 = _mm_sha1msg2_epu32(msg0, msg3);
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 3);
+	msg2 = _mm_sha1msg1_epu32(msg2, msg3);
+	msg1 = _mm_xor_si128(msg1, msg3);
+
+	/* Rounds 64–67 */
+	e0 = _mm_sha1nexte_epu32(e0, msg0);
+	e1 = abcd;
+	msg1 = _mm_sha1msg2_epu32(msg1, msg0);
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 3);
+	msg3 = _mm_sha1msg1_epu32(msg3, msg0);
+	msg2 = _mm_xor_si128(msg2, msg0);
+
+	/* Rounds 68–71 */
+	e1 = _mm_sha1nexte_epu32(e1, msg1);
+	e0 = abcd;
+	msg2 = _mm_sha1msg2_epu32(msg2, msg1);
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 3);
+	msg3 = _mm_xor_si128(msg3, msg1);
+
+	/* Rounds 72–75 */
+	e0 = _mm_sha1nexte_epu32(e0, msg2);
+	e1 = abcd;
+	msg3 = _mm_sha1msg2_epu32(msg3, msg2);
+	abcd = _mm_sha1rnds4_epu32(abcd, e0, 3);
+
+	/* Rounds 76–79 */
+	e1 = _mm_sha1nexte_epu32(e1, msg3);
+	e0 = abcd;
+	abcd = _mm_sha1rnds4_epu32(abcd, e1, 3);
+
+	e0 = _mm_sha1nexte_epu32(e0, e_save);
+	abcd = _mm_add_epi32(abcd, abcd_save);
+
+	_mm_storeu_si128((__m128i *)s->dgst, _mm_shuffle_epi32(abcd, bswapdmsk));
+    s->dgst[4] = _mm_extract_epi32(e0, 3);
+#undef bswapdmsk
+}
diff --git a/c/sha1/sha1.h b/c/sha1/sha1.h
new file mode 100644
index 0000000..a01d116
--- /dev/null
+++ b/c/sha1/sha1.h
@@ -0,0 +1,21 @@
+#ifndef SHA1_SHA1_H
+#define SHA1_SHA1_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define SHA1DGSTSZ 20
+#define SHA1BLKSZ  64
+
+typedef struct {
+	uint32_t dgst[5];
+	uint64_t msgsz;
+	uint8_t buf[64];
+	size_t bufsz;
+} sha1_t;
+
+void sha1init(sha1_t *);
+int sha1hash(sha1_t *, const uint8_t *, size_t);
+void sha1end(sha1_t *, uint8_t[SHA1DGSTSZ]);
+
+#endif /* !SHA1_SHA1_H */
-- 
cgit v1.2.3