about summary refs log tree commit diff
path: root/c/simd-isascii
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-08-25 00:24:53 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-08-25 00:24:53 +0200
commit e6d02a085533134ee6bf1bca862b03968a83b591 (patch)
tree 3ea799873b92e00cf05aa7844f69df65c76c5966 /c/simd-isascii
parent 3db6b4427de43ec3ab54f6cec3e1a014780d6890 (diff)
Rename isascii-avx to simd-isascii
Diffstat (limited to 'c/simd-isascii')
-rw-r--r-- c/simd-isascii/.gitignore 1
-rw-r--r-- c/simd-isascii/Makefile 8
-rw-r--r-- c/simd-isascii/isascii.c 99
3 files changed, 108 insertions, 0 deletions
diff --git a/c/simd-isascii/.gitignore b/c/simd-isascii/.gitignore
new file mode 100644
index 0000000..4328e16
--- /dev/null
+++ b/c/simd-isascii/.gitignore
@@ -0,0 +1 @@
+isascii
diff --git a/c/simd-isascii/Makefile b/c/simd-isascii/Makefile
new file mode 100644
index 0000000..3c49e43
--- /dev/null
+++ b/c/simd-isascii/Makefile
@@ -0,0 +1,8 @@
+include ../base.mk
+# Libraries are listed after the source file: the linker resolves symbols left to right.
+all: isascii
+isascii: isascii.c
+	$(CC) $(CFLAGS) -o $@ $< -lbsd
+
+clean:
+	rm -f isascii
diff --git a/c/simd-isascii/isascii.c b/c/simd-isascii/isascii.c
new file mode 100644
index 0000000..612aa49
--- /dev/null
+++ b/c/simd-isascii/isascii.c
@@ -0,0 +1,99 @@
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <immintrin.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <bsd/stdlib.h>
+
+#define VECWDTH (512 / 8) /* number of bytes in one 512-bit vector */
+
+static const unsigned char *readfile(const char *, size_t *);
+
+static const uint8_t charmsk[64] = { /* 0x80 (bit 7) in each of the 64 byte positions */
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+};
+
+bool /* Return true iff none of the n bytes at s is greater than 0x7F; uses 512-bit vector loads. */
+strisascii(const unsigned char *s, size_t n)
+{
+ __m512i msk = _mm512_loadu_epi8(charmsk); /* vector holding 0x80 in every byte; NOTE(review): no CPU-feature check is visible here */
+ while (n >= VECWDTH) { /* consume the input one full VECWDTH-byte chunk at a time */
+ if (_mm512_test_epi8_mask(_mm512_loadu_epi8(s), msk) != 0) /* non-zero mask: some byte ANDed with 0x80 is non-zero */
+ return false;
+ s += VECWDTH;
+ n -= VECWDTH;
+ }
+ for (size_t i = 0; i < n; i++) { /* scalar pass over the remaining (fewer than VECWDTH) bytes */
+ if (s[i] > 0x7F)
+ return false;
+ }
+ return true;
+}
+
+bool /* Byte-at-a-time version: return true iff none of the n bytes at s is greater than 0x7F. */
+strisascii_dumb(const unsigned char *s, size_t n)
+{
+ for (size_t i = 0; i < n; i++) {
+ if (s[i] > 0x7F) /* bit 7 set, so the byte is outside the ASCII range */
+ return false;
+ }
+ return true;
+}
+
+int /* Maps the file named by argv[1], then runs and times strisascii() and strisascii_dumb() on it. */
+main(int argc, char **argv)
+{
+ if (argc != 2) { /* exactly one file argument is required */
+ fprintf(stderr, "Usage: %s file\n", getprogname());
+ exit(EXIT_FAILURE);
+ }
+
+ size_t len;
+ int rv = EXIT_SUCCESS; /* NOTE(review): never reassigned below, so reaching the return always yields EXIT_SUCCESS */
+ const unsigned char *beg = readfile(argv[1], &len);
+
+ clock_t tmbeg = clock(); /* clock() reports processor time, not wall-clock time */
+ if (!strisascii(beg, len))
+ puts("Non-ASCII");
+ printf("Elapsed time: %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC);
+
+ tmbeg = clock(); /* restart the timer for the byte-at-a-time run */
+ if (!strisascii_dumb((const unsigned char *)beg, len))
+ puts("Non-ASCII");
+ printf("Elapsed time: %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC);
+
+ munmap((void *)beg, len); /* cast drops const to match munmap's void * parameter; result is not checked */
+ return rv;
+}
+
+/* Map FILENAME read-only and store its size (st_size) in *N; any failure exits via err(3) with status 1. */
+const unsigned char *
+readfile(const char *filename, size_t *n)
+{
+	int fd = open(filename, O_RDONLY);
+	if (fd == -1)
+		err(1, "open: %s", filename);
+	struct stat sb;
+	if (fstat(fd, &sb) == -1)
+		err(1, "fstat: %s", filename);
+	*n = sb.st_size;
+	const unsigned char *p = mmap(NULL, *n, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (p == MAP_FAILED)
+		err(1, "mmap: %s", filename);
+	close(fd); /* the mapping outlives the descriptor, so it need not stay open */
+	return p;
+}