/*
 * isascii.c -- report whether a file consists solely of ASCII bytes, timing
 * an AVX-512 scan against a plain byte-at-a-time loop.
 *
 * Provenance: recovered from a whitespace-collapsed cgit patch view
 * (trailer "cgit v1.2.3"):
 *
 *   commit  e6d02a085533134ee6bf1bca862b03968a83b591
 *   From:   Thomas Voss
 *   Date:   Sun, 25 Aug 2024 00:24:53 +0200
 *   Subject: Rename isascii-avx to simd-isascii
 *
 * The patch created three files under c/simd-isascii/ (108 insertions):
 *
 *   .gitignore (1 line):
 *       isascii
 *
 *   Makefile (8 lines; recipe lines are tab-indented in a real Makefile):
 *       include ../base.mk
 *
 *       all: isascii
 *       isascii: isascii.c
 *               $(CC) $(CFLAGS) -lbsd -o $@ $<
 *
 *       clean:
 *               rm -f isascii
 *
 *   isascii.c (99 lines): the program below, with the fixes listed here.
 *
 * Changes relative to the recorded patch:
 *   - The header names of every #include were lost in extraction; the list
 *     below is reconstructed from the identifiers the code uses.
 *   - getprogname() is no longer called, so -lbsd is not required anymore.
 *   - The AVX-512 code carries its own target attribute and is selected at
 *     run time, so no -mavx512* flag is needed and CPUs without AVX-512BW
 *     fall back to the scalar loop.
 *   - Empty files are handled (mmap rejects a zero length) and the file
 *     descriptor is closed once the mapping exists.
 */
#include <sys/mman.h>
#include <sys/stat.h>

#include <err.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#include <immintrin.h>

/* Number of bytes held by one 512-bit vector. */
#define VECWDTH (512 / 8)

static const unsigned char *readfile(const char *, size_t *);

/*
 * Scalar reference implementation.
 *
 * Returns true when none of the N bytes at S is greater than 0x7F, which
 * includes the case N == 0.
 */
bool
strisascii_dumb(const unsigned char *s, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (s[i] > 0x7F)
			return false;
	}
	return true;
}

/*
 * AVX-512 implementation: tests VECWDTH bytes per iteration for a set high
 * bit and hands the remaining (< VECWDTH) bytes to the scalar loop.
 *
 * The target attribute lets this one function use AVX-512 instructions
 * regardless of the flags the rest of the file is compiled with.  It must
 * only be called after confirming that the CPU supports AVX-512BW.
 */
__attribute__((target("avx512f,avx512bw")))
static bool
strisascii_avx512(const unsigned char *s, size_t n)
{
	/* 0x80 in every byte lane; -0x80 has that bit pattern as a char. */
	const __m512i msk = _mm512_set1_epi8(-0x80);

	while (n >= VECWDTH) {
		/* A non-zero mask means some byte has its high bit set. */
		if (_mm512_test_epi8_mask(_mm512_loadu_si512(s), msk) != 0)
			return false;
		s += VECWDTH;
		n -= VECWDTH;
	}
	return strisascii_dumb(s, n);
}

/*
 * Returns true when none of the N bytes at S is greater than 0x7F.
 *
 * Uses the AVX-512 scan when the running CPU reports AVX-512BW support and
 * the scalar loop otherwise, so the result is the same on every machine.
 */
bool
strisascii(const unsigned char *s, size_t n)
{
	if (__builtin_cpu_supports("avx512bw"))
		return strisascii_avx512(s, n);
	return strisascii_dumb(s, n);
}

/*
 * Usage: isascii file
 *
 * Maps the file, runs strisascii() and then strisascii_dumb() over it, and
 * for each prints "Non-ASCII" if a byte above 0x7F was found, followed by
 * the CPU time the check took.  Exits with EXIT_FAILURE on a usage error;
 * readfile() exits with status 1 if the file cannot be read.
 */
int
main(int argc, char **argv)
{
	if (argc != 2) {
		/* argv[0] may be absent when argc is 0. */
		fprintf(stderr, "Usage: %s file\n",
		        argc > 0 ? argv[0] : "isascii");
		exit(EXIT_FAILURE);
	}

	size_t len;
	const unsigned char *beg = readfile(argv[1], &len);

	clock_t tmbeg = clock();
	if (!strisascii(beg, len))
		puts("Non-ASCII");
	printf("Elapsed time: %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC);

	tmbeg = clock();
	if (!strisascii_dumb(beg, len))
		puts("Non-ASCII");
	printf("Elapsed time: %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC);

	/* Nothing was mapped for an empty file. */
	if (len != 0 && munmap((void *)beg, len) == -1)
		warn("munmap");
	return EXIT_SUCCESS;
}

/*
 * Maps FILENAME read-only and returns a pointer to its contents, storing
 * the size in bytes in *N.  The caller releases the mapping with
 * munmap(ptr, *N).
 *
 * For a file whose reported size is 0 no mapping is created: *N is set to 0
 * and NULL is returned.  Any failure prints a diagnostic and exits with
 * status 1.
 */
static const unsigned char *
readfile(const char *filename, size_t *n)
{
	int fd = open(filename, O_RDONLY);
	if (fd == -1)
		err(1, "open: %s", filename);

	struct stat sb;
	if (fstat(fd, &sb) == -1)
		err(1, "fstat: %s", filename);

	/* off_t is signed and may be wider than size_t. */
	if (sb.st_size < 0 || (uintmax_t)sb.st_size > SIZE_MAX)
		errx(1, "%s: file too large to map", filename);
	*n = (size_t)sb.st_size;

	const unsigned char *p = NULL;
	if (*n != 0) {
		/* mmap fails with EINVAL for a zero length, hence the guard. */
		p = mmap(NULL, *n, PROT_READ, MAP_PRIVATE, fd, 0);
		if (p == MAP_FAILED)
			err(1, "mmap: %s", filename);
	}

	/* The mapping stays valid after the descriptor is closed. */
	close(fd);
	return p;
}