diff options
author | Thomas Voss <mail@thomasvoss.com> | 2024-08-25 00:24:53 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2024-08-25 00:24:53 +0200 |
commit | e6d02a085533134ee6bf1bca862b03968a83b591 (patch) | |
tree | 3ea799873b92e00cf05aa7844f69df65c76c5966 /c/simd-isascii | |
parent | 3db6b4427de43ec3ab54f6cec3e1a014780d6890 (diff) |
Rename isascii-avx to simd-isascii
Diffstat (limited to 'c/simd-isascii')
-rw-r--r-- | c/simd-isascii/.gitignore | 1 | ||||
-rw-r--r-- | c/simd-isascii/Makefile | 8 | ||||
-rw-r--r-- | c/simd-isascii/isascii.c | 99 |
3 files changed, 108 insertions, 0 deletions
diff --git a/c/simd-isascii/.gitignore b/c/simd-isascii/.gitignore new file mode 100644 index 0000000..4328e16 --- /dev/null +++ b/c/simd-isascii/.gitignore @@ -0,0 +1 @@ +isascii diff --git a/c/simd-isascii/Makefile b/c/simd-isascii/Makefile new file mode 100644 index 0000000..3c49e43 --- /dev/null +++ b/c/simd-isascii/Makefile @@ -0,0 +1,8 @@ +include ../base.mk + +all: isascii +isascii: isascii.c + $(CC) $(CFLAGS) -lbsd -o $@ $< + +clean: + rm -f isascii diff --git a/c/simd-isascii/isascii.c b/c/simd-isascii/isascii.c new file mode 100644 index 0000000..612aa49 --- /dev/null +++ b/c/simd-isascii/isascii.c @@ -0,0 +1,99 @@ +#include <sys/mman.h> +#include <sys/stat.h> + +#include <err.h> +#include <fcntl.h> +#include <immintrin.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> + +#include <bsd/stdlib.h> + +#define VECWDTH (512 / 8) + +static const unsigned char *readfile(const char *, size_t *); + +static const uint8_t charmsk[64] = { + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +}; + +bool +strisascii(const unsigned char *s, size_t n) +{ + __m512i msk = _mm512_loadu_epi8(charmsk); + while (n >= VECWDTH) { + if (_mm512_test_epi8_mask(_mm512_loadu_epi8(s), msk) != 0) + return false; + s += VECWDTH; + n -= VECWDTH; + } + for (size_t i = 0; i < n; i++) { + if (s[i] > 0x7F) + return false; + } + return true; +} + +bool +strisascii_dumb(const unsigned char *s, size_t n) +{ + for (size_t i = 0; i < n; i++) { + if (s[i] > 0x7F) + return false; + } + return true; +} + +int +main(int argc, char **argv) +{ + if (argc != 2) { + fprintf(stderr, "Usage: %s file\n", getprogname()); + exit(EXIT_FAILURE); + } + + size_t len; + int rv = EXIT_SUCCESS; + const unsigned char *beg = readfile(argv[1], &len); + + clock_t tmbeg = clock(); + if (!strisascii(beg, len)) + puts("Non-ASCII"); + printf("Elapsed time: %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC); + + tmbeg = clock(); + if (!strisascii_dumb((const unsigned char *)beg, len)) + puts("Non-ASCII"); + printf("Elapsed time: %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC); + + munmap((void *)beg, len); + return rv; +} + +const unsigned char * +readfile(const char *filename, size_t *n) +{ + int fd = open(filename, O_RDONLY); + if (fd == -1) + err(1, "open: %s", filename); + + struct stat sb; + if (fstat(fd, &sb) == -1) + err(1, "fstat: %s", filename); + + *n = sb.st_size; + const unsigned char *p = mmap(NULL, *n, PROT_READ, MAP_PRIVATE, fd, 0); + if (p == MAP_FAILED) + err(1, "mmap: %s", filename); + return p; +} |