From 3b59bedbbb6e4aeaa584d14157aa92255b3a7031 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Tue, 27 Aug 2024 00:25:00 +0200 Subject: Add AVX-2 --- c/simd-isascii/README | 8 ++++++++ c/simd-isascii/isascii.c | 36 +++++++++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 7 deletions(-) create mode 100644 c/simd-isascii/README diff --git a/c/simd-isascii/README b/c/simd-isascii/README new file mode 100644 index 0000000..22ba633 --- /dev/null +++ b/c/simd-isascii/README @@ -0,0 +1,8 @@ +You can generate test data via the following command, and then just +use ‘echo foo >>out’ to write some non-ASCII into the stream. + + $ tr -cd ' -~' out + +Findings: +1. GCC doesn’t auto-vectorize the generic loop +2. The AVX-2 version outperforms AVX-512 by a huge margin diff --git a/c/simd-isascii/isascii.c b/c/simd-isascii/isascii.c index 299894f..24ef4fc 100644 --- a/c/simd-isascii/isascii.c +++ b/c/simd-isascii/isascii.c @@ -12,19 +12,36 @@ #include -#define VECWDTH (512 / 8) - static const unsigned char *readfile(const char *, size_t *); bool -strisascii(const unsigned char *s, size_t n) +strisascii_avx512(const unsigned char *s, size_t n) { __m512i msk = _mm512_set1_epi8((char)(1 << 7)); - while (n >= VECWDTH) { + while (n >= sizeof(__m512i)) { if (_mm512_test_epi8_mask(_mm512_loadu_epi8(s), msk) != 0) return false; - s += VECWDTH; - n -= VECWDTH; + s += sizeof(__m512i); + n -= sizeof(__m512i); + } + for (size_t i = 0; i < n; i++) { + if (s[i] > 0x7F) + return false; + } + return true; +} + +bool +strisascii_avx2(const unsigned char *s, size_t n) +{ + __m256i msk = _mm256_set1_epi8((char)(1 << 7)); + while (n >= sizeof(__m256i)) { + __m256i v = + _mm256_and_si256(_mm256_loadu_si256((const __m256i *)s), msk); + if (_mm256_movemask_epi8(v) != 0) + return false; + s += sizeof(__m256i); + n -= sizeof(__m256i); } for (size_t i = 0; i < n; i++) { if (s[i] > 0x7F) @@ -56,10 +73,15 @@ main(int argc, char **argv) const unsigned char *beg = readfile(argv[1], &len); 
clock_t tmbeg = clock(); - if (!strisascii(beg, len)) + if (!strisascii_avx512(beg, len)) puts("Non-ASCII"); printf("Elapsed time (AVX-512): %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC); + tmbeg = clock(); + if (!strisascii_avx2((const unsigned char *)beg, len)) + puts("Non-ASCII"); + printf("Elapsed time (AVX-2): %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC); + tmbeg = clock(); if (!strisascii_dumb((const unsigned char *)beg, len)) puts("Non-ASCII"); -- cgit v1.2.3