diff options
Diffstat (limited to 'c')
-rw-r--r-- | c/simd-isascii/README | 8 | ||||
-rw-r--r-- | c/simd-isascii/isascii.c | 36 |
2 files changed, 37 insertions, 7 deletions
diff --git a/c/simd-isascii/README b/c/simd-isascii/README new file mode 100644 index 0000000..22ba633 --- /dev/null +++ b/c/simd-isascii/README @@ -0,0 +1,8 @@ +You can generate test data via the following command, and then just +use ‘echo foo >>out’ to write some non-ASCII into the stream. + + $ tr -cd ' -~' </dev/urandom | head -c BYTES >out + +Findings: +1. GCC doesn’t auto-vectorize the generic loop +2. The AVX-2 version out-performs AVX-512 by a huge margin diff --git a/c/simd-isascii/isascii.c b/c/simd-isascii/isascii.c index 299894f..24ef4fc 100644 --- a/c/simd-isascii/isascii.c +++ b/c/simd-isascii/isascii.c @@ -12,19 +12,36 @@ #include <bsd/stdlib.h> -#define VECWDTH (512 / 8) - static const unsigned char *readfile(const char *, size_t *); bool -strisascii(const unsigned char *s, size_t n) +strisascii_avx512(const unsigned char *s, size_t n) { __m512i msk = _mm512_set1_epi8((char)(1 << 7)); - while (n >= VECWDTH) { + while (n >= sizeof(__m512i)) { if (_mm512_test_epi8_mask(_mm512_loadu_epi8(s), msk) != 0) return false; - s += VECWDTH; - n -= VECWDTH; + s += sizeof(__m512i); + n -= sizeof(__m512i); + } + for (size_t i = 0; i < n; i++) { + if (s[i] > 0x7F) + return false; + } + return true; +} + +bool +strisascii_avx2(const unsigned char *s, size_t n) +{ + __m256i msk = _mm256_set1_epi8((char)(1 << 7)); + while (n >= sizeof(__m256i)) { + __m256i v = + _mm256_and_si256(_mm256_loadu_si256((const __m256i *)s), msk); + if (_mm256_movemask_epi8(v) != 0) + return false; + s += sizeof(__m256i); + n -= sizeof(__m256i); } for (size_t i = 0; i < n; i++) { if (s[i] > 0x7F) @@ -56,11 +73,16 @@ main(int argc, char **argv) const unsigned char *beg = readfile(argv[1], &len); clock_t tmbeg = clock(); - if (!strisascii(beg, len)) + if (!strisascii_avx512(beg, len)) puts("Non-ASCII"); printf("Elapsed time (AVX-512): %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC); tmbeg = clock(); + if (!strisascii_avx2((const unsigned char *)beg, len)) + puts("Non-ASCII"); +
printf("Elapsed time (AVX-2): %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC); + + tmbeg = clock(); if (!strisascii_dumb((const unsigned char *)beg, len)) puts("Non-ASCII"); printf("Elapsed time (Generic): %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC); |