aboutsummaryrefslogtreecommitdiff
path: root/c
diff options
context:
space:
mode:
Diffstat (limited to 'c')
-rw-r--r-- c/simd-isascii/README 8
-rw-r--r-- c/simd-isascii/isascii.c 36
2 files changed, 37 insertions, 7 deletions
diff --git a/c/simd-isascii/README b/c/simd-isascii/README
new file mode 100644
index 0000000..22ba633
--- /dev/null
+++ b/c/simd-isascii/README
@@ -0,0 +1,8 @@
+You can generate test data via the command below, and then use
+‘echo foo >>out’ (with ‘foo’ replaced by non-ASCII text) to append some non-ASCII bytes to the stream.
+
+ $ tr -cd ' -~' </dev/urandom | head -c BYTES >out
+
+Findings:
+1. GCC doesn’t auto-vectorize the generic loop
+2. The AVX-2 version outperforms AVX-512 by a huge margin
diff --git a/c/simd-isascii/isascii.c b/c/simd-isascii/isascii.c
index 299894f..24ef4fc 100644
--- a/c/simd-isascii/isascii.c
+++ b/c/simd-isascii/isascii.c
@@ -12,19 +12,36 @@
#include <bsd/stdlib.h>
-#define VECWDTH (512 / 8)
-
static const unsigned char *readfile(const char *, size_t *);
bool
-strisascii(const unsigned char *s, size_t n)
+strisascii_avx512(const unsigned char *s, size_t n)
{
__m512i msk = _mm512_set1_epi8((char)(1 << 7));
- while (n >= VECWDTH) {
+ while (n >= sizeof(__m512i)) {
if (_mm512_test_epi8_mask(_mm512_loadu_epi8(s), msk) != 0)
return false;
- s += VECWDTH;
- n -= VECWDTH;
+ s += sizeof(__m512i);
+ n -= sizeof(__m512i);
+ }
+ for (size_t i = 0; i < n; i++) {
+ if (s[i] > 0x7F)
+ return false;
+ }
+ return true;
+}
+
+bool
+strisascii_avx2(const unsigned char *s, size_t n)
+{
+ __m256i msk = _mm256_set1_epi8((char)(1 << 7));
+ while (n >= sizeof(__m256i)) {
+ __m256i v =
+ _mm256_and_si256(_mm256_loadu_si256((const __m256i *)s), msk);
+ if (_mm256_movemask_epi8(v) != 0)
+ return false;
+ s += sizeof(__m256i);
+ n -= sizeof(__m256i);
}
for (size_t i = 0; i < n; i++) {
if (s[i] > 0x7F)
@@ -56,11 +73,16 @@ main(int argc, char **argv)
const unsigned char *beg = readfile(argv[1], &len);
clock_t tmbeg = clock();
- if (!strisascii(beg, len))
+ if (!strisascii_avx512(beg, len))
puts("Non-ASCII");
printf("Elapsed time (AVX-512): %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC);
tmbeg = clock();
+ if (!strisascii_avx2((const unsigned char *)beg, len))
+ puts("Non-ASCII");
+ printf("Elapsed time (AVX-2): %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC);
+
+ tmbeg = clock();
if (!strisascii_dumb((const unsigned char *)beg, len))
puts("Non-ASCII");
printf("Elapsed time (Generic): %.3fs\n", (double)(clock() - tmbeg) / CLOCKS_PER_SEC);