From f6e7e761f4a42df9b975cd8c3b1e551d845a6d46 Mon Sep 17 00:00:00 2001
From: Thomas Voss
Date: Mon, 24 Jun 2024 15:34:36 +0200
Subject: Utilize SSE4.1 to skip comments at 2x speed

---
 make.c              | 84 +++++++++++++++++++++++++++++++++++++++++++----------
 src/lexer-generic.c | 31 ++++++++++++++++++++
 src/lexer-sse4_1.c  | 59 +++++++++++++++++++++++++++++++++++++
 src/lexer.c         | 30 ++-----------------
 4 files changed, 160 insertions(+), 44 deletions(-)
 create mode 100644 src/lexer-generic.c
 create mode 100644 src/lexer-sse4_1.c

diff --git a/make.c b/make.c
index f96a039..598db1d 100644
--- a/make.c
+++ b/make.c
@@ -366,33 +366,85 @@ mkgmp(int nprocs)
 bool
 tagvalid(const char *file)
 {
-	if (strstr(file, "-avx2.") != NULL && (simd_flags & SIMD_AVX2) == 0)
-		return false;
-	if (strstr(file, "-neon.") != NULL && (simd_flags & SIMD_NEON) == 0)
-		return false;
-	if (strstr(file, "-sse4_1.") != NULL && (simd_flags & SIMD_SSE4_1) == 0)
-		return false;
-	return true;
+	/* Untagged files are always valid.  A tagged file is valid only if it
+	   is the first variant listed in TAGS that both exists and is allowed
+	   by SIMD_FLAGS. */
+
+	/* Only the basename carries a tag; a dash in a directory name, or a
+	   dash that is not followed by an extension, is not a tag */
+	const char *base = strrchr(file, '/');
+	base = base != NULL ? base + 1 : file;
+	const char *sep = strrchr(base, '-');
+	const char *ext = strrchr(base, '.');
+	if (sep == NULL || ext == NULL || ext < sep)
+		return true;
+
+	/* In order of preference; a flag of 0 means no CPU requirement */
+	static const struct {
+		const char *tag;
+		int flag;
+	} tags[] = {
+		{"avx2",    SIMD_AVX2  },
+		{"sse4_1",  SIMD_SSE4_1},
+		{"neon",    SIMD_NEON  },
+		{"generic", 0          },
+	};
+
+	/* FILE with its tag swapped for the longest one, plus the NUL */
+	size_t bufsz = strlen(file) + sizeof("generic");
+	char *buf = malloc(bufsz);
+	assert(buf != NULL);
+
+	bool ret = false;
+	for (size_t i = 0; i < lengthof(tags); i++) {
+		if (tags[i].flag != 0 && (simd_flags & tags[i].flag) == 0)
+			continue;
+		snprintf(buf, bufsz, "%.*s-%s%s", (int)(sep - file), file,
+		         tags[i].tag, ext);
+		if (fexists(buf)) {
+			ret = strcmp(buf, file) == 0;
+			break;
+		}
+	}
+
+	free(buf);
+	return ret;
 }
 
 void
 chk_cpu_flags(void)
 {
+	uint32_t regs[4]; /* EAX, EBX, ECX, EDX */
+	(void)regs;       /* Might be unused */
+
 	if (!rflag)
 		return;
 
-#if __GNUC__ && __x86_64__
-	uint32_t exx;
-	asm volatile("cpuid" : "=b"(exx) : "a"(7), "c"(0));
-	if (exx & (1 << 5)) {
+	/* Test for AVX2 */
+#if __AVX2__
+	simd_flags |= SIMD_AVX2;
+#elif __GNUC__ && __x86_64__
+	/* CPUID writes all four registers, so each must be an output */
+	asm volatile("cpuid"
+	             : "=a"(regs[0]), "=b"(regs[1]), "=c"(regs[2]), "=d"(regs[3])
+	             : "a"(7), "c"(0));
+	if (regs[1] & (1 << 5))
 		simd_flags |= SIMD_AVX2;
-		return;
-	}
+#endif
 
-	asm volatile("cpuid" : "=c"(exx) : "a"(1), "c"(0));
-	if (exx & (1 << 19))
+	/* Test for SSE4.1 */
+#if __SSE4_1__
+	simd_flags |= SIMD_SSE4_1;
+#elif __GNUC__ && __x86_64__
+	asm volatile("cpuid"
+	             : "=a"(regs[0]), "=b"(regs[1]), "=c"(regs[2]), "=d"(regs[3])
+	             : "a"(1), "c"(0));
+	if (regs[2] & (1 << 19))
 		simd_flags |= SIMD_SSE4_1;
-#elif __ARM_NEON
+#endif
+
+	/* Test for NEON */
+#if __ARM_NEON
 	simd_flags |= SIMD_NEON;
 #endif
 }
diff --git a/src/lexer-generic.c b/src/lexer-generic.c
new file mode 100644
index 0000000..b841886
--- /dev/null
+++ b/src/lexer-generic.c
@@ -0,0 +1,31 @@
+#include <stdbool.h>
+
+#include "common.h"
+#include "types.h"
+
+/* Portable implementation of skpcmnt(); the contract is documented at the
+   prototype in lexer.c.  *PTR must point at the asterisk of the opening
+   delimiter, which is skipped before scanning begins. */
+bool
+skpcmnt(const uchar **ptr, const uchar *end)
+{
+	int nst = 1;
+	const uchar *p = *ptr;
+
+	for (p++; likely(p < end); p++) {
+		if (p + 1 < end) {
+			if (p[0] == '*' && p[1] == '/') {
+				p++;
+				if (--nst == 0) {
+					*ptr = ++p;
+					return true;
+				}
+			} else if (p[0] == '/' && p[1] == '*') {
+				p++;
+				nst++;
+			}
+		}
+	}
+
+	return false;
+}
diff --git a/src/lexer-sse4_1.c b/src/lexer-sse4_1.c
new file mode 100644
index 0000000..16df370
--- /dev/null
+++ b/src/lexer-sse4_1.c
@@ -0,0 +1,59 @@
+#include <emmintrin.h>
+#include <stdbool.h>
+
+#include "common.h"
+#include "types.h"
+
+/* Bytes consumed per vector iteration */
+#define BLKSZ 16
+
+/* SIMD implementation of skpcmnt(); the contract is documented at the
+   prototype in lexer.c.  *PTR must point at the asterisk of the opening
+   delimiter.  Only SSE2 operations are used, all of which are available on
+   any CPU that passes the SSE4.1 check in make.c. */
+bool
+skpcmnt(const uchar **ptr, const uchar *end)
+{
+	int nst = 1;
+	const __m128i slash = _mm_set1_epi8('/');
+	const __m128i star = _mm_set1_epi8('*');
+
+	/* Skip the opening asterisk so that it cannot double as the first
+	   half of a closing delimiter; the generic lexer does the same */
+	const uchar *p = *ptr + 1;
+
+	while (likely(p < end)) {
+		if (end - p >= BLKSZ) {
+			/* A full block is in bounds: find the first delimiter byte */
+			__m128i blk = _mm_loadu_si128((const __m128i *)p);
+			__m128i hit = _mm_or_si128(_mm_cmpeq_epi8(blk, slash),
+			                           _mm_cmpeq_epi8(blk, star));
+			int mask = _mm_movemask_epi8(hit);
+
+			if (mask == 0) {
+				p += BLKSZ;
+				continue;
+			}
+			p += __builtin_ctz((unsigned)mask);
+		} else if (*p != '/' && *p != '*') {
+			/* Short tail: go bytewise instead of loading past END */
+			p++;
+			continue;
+		}
+
+		/* *P is a slash or an asterisk; a delimiter needs a second byte */
+		if (p + 1 < end && p[0] == '*' && p[1] == '/') {
+			p += 2;
+			if (--nst == 0) {
+				*ptr = p;
+				return true;
+			}
+		} else if (p + 1 < end && p[0] == '/' && p[1] == '*') {
+			p += 2;
+			nst++;
+		} else
+			p++;
+	}
+
+	return false;
+}
diff --git a/src/lexer.c b/src/lexer.c
index ed2414a..0ed057d 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -32,7 +32,7 @@ static void lexemesresz(lexemes_t *toks)
 /* Advance PTR (which points to the start of a comment) to the end of the
    comment, or END. Returns true if the comment was well-formed and false if
    the comment was unterminated. Handles nested comments. */
-static bool skip_comment(const uchar **ptr, const uchar *end)
+bool skpcmnt(const uchar **ptr, const uchar *end)
 	__attribute__((nonnull));
 
 static const bool is_numeric_lookup[UCHAR_MAX + 1] = {
@@ -88,7 +88,7 @@ lexstring(const uchar *code, size_t codesz)
 		/* Single- or double-byte literals */
 		case '/':
 			if (code < end && code[0] == '*') {
-				if (!skip_comment(&code, end))
+				if (!skpcmnt(&code, end))
 					err("Unterminated comment at byte %td", code - start);
 				continue;
 			}
@@ -172,32 +172,6 @@ fallback:
 	return data;
 }
 
-bool
-skip_comment(const uchar **ptr, const uchar *end)
-{
-	int nst = 1;
-	const uchar *p = *ptr;
-
-	for (p++; likely(p < end); p++) {
-		if (p + 1 < end) {
-			if (p[0] == '*' && p[1] == '/') {
-				p++;
-				if (--nst == 0)
-					goto out;
-			} else if (p[0] == '/' && p[1] == '*') {
-				p++;
-				nst++;
-			}
-		}
-	}
-
-	return false;
-
-out:
-	*ptr = ++p;
-	return true;
-}
-
 lexemes_t
 mklexemes(void)
 {
-- 
cgit v1.2.3