aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2024-06-24 15:34:36 +0200
committer Thomas Voss <mail@thomasvoss.com> 2024-06-24 15:34:36 +0200
commit f6e7e761f4a42df9b975cd8c3b1e551d845a6d46 (patch)
tree 25d1b23d5ddf6f1df02a651291e72b4a2a8442c8 /src
parent 0696c3d2ab59166e7519c2a9de273f7f498b1eaa (diff)
Utilize SSE4.1 to skip comments at 2x speed
Diffstat (limited to 'src')
-rw-r--r-- src/lexer-generic.c 28
-rw-r--r-- src/lexer-sse4_1.c 43
-rw-r--r-- src/lexer.c 30
3 files changed, 73 insertions, 28 deletions
diff --git a/src/lexer-generic.c b/src/lexer-generic.c
new file mode 100644
index 0000000..b841886
--- /dev/null
+++ b/src/lexer-generic.c
@@ -0,0 +1,28 @@
+#include <stdbool.h>
+
+#include "common.h"
+#include "types.h"
+
+/* Portable comment skipper.
+
+   *PTR is expected to point at the '*' of the comment opener and END one
+   past the last byte of input.  When the terminator matching the opener
+   is found, *PTR is moved to the byte following it and true is returned.
+   If the input runs out first, false is returned and *PTR is not
+   modified.  Nested comments are tracked with a depth counter. */
+bool
+skpcmnt(const uchar **ptr, const uchar *end)
+{
+	const uchar *cur = *ptr + 1; /* step over the '*' of the opener */
+	int depth = 1;
+
+	while (likely(cur < end)) {
+		/* Delimiters are two bytes wide, so they can only be tested for
+		   while at least two bytes remain */
+		bool pair = end - cur > 1;
+
+		if (pair && cur[0] == '/' && cur[1] == '*') {
+			/* Nested opener */
+			depth++;
+			cur += 2;
+		} else if (pair && cur[0] == '*' && cur[1] == '/') {
+			/* Terminator; done once the outermost level is closed */
+			cur += 2;
+			if (--depth == 0) {
+				*ptr = cur;
+				return true;
+			}
+		} else
+			cur++;
+	}
+
+	return false;
+}
diff --git a/src/lexer-sse4_1.c b/src/lexer-sse4_1.c
new file mode 100644
index 0000000..16df370
--- /dev/null
+++ b/src/lexer-sse4_1.c
@@ -0,0 +1,43 @@
+#include <stdbool.h>
+#include <stddef.h>
+#include <x86intrin.h>
+
+#include "common.h"
+#include "types.h"
+
+/* Skip a (possibly nested) block comment, scanning 16 bytes at a time.
+
+   On entry *PTR must point at the '*' of the comment opener and END one
+   past the last readable byte.  On success *PTR is set to the byte after
+   the matching terminator and true is returned.  If the comment is
+   unterminated, false is returned and *PTR is left untouched.
+
+   NOTE(review): _mm_cmpestri (PCMPESTRI) is an SSE4.2 instruction, not
+   SSE4.1 -- confirm that the build enables SSE4.2 for this file. */
+bool
+skpcmnt(const uchar **ptr, const uchar *end)
+{
+	int nst = 1;
+	/* The two bytes of interest; the remaining lanes are zero-filled */
+	const uchar needles[16] = {'/', '*'};
+	const __m128i set = _mm_loadu_si128((const __m128i *)needles);
+	/* Step over the '*' of the opener so that a '/' directly following it
+	   is not mistaken for a terminator; the generic implementation does
+	   the same. */
+	const uchar *p = *ptr + 1;
+
+	/* A delimiter is two bytes long, so stop once fewer than two remain */
+	while (likely(end - p >= 2)) {
+		ptrdiff_t len = end - p;
+		int off = 0;
+
+		/* Only issue the 16-byte load when all 16 bytes lie inside the
+		   buffer.  Shorter tails keep OFF at 0 and are therefore examined
+		   one position at a time by the scalar checks below. */
+		if (len >= 16) {
+			__m128i blk = _mm_loadu_si128((const __m128i *)p);
+			off = _mm_cmpestri(set, 2, blk, 16,
+			                   _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY
+			                       | _SIDD_LEAST_SIGNIFICANT);
+
+			/* 16 means that neither needle occurs in this block */
+			if (off == 16) {
+				p += 16;
+				continue;
+			}
+
+			/* The candidate is the very last byte of the input, so it
+			   cannot begin a two-byte delimiter */
+			if (len - off < 2)
+				break;
+		}
+
+		if (p[off] == '*' && p[off + 1] == '/') {
+			p += off + 2;
+			if (--nst == 0) {
+				*ptr = p;
+				return true;
+			}
+		} else if (p[off] == '/' && p[off + 1] == '*') {
+			p += off + 2;
+			nst++;
+		} else
+			p += off + 1;
+	}
+
+	return false;
+}
diff --git a/src/lexer.c b/src/lexer.c
index ed2414a..0ed057d 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -32,7 +32,7 @@ static void lexemesresz(lexemes_t *toks)
/* Advance PTR (which points at the '*' of a comment opener) to the byte
   just past the end of the comment.  Returns true if the comment was
   well-formed.  Returns false and leaves PTR unchanged if the comment
   was unterminated.  END points one past the last byte of input.
   Handles nested comments. */
-static bool skip_comment(const uchar **ptr, const uchar *end)
+bool skpcmnt(const uchar **ptr, const uchar *end)
__attribute__((nonnull));
static const bool is_numeric_lookup[UCHAR_MAX + 1] = {
@@ -88,7 +88,7 @@ lexstring(const uchar *code, size_t codesz)
/* Single- or double-byte literals */
case '/':
if (code < end && code[0] == '*') {
- if (!skip_comment(&code, end))
+ if (!skpcmnt(&code, end))
err("Unterminated comment at byte %td", code - start);
continue;
}
@@ -172,32 +172,6 @@ fallback:
return data;
}
-/* Scan forward from *PTR for the terminator that closes the comment,
-   tracking nesting depth in NST.  On success *PTR is set to the byte
-   after the terminator and true is returned; otherwise false is returned
-   and *PTR is not written. */
-bool
-skip_comment(const uchar **ptr, const uchar *end)
-{
- int nst = 1;
- const uchar *p = *ptr;
-
- for (p++; likely(p < end); p++) {
- if (p + 1 < end) {
- if (p[0] == '*' && p[1] == '/') {
- p++;
- if (--nst == 0)
- goto out;
- } else if (p[0] == '/' && p[1] == '*') {
- p++;
- nst++;
- }
- }
- }
-
- return false;
-
-out:
- *ptr = ++p;
- return true;
-}
-
lexemes_t
mklexemes(void)
{