diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/unicode.c | 59 |
1 files changed, 57 insertions, 2 deletions
diff --git a/src/unicode.c b/src/unicode.c index b32b413..6ad34a3 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -1,3 +1,7 @@ +#if ORYX_BMI2 +# include <immintrin.h> +#endif + #include "common.h" #include "types.h" #include "unicode-data.h" @@ -45,9 +49,19 @@ RUNE_IS_GEN(rune_is_xidc, xidc_stage1, xidc_stage2, 128) static const char lengths[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0}; static const rune mins[] = {RUNE_C(4194304), 0, 128, 2048, RUNE_C(65536)}; +static const int shifte[] = {0, 6, 4, 2, 0}; + +#if ORYX_BMI2 +static const uint32_t pextmsk[] = { + 0x7F000000, + 0x1F3F0000, + 0x0F3F3F00, + 0x073F3F3F, +}; +#else static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; static const int shiftc[] = {0, 18, 12, 6, 0}; -static const int shifte[] = {0, 6, 4, 2, 0}; +#endif rune utf8_decode(const uchar **buf) @@ -55,12 +69,32 @@ utf8_decode(const uchar **buf) const uchar *s = *buf; int len = lengths[s[0] >> 3]; *buf = s + len + !len; - +#if ORYX_BMI2 + rune c = 0; + switch (len) { + case 4: + c |= (rune)s[3] << 0; + /* fallthrough */ + case 3: + c |= (rune)s[2] << 8; + /* fallthrough */ + case 2: + c |= (rune)s[1] << 16; + /* fallthrough */ + case 1: + c |= (rune)s[0] << 24; + break; + default: + __builtin_unreachable(); + } + return _pext_u32(c, pextmsk[len - 1]); +#else rune c = (rune)(s[0] & masks[len]) << 18; c |= (rune)(s[1] & 0x3f) << 12; c |= (rune)(s[2] & 0x3f) << 6; c |= (rune)(s[3] & 0x3f) << 0; return c >> shiftc[len]; +#endif } size_t @@ -72,11 +106,32 @@ utf8_validate_off(const uchar *s, size_t len) const uchar *next = s + len + !len; +#if ORYX_BMI2 + rune c = 0; + switch (len) { + case 4: + c |= (rune)s[3] << 0; + /* fallthrough */ + case 3: + c |= (rune)s[2] << 8; + /* fallthrough */ + case 2: + c |= (rune)s[1] << 16; + /* fallthrough */ + case 1: + c |= (rune)s[0] << 24; + break; + default: + __builtin_unreachable(); + } + c = _pext_u32(c, pextmsk[len - 1]); +#else rune c = (rune)(s[0] & masks[len]) << 18; c |= (rune)(s[1] & 0x3f) << 12; c |= (rune)(s[2] & 0x3f) << 6; c |= (rune)(s[3] & 0x3f) << 0; c >>= shiftc[len]; +#endif int e = (c < mins[len]) << 6; e |= ((c >> 11) == 0x1B) << 7; |