aboutsummaryrefslogtreecommitdiff
path: root/src/lexer.c
blob: 970202a854c03c0a66db7874fac34c51c5fc592c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>

#include "errors.h"
#include "lexer.h"
#include "unicode.h"

/*
 * Lex the source text CODE, which is CODESZ bytes long.
 *
 * The input is first checked for UTF-8 validity; if the check fails,
 * err() is called with the offending byte offset.  The text is then
 * walked one rune at a time.
 *
 * On return *LCNT holds the number of lexemes and the return value
 * points at the lexeme array.  Nothing in this function populates the
 * array yet, so as written it stores 0 in *LCNT and returns a null
 * pointer.
 */
struct lexeme *
lexstring(const char *code, size_t codesz, size_t *lcnt)
{
	/* Zero-initialized result buffer: array pointer, number of lexemes
	   in use, and (presumably) allocated capacity -- 'buf' is never
	   read or written here, TODO confirm its meaning. */
	struct {
		struct lexeme *p;
		size_t len, buf;
	} out = {0};

	/* With ORYX_SIMD enabled the SIMD validator runs first and the
	   offset-reporting validator below only runs when it rejects the
	   input.  Without ORYX_SIMD the block below always runs. */
#if ORYX_SIMD
	if (!utf8_validate_simd(code, codesz))
#endif
	{
		/* A non-zero result is treated as a failure and is reported
		   minus one, i.e. it appears to be a 1-based position so that 0
		   can signal valid input -- verify against unicode.h. */
		size_t bad = utf8_validate_off(code, codesz);
		if (bad != 0)
			err("Invalid UTF-8 at byte-offset %zu", bad - 1);
	}

	/* utf8_decode() receives the address of the cursor; the loop relies
	   on it advancing CODE past the rune it decodes.  The decoded rune
	   is not consumed yet. */
	for (const char *end = code + codesz; code < end;) {
		rune ch = utf8_decode(&code);
	}

	*lcnt = out.len;
	return out.p;
}