diff options
43 files changed, 993 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8b79c58 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.cache/ +*.o +compile_commands.json @@ -0,0 +1,14 @@ +BSD Zero Clause License + +Copyright © 2024 Thomas Voss + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8da53a4 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +MAKEFLAGS = -j8 +srcs := $(shell find . -name '*.c') +objs := $(srcs:.c=.o) + +all: $(objs) +%.o: %.c + gcc-13 -std=c2x -Iinclude -c -o $@ $< + +clean: + rm -f $(objs) + +.PHONY: clean @@ -0,0 +1,24 @@ +MLib — Useful Standard Library Extensions + +MLib is a collection of various headers and libraries that I find useful +in a large enough number of my personal C projects. This includes for +example: common data structures, useful non-POSIX APIs, and 21st-century +strings. + +This does not aim to be a LibC replacement, but rather a supplementary +library. It is a C23 library with no plans to support older standards. + +The headers as of now are: + • alloc.h — memory allocation functions + • bitset.h — macros for implementing a bitset + • bob.h — bob the string-builder + • dynarr.h — macros for implementing a dynamic array + • errors.h — err.h-inspired diagnostics functions + • macros.h — miscellaneous utility macros (MIN/MAX/lengthof/etc.) 
#ifndef MLIB___CHARN_T_H
#define MLIB___CHARN_T_H

#include <stdint.h>

/* Character code-unit types shared by the MLib headers.

   char8_t is a plain unsigned byte; the mbstring.h and bob.h interfaces use
   it for the bytes of UTF-8 encoded strings.

   char16_t and char32_t are the smallest integer types with at least 16 and
   32 bits respectively.
   NOTE(review): presumably meant for UTF-16/UTF-32 code units; nothing in
   the library uses them yet, so confirm before relying on that.
   NOTE(review): these names are also declared by <uchar.h>; including both
   headers in one translation unit may conflict. */
typedef unsigned char char8_t;
typedef uint_least16_t char16_t;
typedef uint_least32_t char32_t;

#endif /* !MLIB___CHARN_T_H */
/* Bitset helpers.  A bitset is an array of unsigned char in which bit x
   lives in byte x / CHAR_BIT at position x % CHAR_BIT.

   All macros evaluate their arguments more than once; do not pass
   expressions with side effects. */

/* Index of the byte that stores bit x. */
#define __MLIB_BITSLOT(x) ((x) / CHAR_BIT)
/* Mask that selects bit x inside its byte. */
#define __MLIB_BITMASK(x) (1 << ((x) % CHAR_BIT))
/* Apply the operator op between the byte holding bit x and the mask of x. */
#define __MLIB_BITIMPL(bs, x, op) ((bs)[__MLIB_BITSLOT(x)] op __MLIB_BITMASK(x))

/* Declare an array called name that is large enough to hold n bits.  The
   size argument is parenthesized so that arbitrary expressions (for example
   a conditional expression) produce the intended size. */
#define bitset(name, n) unsigned char name[((n) + CHAR_BIT - 1) / CHAR_BIT]

/* Clear, set, and toggle bit x of the bitset bs. */
#define BITCLR(bs, x)  __MLIB_BITIMPL(bs, x, &= ~)
#define BITSET(bs, x)  __MLIB_BITIMPL(bs, x, |=)
#define BITTOGL(bs, x) __MLIB_BITIMPL(bs, x, ^=)
/* Evaluates to true if bit x of the bitset bs is set, and false otherwise. */
#define BITTEST(bs, x) ((bool)__MLIB_BITIMPL(bs, x, &))
/* Dynamic-array macros.  Every macro takes a pointer da to a structure with
   the members
       T *buf;       element storage
       size_t len;   number of elements in use
       size_t cap;   number of elements allocated
   Storage is obtained through bufalloc().  The macros evaluate their
   arguments several times, so avoid arguments with side effects. */

/* Ensure the array has room for at least n elements. */
#define DAGROW(da, n) \
	do { \
		if ((n) > (da)->cap) { \
			(da)->cap = (n); \
			(da)->buf = bufalloc((da)->buf, (da)->cap, sizeof(*(da)->buf)); \
		} \
	} while (false)

/* Append the element x, doubling the capacity when the array is full. */
#define DAPUSH(da, x) \
	do { \
		if ((da)->len >= (da)->cap) { \
			(da)->cap = (da)->cap ? (da)->cap * 2 : 1; \
			(da)->buf = bufalloc((da)->buf, (da)->cap, sizeof(*(da)->buf)); \
		} \
		(da)->buf[(da)->len++] = (x); \
	} while (false)

/* Append the n elements stored at xs.  The capacity is doubled until the
   new length fits; memcpy() is given a byte count, hence the multiplication
   by the element size. */
#define DAEXTEND(da, xs, n) \
	do { \
		if ((da)->len + (n) >= (da)->cap) { \
			do \
				(da)->cap = (da)->cap ? (da)->cap * 2 : 1; \
			while ((da)->len + (n) >= (da)->cap); \
			(da)->buf = bufalloc((da)->buf, (da)->cap, sizeof(*(da)->buf)); \
		} \
		memcpy((da)->buf + (da)->len, (xs), (n) * sizeof(*(da)->buf)); \
		(da)->len += (n); \
	} while (false)

/* Remove the element at index i. */
#define DAREMOVE(da, i) DA_REMOVE_RANGE((da), (i), (i) + 1)

/* Remove the elements in the half-open index range [i, j) by moving the
   tail of the array down over them. */
#define DA_REMOVE_RANGE(da, i, j) \
	do { \
		memmove((da)->buf + (i), (da)->buf + (j), \
		        ((da)->len - (j)) * sizeof(*(da)->buf)); \
		(da)->len -= (j) - (i); \
	} while (false)

/* Iterate over the array with p pointing at each element in turn. */
#define da_foreach(da, p) \
	for (auto p = (da)->buf; (size_t)(p - (da)->buf) < (da)->len; p++)
#ifndef MLIB_MBSTRING_H
#define MLIB_MBSTRING_H

#include <stddef.h>

#include "__charN_t.h"
#include "__qmacros.h"
#include "__rune.h"

#if !__MLIB_HAS_U8VIEW
# define __MLIB_HAS_U8VIEW 1

/* A non-owning view of len bytes of UTF-8 text starting at p.  bob.h holds
   an identical definition; __MLIB_HAS_U8VIEW keeps the two from clashing. */
struct u8view {
	const char8_t *p;
	size_t len;
};
#endif

/* Byte classification.  U8_BYTE_n(x) is true if x has the bit pattern of the
   first byte of an n-byte sequence; U8_BYTE_C(x) is true if x has the
   pattern of a continuation byte (10xxxxxx). */
#define U8_BYTE_1(x) (((x) & 0x80) == 0x00)
#define U8_BYTE_2(x) (((x) & 0xE0) == 0xC0)
#define U8_BYTE_3(x) (((x) & 0xF0) == 0xE0)
#define U8_BYTE_4(x) (((x) & 0xF8) == 0xF0)
#define U8_BYTE_C(x) (((x) & 0xC0) == 0x80)

/* The largest rune that rtou8() encodes in 1, 2, 3, and 4 bytes. */
static const rune U8_1B_MAX = 0x00007FL;
static const rune U8_2B_MAX = 0x0007FFL;
static const rune U8_3B_MAX = 0x00FFFFL;
static const rune U8_4B_MAX = 0x10FFFFL;

/* printf() support for u8view.  PRIsU8 is the conversion without the leading
   ‘%’ and U8_PRI_ARGS() expands to the matching precision and pointer:
       printf("%" PRIsU8 "\n", U8_PRI_ARGS(sv)); */
#define PRIsU8 ".*s"
#define U8_PRI_ARGS(sv) ((int)(sv).len), ((sv).p)

/* u8chk   — return a pointer to the first position in the n-byte buffer that
             decodes to RUNE_ERROR, or a null pointer if there is none
   u8chr   — return a pointer to the first encoded occurrence of the rune in
             the n-byte buffer, or a null pointer if it is absent
   u8rchr  — like u8chr, but return the last occurrence
   rtou8   — encode the rune and return the encoded length in bytes; the
             bytes are only stored if the size argument is large enough, and
             runes above U8_4B_MAX are encoded as RUNE_ERROR
   u8cmp   — order two views by length first, then by memcmp() of the bytes
   u8next  — decode the rune at *s, advance *s and shrink *n by the number of
             bytes consumed, and return that number; returns 0 without
             touching anything when *n is 0.  The rune pointer may be null.
   u8prev  — decode the rune that ends just before *p, move *p back to its
             first byte, and return the number of bytes stepped over;
             returns 0 when *p is already at (or before) start.  The rune
             pointer may be null.
   u8tor   — decode one rune from the start of the buffer and return its
             encoded length; on a malformed sequence the rune is RUNE_ERROR
             and the return value is 1.  No length is passed, so up to four
             bytes may be inspected.
   u8cspn  — return the length in bytes of the longest prefix that contains
             none of the m runes in the given set
   u8len   — return the number of runes in the n-byte buffer
   u8spn   — return the length in bytes of the longest prefix that consists
             only of runes from the given set of m runes */
char8_t *u8chk(const char8_t *, size_t);
char8_t *u8chr(const char8_t *, rune, size_t);
char8_t *u8rchr(const char8_t *, rune, size_t);
int rtou8(char8_t *, rune, size_t);
int u8cmp(struct u8view, struct u8view);
int u8next(rune *, const char8_t **, size_t *);
int u8prev(rune *, const char8_t **, const char8_t *);
int u8tor(rune *, const char8_t *);
size_t u8cspn(const char8_t *, size_t, const rune *, size_t);
size_t u8len(const char8_t *, size_t);
size_t u8spn(const char8_t *, size_t, const rune *, size_t);

/* Qualifier-preserving wrappers: the result is cast to ‘const char8_t *’
   when the string argument is const-qualified and to ‘char8_t *’ otherwise.
   Define _RUNE_NO_MACRO_WRAPPER to a non-zero value before including this
   header to get the plain functions (the implementation files do this). */
#if !_RUNE_NO_MACRO_WRAPPER
# define u8chk(s, n) _RUNE_Q_PTR(char8_t, u8chk, (s), (s), (n))
# define u8chr(s, ch, n) _RUNE_Q_PTR(char8_t, u8chr, (s), (s), (ch), (n))
# define u8rchr(s, ch, n) _RUNE_Q_PTR(char8_t, u8rchr, (s), (s), (ch), (n))
#endif

/* The maximum number of bytes rtou8() produces for a single rune. */
static const int U8_LEN_MAX = 4;

#endif /* !MLIB_MBSTRING_H */
/* Resize the buffer p so that it can hold n elements of m bytes each and
   return the (possibly moved) buffer.  p may be a null pointer, in which
   case a new buffer is allocated.

   This function never returns on failure: if n * m does not fit in a size_t
   (errno is set to EOVERFLOW) or if the allocation fails, a diagnostic is
   printed through err() and the program exits. */
void *
bufalloc(void *p, size_t n, size_t m)
{
	/* A division-based check detects wraparound on every compiler; the
	   plain-multiplication ckd_mul() fallback cannot. */
	if (m != 0 && n > (size_t)-1 / m) {
		errno = EOVERFLOW;
		err("%s", __func__);
	}
	n *= m;

	/* realloc(p, 0) is allowed to release p and return a null pointer,
	   which must not be mistaken for an allocation failure; always ask
	   for at least one byte. */
	if (!(p = realloc(p, n ? n : 1)))
		err("%s", __func__);
	return p;
}
/* Return the smallest power of two that is greater than or equal to x; both
   0 and 1 yield 1.

   If x is larger than the greatest power of two a size_t can hold, x itself
   is returned so that the caller still gets a capacity of at least x
   instead of a wrapped-around value.

   The loop works in size_t throughout, which avoids shifting an int past
   its width and does not depend on the width of long or on compiler
   builtins.  It runs at most once per bit of size_t. */
static size_t
nextpow2(size_t x)
{
	size_t p = 1;
	const size_t top = ((size_t)-1 >> 1) + 1;

	if (x > top)
		return x;
	while (p < x)
		p <<= 1;
	return p;
}
b->len); + return b; +} diff --git a/lib/bob/u8strpushstr.c b/lib/bob/u8strpushstr.c new file mode 100644 index 0000000..64b123d --- /dev/null +++ b/lib/bob/u8strpushstr.c @@ -0,0 +1,9 @@ +#include <string.h> + +#include "bob.h" + +struct u8str * +u8strpushstr(struct u8str *b, const char *s) +{ + return u8strpushu8(b, (struct u8view){.p = s, .len = strlen(s)}); +} diff --git a/lib/bob/u8strpushu8.c b/lib/bob/u8strpushu8.c new file mode 100644 index 0000000..8358e01 --- /dev/null +++ b/lib/bob/u8strpushu8.c @@ -0,0 +1,14 @@ +#include <string.h> + +#include "bob.h" +#include "mbstring.h" + +struct u8str * +u8strpushu8(struct u8str *b, struct u8view v) +{ + if (!u8strgrow(b, b->len + v.len)) + return nullptr; + memcpy(b->p + b->len, v.p, v.len); + b->len += v.len; + return b; +} diff --git a/lib/errors/cerr.c b/lib/errors/cerr.c new file mode 100644 index 0000000..bf5a98d --- /dev/null +++ b/lib/errors/cerr.c @@ -0,0 +1,14 @@ +#include <stdarg.h> +#include <stdlib.h> + +#include "errors.h" + +void +cerr(int code, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); + exit(code); +} diff --git a/lib/errors/cerrx.c b/lib/errors/cerrx.c new file mode 100644 index 0000000..b614f47 --- /dev/null +++ b/lib/errors/cerrx.c @@ -0,0 +1,14 @@ +#include <stdarg.h> +#include <stdlib.h> + +#include "errors.h" + +void +cerrx(int code, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vwarnx(fmt, ap); + va_end(ap); + exit(code); +} diff --git a/lib/errors/err.c b/lib/errors/err.c new file mode 100644 index 0000000..906e661 --- /dev/null +++ b/lib/errors/err.c @@ -0,0 +1,14 @@ +#include <stdarg.h> +#include <stdlib.h> + +#include "errors.h" + +void +err(const char *fmt, ...) 
/* The program name printed in front of every diagnostic; read through the
   progname() macro. */
const char *__mlib_errors_progname;

/* Record the program name used by the diagnostic functions.  Only the part
   of s after the final ‘/’ is kept, so argv[0] can be passed directly.  The
   string is not copied and must stay valid for as long as it is in use. */
void
setprogname(const char *s)
{
	const char *slash = strrchr(s, '/');

	if (slash)
		__mlib_errors_progname = slash + 1;
	else
		__mlib_errors_progname = s;
}
+{ + va_list ap; + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); +} diff --git a/lib/errors/warnx.c b/lib/errors/warnx.c new file mode 100644 index 0000000..ea50299 --- /dev/null +++ b/lib/errors/warnx.c @@ -0,0 +1,12 @@ +#include <stdarg.h> + +#include "errors.h" + +void +warnx(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vwarnx(fmt, ap); + va_end(ap); +} diff --git a/lib/mbstring/rtou8.c b/lib/mbstring/rtou8.c new file mode 100644 index 0000000..0ddac89 --- /dev/null +++ b/lib/mbstring/rtou8.c @@ -0,0 +1,37 @@ +#include <stddef.h> + +#include "mbstring.h" +#include "rune.h" + +int +rtou8(char8_t *s, rune ch, size_t n) +{ + if (ch <= U8_1B_MAX) { + if (n >= 1) + s[0] = ch; + return 1; + } else if (ch <= U8_2B_MAX) { + if (n >= 2) { + s[0] = (ch >> 6) | 0xC0; + s[1] = (ch & 0x3F) | 0x80; + } + return 2; + } else if (ch <= U8_3B_MAX) { + if (n >= 3) { + s[0] = (ch >> 12) | 0xE0; + s[1] = ((ch >> 6) & 0x3F) | 0x80; + s[2] = (ch & 0x3F) | 0x80; + } + return 3; + } else if (ch <= U8_4B_MAX) { + if (n >= 4) { + s[0] = (ch >> 18) | 0xF0; + s[1] = ((ch >> 12) & 0x3F) | 0x80; + s[2] = ((ch >> 6) & 0x3F) | 0x80; + s[3] = (ch & 0x3F) | 0x80; + } + return 4; + } + + return rtou8(s, RUNE_ERROR, n); +} diff --git a/lib/mbstring/u8chk.c b/lib/mbstring/u8chk.c new file mode 100644 index 0000000..79ef5ee --- /dev/null +++ b/lib/mbstring/u8chk.c @@ -0,0 +1,19 @@ +#include "rune.h" +#define _RUNE_NO_MACRO_WRAPPER 1 +#include "mbstring.h" + +char8_t * +u8chk(const char8_t *s, size_t n) +{ + while (n) { + rune ch; + int m = u8tor(&ch, s); + + if (ch == RUNE_ERROR) + return (char8_t *)s; + n -= m; + s += m; + } + + return nullptr; +} diff --git a/lib/mbstring/u8chr.c b/lib/mbstring/u8chr.c new file mode 100644 index 0000000..008b7d7 --- /dev/null +++ b/lib/mbstring/u8chr.c @@ -0,0 +1,97 @@ +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#define _RUNE_NO_MACRO_WRAPPER 1 +#include "mbstring.h" + +/* NOTE: The memmem*() functions were taken directly 
from the memmem() + implementation on OpenBSD. As a result, these functions are licensed under + OpenBSDs 2-Clause BSD License instead of this libraries 0-Clause BSD License. + + The license for these functions is as follows: + + Copyright © 2005–2020 Rich Felker, et al. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + “Software”), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +static char8_t * +memmem2(const char8_t *h, size_t k, const char8_t *n) +{ + uint16_t hw, nw; + hw = h[0] << 8 | h[1]; + nw = n[0] << 8 | n[1]; + + for (h += 2, k -= 2; k; k--, hw = hw << 8 | *h++) { + if (hw == nw) + return (char8_t *)h - 2; + } + return hw == nw ? (char8_t *)h - 2 : nullptr; +} + +static char8_t * +memmem3(const char8_t *h, size_t k, const char8_t *n) +{ + uint32_t hw, nw; + hw = h[0] << 24 | h[1] << 16 | h[2] << 8; + nw = n[0] << 24 | n[1] << 16 | n[2] << 8; + + for (h += 3, k -= 3; k; k--, hw = (hw | *h++) << 8) { + if (hw == nw) + return (char8_t *)h - 3; + } + return hw == nw ? 
(char8_t *)h - 3 : nullptr; +} + +static char8_t * +memmem4(const char8_t *h, size_t k, const char8_t *n) +{ + uint32_t hw, nw; + hw = h[0] << 24 | h[1] << 16 | h[2] << 8 | h[3]; + nw = n[0] << 24 | n[1] << 16 | n[2] << 8 | n[3]; + + for (h += 4, k -= 4; k; k--, hw = hw << 8 | *h++) { + if (hw == nw) + return (char8_t *)h - 4; + } + return hw == nw ? (char8_t *)h - 4 : nullptr; +} + +char8_t * +u8chr(const char8_t *s, rune ch, size_t n) +{ + char8_t buf[U8_LEN_MAX]; + int m = rtou8(buf, ch, sizeof(buf)); + + if (n < (size_t)m) + return nullptr; + switch (m) { + case 1: + return memchr(s, ch, n); + case 2: + return memmem2(s, n, buf); + case 3: + return memmem3(s, n, buf); + case 4: + return memmem4(s, n, buf); + } + + unreachable(); +} diff --git a/lib/mbstring/u8cmp.c b/lib/mbstring/u8cmp.c new file mode 100644 index 0000000..732896c --- /dev/null +++ b/lib/mbstring/u8cmp.c @@ -0,0 +1,11 @@ +#include <string.h> + +#include "mbstring.h" + +int +u8cmp(struct u8view x, struct u8view y) +{ + if (x.len != y.len) + return x.len > y.len ? 
+1 : -1; + return memcmp(x.p, y.p, x.len); +} diff --git a/lib/mbstring/u8cspn.c b/lib/mbstring/u8cspn.c new file mode 100644 index 0000000..4892de4 --- /dev/null +++ b/lib/mbstring/u8cspn.c @@ -0,0 +1,18 @@ +#include "mbstring.h" + +size_t +u8cspn(const char8_t *s, size_t n, const rune *p, size_t m) +{ + rune ch; + size_t k, l; + + for (k = 0; (l = u8next(&ch, &s, &n)); k += l) { + for (size_t i = 0; i < m; i++) { + if (p[i] == ch) + goto found; + } + } + +found: + return k; +} diff --git a/lib/mbstring/u8len.c b/lib/mbstring/u8len.c new file mode 100644 index 0000000..217ab66 --- /dev/null +++ b/lib/mbstring/u8len.c @@ -0,0 +1,10 @@ +#include "mbstring.h" + +size_t +u8len(const char8_t *s, size_t n) +{ + size_t m = 0; + while (u8next(nullptr, &s, &n)) + m++; + return m; +} diff --git a/lib/mbstring/u8next.c b/lib/mbstring/u8next.c new file mode 100644 index 0000000..82d2ad7 --- /dev/null +++ b/lib/mbstring/u8next.c @@ -0,0 +1,16 @@ +#include "mbstring.h" + +int +u8next(rune *ch, const char8_t **s, size_t *n) +{ + rune _; + int m = 0; + + if (*n) { + m = u8tor(ch ? 
ch : &_, *s); + *n -= m; + *s += m; + } + + return m; +} diff --git a/lib/mbstring/u8prev.c b/lib/mbstring/u8prev.c new file mode 100644 index 0000000..507f7e1 --- /dev/null +++ b/lib/mbstring/u8prev.c @@ -0,0 +1,45 @@ +#include <stddef.h> + +#include "mbstring.h" +#include "rune.h" + +int +u8prev(rune *ch, const char8_t **p, const char8_t *start) +{ + int off; + const char8_t *s = *p; + ptrdiff_t d = s - start; + + if (d <= 0) { + return 0; + } else if (U8_BYTE_1(s[-1])) { + if (ch) + *ch = s[-1]; + off = 1; + } else if (d > 1 && U8_BYTE_C(s[-1]) && U8_BYTE_2(s[-2])) { + if (ch) + *ch = ((s[-2] & 0x1F) << 6) | (s[-1] & 0x3F); + off = 2; + } else if (d > 2 && U8_BYTE_C(s[-1]) && U8_BYTE_C(s[-2]) + && U8_BYTE_3(s[-3])) { + if (ch) { + *ch = + ((s[-3] & 0x0F) << 12) | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F); + } + off = 3; + } else if (d > 3 && U8_BYTE_C(s[-1]) && U8_BYTE_C(s[-2]) && U8_BYTE_C(s[-3]) + && U8_BYTE_4(s[-4])) { + if (ch) { + *ch = ((s[-4] & 0x07) << 18) | ((s[-3] & 0x3F) << 12) + | ((s[-2] & 0x3F) << 6) | (s[-1] & 0x3F); + } + off = 4; + } else { + if (ch) + *ch = RUNE_ERROR; + off = 1; + } + + *p -= off; + return off; +} diff --git a/lib/mbstring/u8rchr.c b/lib/mbstring/u8rchr.c new file mode 100644 index 0000000..b92c323 --- /dev/null +++ b/lib/mbstring/u8rchr.c @@ -0,0 +1,87 @@ +#include <stddef.h> +#include <stdint.h> + +#define _RUNE_NO_MACRO_WRAPPER 1 +#include "mbstring.h" + +static char8_t * +memrchr1(const char8_t *s, size_t k, const char8_t *n) +{ + for (const char8_t *p = s + k - 1; k-- > 0; p--) { + if (*p == *n) + return (char8_t *)p; + } + return nullptr; +} + +static char8_t * +memrchr2(const char8_t *h, size_t k, const char8_t *n) +{ + uint16_t hw, nw; + const char8_t *H = h + k - 1; + hw = H[-1] << 8 | H[-0]; + nw = n[+0] << 8 | n[+1]; + + for (H -= 2, k -= 2; k; k--, hw = hw >> 8 | (*H-- << 8)) { + if (hw == nw) + return (char8_t *)H + 1; + } + + return hw == nw ? 
(char8_t *)H + 1 : nullptr; +} + +static char8_t * +memrchr3(const char8_t *h, size_t k, const char8_t *n) +{ + uint32_t hw, nw; + const char8_t *H = h + k - 1; + hw = H[-2] << 24 | H[-1] << 16 | H[-0] << 8; + nw = n[+0] << 24 | n[+1] << 16 | n[+2] << 8; + + for (H -= 3, k -= 3; k; + k--, hw = (hw >> 8 | (*H-- << 24)) & UINT32_C(0xFFFFFF00)) + { + if (hw == nw) + return (char8_t *)H + 1; + } + + return hw == nw ? (char8_t *)H + 1 : nullptr; +} + +static char8_t * +memrchr4(const char8_t *h, size_t k, const char8_t *n) +{ + uint32_t hw, nw; + const char8_t *H = h + k - 1; + hw = H[-3] << 24 | H[-2] << 16 | H[-1] << 8 | H[-0]; + nw = n[+0] << 24 | n[+1] << 16 | n[+2] << 8 | n[+3]; + + for (H -= 4, k -= 4; k; k--, hw = hw >> 8 | (*H-- << 24)) { + if (hw == nw) + return (char8_t *)H + 1; + } + + return hw == nw ? (char8_t *)H + 1 : nullptr; +} + +char8_t * +u8rchr(const char8_t *s, rune ch, size_t n) +{ + char8_t buf[U8_LEN_MAX]; + int m = rtou8(buf, ch, sizeof(buf)); + + if (n < (size_t)m) + return nullptr; + switch (m) { + case 1: + return (char8_t *)memrchr1(s, n, buf); + case 2: + return (char8_t *)memrchr2(s, n, buf); + case 3: + return (char8_t *)memrchr3(s, n, buf); + case 4: + return (char8_t *)memrchr4(s, n, buf); + } + + unreachable(); +} diff --git a/lib/mbstring/u8spn.c b/lib/mbstring/u8spn.c new file mode 100644 index 0000000..1cf45f2 --- /dev/null +++ b/lib/mbstring/u8spn.c @@ -0,0 +1,22 @@ +#include "mbstring.h" + +size_t +u8spn(const char8_t *s, size_t n, const rune *p, size_t m) +{ + rune ch; + size_t k = 0, l; + + while ((l = u8next(&ch, &s, &n))) { + for (size_t i = 0; i < m; i++) { + if (p[i] == ch) { + k += l; + goto found; + } + } + + break; +found:; + } + + return k; +} diff --git a/lib/mbstring/u8tor.c b/lib/mbstring/u8tor.c new file mode 100644 index 0000000..85c4c19 --- /dev/null +++ b/lib/mbstring/u8tor.c @@ -0,0 +1,25 @@ +#include "mbstring.h" +#include "rune.h" + +int +u8tor(rune *ch, const char8_t *s) +{ + if (U8_BYTE_1(s[0])) { + *ch = 
s[0]; + return 1; + } else if (U8_BYTE_2(s[0]) && U8_BYTE_C(s[1])) { + *ch = ((s[0] & 0x1F) << 6) | (s[1] & 0x3F); + return 2; + } else if (U8_BYTE_3(s[0]) && U8_BYTE_C(s[1]) && U8_BYTE_C(s[2])) { + *ch = ((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); + return 3; + } else if (U8_BYTE_4(s[0]) && U8_BYTE_C(s[1]) && U8_BYTE_C(s[2]) + && U8_BYTE_C(s[3])) { + *ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12) + | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); + return 4; + } + + *ch = RUNE_ERROR; + return 1; +} |