aboutsummaryrefslogtreecommitdiff
path: root/grab.c
diff options
context:
space:
mode:
author Thomas Voss <mail@thomasvoss.com> 2023-12-09 20:14:08 +0100
committer Thomas Voss <mail@thomasvoss.com> 2023-12-09 20:18:01 +0100
commit e8e03e7eda2c6d5fc9e0e5ea13bd46beb4417798 (patch)
tree 44d785912a09ebd3151bc285a9ce983857c3e216 /grab.c
Genesis commit
Diffstat (limited to 'grab.c')
-rw-r--r-- grab.c 274
1 files changed, 274 insertions, 0 deletions
diff --git a/grab.c b/grab.c
new file mode 100644
index 0000000..9fdfbd0
--- /dev/null
+++ b/grab.c
@@ -0,0 +1,274 @@
+#include <assert.h>
+#include <err.h>
+#include <limits.h>
+#include <locale.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "da.h"
+
+#ifndef REG_STARTEND
+# error "REG_STARTEND not defined"
+#endif
+
+/*
+ * Print a diagnostic and terminate with EXIT_FAILURE: die() forwards to
+ * err() and diex() to errx().  The expansions carry no trailing semicolon
+ * so that a call followed by `;' is exactly one statement and can sit in
+ * front of an `else'.
+ */
+#define die(...)  err(EXIT_FAILURE, __VA_ARGS__)
+#define diex(...) errx(EXIT_FAILURE, __VA_ARGS__)
+
+/* Reported when the pattern is empty or ends right after an operator. */
+#define EEARLY "Input string terminated prematurely"
+
+/* One operation of a compiled pattern: an operator and its regex. */
+struct op {
+ /* Operator character taken from the pattern; selects the op_table handler. */
+ char c;
+ /* Regular expression compiled by mkregex() for this operation. */
+ regex_t pat;
+};
+
+/*
+ * Array of operations in the order they appear in the pattern.  Set up and
+ * grown through da_init()/da_append() from da.h (not visible here;
+ * presumably those macros maintain len and cap -- confirm against da.h).
+ */
+struct ops {
+ /* Operation storage; buf[i] is the i-th operation. */
+ struct op *buf;
+ /* len is compared against i + 1 to detect the last operation. */
+ size_t len, cap;
+};
+
+/* Growable byte buffer; grab() uses it to hold the contents of a stream. */
+struct chars {
+ /* Storage obtained with realloc(). */
+ char *buf;
+ /* len: bytes read so far; cap: bytes currently allocated. */
+ size_t len, cap;
+};
+
+/*
+ * String view: a pointer and a length.  Views are created as sub-ranges of
+ * a larger buffer and are written with fwrite(), so they need not be
+ * NUL-terminated.
+ */
+struct sv {
+ /* First byte of the view. */
+ char *p;
+ /* Number of bytes in the view. */
+ size_t len;
+};
+
+/* Shorthand used when a char is turned into an op_table index. */
+typedef unsigned char uchar;
+
+/*
+ * Signature shared by all operator handlers: sv is the text to work on,
+ * ops the compiled pattern and i the index of the operation to run.
+ */
+typedef void (*cmd_func)(struct sv sv, struct ops ops, size_t i);
+
+/* Operator handlers. */
+static void cmdg(struct sv sv, struct ops ops, size_t i);
+static void cmdx(struct sv sv, struct ops ops, size_t i);
+
+/* Pattern compilation, input processing and output. */
+static void grab(struct ops ops, FILE *stream, const char *filename);
+static void putsv(struct sv sv);
+static regex_t mkregex(char *s, size_t n);
+static struct ops comppat(char *s);
+
+/* Small string helpers. */
+static bool xisspace(char c);
+static char *xstrchrnul(const char *s, char c);
+
+/* Exit status of the process; grab() sets it to EXIT_FAILURE on a read error. */
+static int rv = EXIT_SUCCESS;
+/* String written after every emitted match; replaced by the -d option. */
+static const char *delim = "\n";
+/*
+ * Maps an operator character to its handler; a null entry marks a
+ * character that is not an operator.  The table is indexed with values
+ * converted to unsigned char, so it needs UCHAR_MAX + 1 elements: with
+ * only UCHAR_MAX elements the index UCHAR_MAX would be out of bounds.
+ */
+static const cmd_func op_table[UCHAR_MAX + 1] = {
+	['g'] = cmdg,
+	['v'] = cmdg,
+	['x'] = cmdx,
+	// ['y'] = cmdy,
+};
+
+/*
+ * Write the command synopsis to standard error, with s as the program
+ * name, then exit with EXIT_FAILURE.  This function does not return.
+ */
+static void
+usage(const char *s)
+{
+	(void)fprintf(stderr,
+	              "Usage: %s [-d string] pattern [file ...]\n",
+	              s);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Parse the command line, compile the pattern and run it over every input.
+ *
+ * The first operand is the pattern; the remaining operands are files.  With
+ * no file operands, or for the operand `-', standard input is read.  A file
+ * that cannot be opened is reported and skipped.
+ *
+ * Returns the file-scope status `rv', which is EXIT_SUCCESS unless a file
+ * could not be opened here or grab() recorded a read error.
+ */
+int
+main(int argc, char **argv)
+{
+	int opt;
+	/* Saved because argv is advanced past the options below. */
+	const char *progname = argv[0];
+	struct ops ops;
+
+	if (argc < 2)
+		usage(progname);
+
+	setlocale(LC_ALL, "");
+
+	while ((opt = getopt(argc, argv, "d:")) != -1) {
+		switch (opt) {
+		case 'd':
+			delim = optarg;
+			break;
+		default:
+			usage(progname);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* Options alone (e.g. `-d ,') leave no pattern operand behind. */
+	if (argc < 1)
+		usage(progname);
+
+	ops = comppat(argv[0]);
+	if (argc == 1)
+		grab(ops, stdin, "-");
+	else {
+		/* argv[0] is the pattern, so the files start at index 1. */
+		for (int i = 1; i < argc; i++) {
+			FILE *fp;
+
+			if (strcmp(argv[i], "-") == 0) {
+				grab(ops, stdin, "-");
+			} else if ((fp = fopen(argv[i], "r")) == NULL) {
+				warn("fopen: %s", argv[i]);
+				rv = EXIT_FAILURE;
+			} else {
+				grab(ops, fp, argv[i]);
+				fclose(fp);
+			}
+		}
+	}
+
+	/* Cleanup is only compiled in when GRAB_DEBUG is defined. */
+#ifdef GRAB_DEBUG
+	for (size_t i = 0; i < ops.len; i++)
+		regfree(&ops.buf[i].pat);
+	free(ops.buf);
+#endif
+
+	return rv;
+}
+
+/*
+ * Compile the pattern string s into an array of operations.
+ *
+ * Each operation is written as an operator character, a delimiter
+ * character, and a regular expression that runs up to the next delimiter
+ * not preceded by a backslash (or to the end of the string).  Operations
+ * may be separated by spaces, tabs and newlines.  The regex text is handed
+ * to mkregex() unchanged, backslashes included.
+ *
+ * s is modified temporarily by mkregex() and restored before returning.
+ * Exits with a diagnostic on an empty pattern, an unknown operator, a
+ * missing delimiter or a regex that fails to compile.
+ */
+struct ops
+comppat(char *s)
+{
+#define skip_ws(p) for (; *(p) && xisspace(*(p)); (p)++)
+	struct ops ops;
+
+	da_init(&ops, 8);
+	skip_ws(s);
+	if (!*s)
+		diex(EEARLY);
+
+	do {
+		char sep;   /* delimiter of this operation */
+		char *p;    /* start of the regex text */
+		struct op op;
+
+		op.c = *s;
+		if (!op_table[(uchar)op.c])
+			diex("Invalid operator ‘%c’", *s);
+		if (!(sep = *++s))
+			diex(EEARLY);
+
+		p = ++s;
+		s = xstrchrnul(s, sep);
+		op.pat = mkregex(p, (size_t)(s - p));
+		da_append(&ops, op);
+
+		/* Step over the closing delimiter if there is one. */
+		if (*s)
+			s++;
+		skip_ws(s);
+		/*
+		 * NOTE(review): a single character left at the end of the
+		 * pattern stops the loop and is ignored without a diagnostic;
+		 * confirm whether that is intended.
+		 */
+	} while (*s && *(s + 1));
+
+	return ops;
+#undef skip_ws
+}
+
+/*
+ * Read all of stream into memory and run the compiled pattern over it.
+ *
+ * The buffer grows by BUFSIZ bytes per read; reading stops at the first
+ * short read.  On a read error a warning naming filename is printed and
+ * the file-scope status rv becomes EXIT_FAILURE; otherwise the whole buffer
+ * is passed to cmdx() with operation index 0, whatever the operator
+ * character of that first operation is.  Exits if memory runs out.
+ */
+void
+grab(struct ops ops, FILE *stream, const char *filename)
+{
+	struct chars chars = {0};
+
+	static_assert(sizeof(char) == 1, "sizeof(char) != 1; wtf?");
+
+	for (;;) {
+		size_t got;
+
+		chars.cap += BUFSIZ;
+		chars.buf = realloc(chars.buf, chars.cap);
+		if (chars.buf == NULL)
+			die("realloc");
+
+		got = fread(chars.buf + chars.len, 1, BUFSIZ, stream);
+		chars.len += got;
+		if (got != BUFSIZ)
+			break;
+	}
+
+	if (!ferror(stream)) {
+		struct sv whole = {.p = chars.buf, .len = chars.len};
+
+		cmdx(whole, ops, 0);
+	} else {
+		warn("fread: %s", filename);
+		rv = EXIT_FAILURE;
+	}
+
+	free(chars.buf);
+}
+
+/*
+ * Handler for the `x' operator: find every match of operation i's regex
+ * inside sv.
+ *
+ * Each match becomes a new view.  If operation i is the last one the view
+ * is printed with putsv(); otherwise it is handed to the handler of
+ * operation i + 1.  Searching uses REG_STARTEND, so sv does not have to be
+ * NUL-terminated.
+ *
+ * A regexec() failure other than REG_NOMATCH is reported, recorded in the
+ * file-scope status rv, and ends the search; it is not treated as a match.
+ */
+void
+cmdx(struct sv sv, struct ops ops, size_t i)
+{
+	regmatch_t pm = {
+		.rm_so = 0,
+		.rm_eo = sv.len,
+	};
+	const regex_t *pat = &ops.buf[i].pat;
+
+	do {
+		int r;
+		struct sv nsv;
+
+		r = regexec(pat, sv.p, 1, &pm, REG_STARTEND);
+		if (r == REG_NOMATCH)
+			break;
+		if (r != 0) {
+			char emsg[128];
+
+			regerror(r, pat, emsg, sizeof(emsg));
+			warnx("regexec: %s", emsg);
+			rv = EXIT_FAILURE;
+			break;
+		}
+
+		nsv = (struct sv){.p = sv.p + pm.rm_so, .len = pm.rm_eo - pm.rm_so};
+		if (i + 1 == ops.len)
+			putsv(nsv);
+		else
+			op_table[(uchar)ops.buf[i + 1].c](nsv, ops, i + 1);
+
+		/* After an empty match, step one byte so the search advances. */
+		if (pm.rm_so == pm.rm_eo)
+			pm.rm_eo++;
+		/* Continue right after this match, up to the end of sv. */
+		pm = (regmatch_t){
+			.rm_so = pm.rm_eo,
+			.rm_eo = sv.len,
+		};
+	} while (pm.rm_so < pm.rm_eo);
+}
+
+/*
+ * Handler for the `g' and `v' operators: filter sv by operation i's regex.
+ *
+ * With `g' the view is kept only if the regex matches somewhere in it; with
+ * `v' only if it does not.  A kept view is printed with putsv() when
+ * operation i is the last one, otherwise it is passed unchanged to the
+ * handler of operation i + 1.
+ *
+ * A regexec() failure other than REG_NOMATCH is reported, recorded in the
+ * file-scope status rv, and drops the view; it is not treated as a match.
+ */
+void
+cmdg(struct sv sv, struct ops ops, size_t i)
+{
+	int r;
+	regmatch_t pm = {
+		.rm_so = 0,
+		.rm_eo = sv.len,
+	};
+	const struct op *op = &ops.buf[i];
+
+	r = regexec(&op->pat, sv.p, 1, &pm, REG_STARTEND);
+	if (r != 0 && r != REG_NOMATCH) {
+		char emsg[128];
+
+		regerror(r, &op->pat, emsg, sizeof(emsg));
+		warnx("regexec: %s", emsg);
+		rv = EXIT_FAILURE;
+		return;
+	}
+	if ((r == REG_NOMATCH && op->c == 'g') || (r != REG_NOMATCH && op->c == 'v'))
+		return;
+
+	if (i + 1 == ops.len)
+		putsv(sv);
+	else
+		op_table[(uchar)ops.buf[i + 1].c](sv, ops, i + 1);
+}
+
+/*
+ * Write the bytes of sv to standard output, followed by the current
+ * output delimiter string.
+ */
+void
+putsv(struct sv sv)
+{
+	fwrite(sv.p, sizeof(char), sv.len, stdout);
+	fputs(delim, stdout);
+}
+
+/*
+ * Compile the first n bytes of s as an extended regular expression with
+ * REG_NEWLINE and return the compiled regex.
+ *
+ * regcomp() needs a NUL-terminated string, so s[n] is overwritten with a
+ * NUL for the duration of the call and put back afterwards; s[n] must
+ * therefore be writable.  Exits with the regerror() text if compilation
+ * fails.
+ */
+regex_t
+mkregex(char *s, size_t n)
+{
+	regex_t re;
+	int rc;
+	const char saved = s[n];
+
+	s[n] = 0;
+	rc = regcomp(&re, s, REG_EXTENDED | REG_NEWLINE);
+	if (rc != 0) {
+		char emsg[128];
+
+		regerror(rc, &re, emsg, sizeof(emsg));
+		diex("Failed to compile regex: %s", emsg);
+	}
+	s[n] = saved;
+
+	return re;
+}
+
+/*
+ * Report whether c is a space, a tab or a newline.  The check compares
+ * against these three characters only and does not consult the locale.
+ */
+bool
+xisspace(char c)
+{
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Return a pointer to the first occurrence of c in s that is not preceded
+ * by a backslash, or to the terminating NUL if there is none.  A backslash
+ * hides the character after it, whatever that character is.
+ *
+ * The result points into s; the const qualifier is cast away for the
+ * caller's convenience.
+ */
+char *
+xstrchrnul(const char *s, char c)
+{
+	for (; *s; s++) {
+		if (*s == '\\') {
+			/*
+			 * Move onto the escaped character; the loop increment
+			 * then skips it.  A backslash that ends the string
+			 * escapes nothing, so stop on the NUL instead of
+			 * stepping past it.
+			 */
+			if (*++s == '\0')
+				break;
+		} else if (*s == c)
+			break;
+	}
+	return (char *)s;
+}