From 86eb99482e04b18d4890c737fc6155fde52f8d73 Mon Sep 17 00:00:00 2001 From: Thomas Voss Date: Sat, 13 Jan 2024 01:08:57 +0100 Subject: Organize files into directories --- da.h | 64 -------- git-grab.1 | 1 - grab.1 | 189 ---------------------- grab.c | 492 --------------------------------------------------------- make.c | 6 +- man/git-grab.1 | 1 + man/grab.1 | 189 ++++++++++++++++++++++ src/da.h | 64 ++++++++ src/grab.c | 492 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 749 insertions(+), 749 deletions(-) delete mode 100644 da.h delete mode 100644 git-grab.1 delete mode 100644 grab.1 delete mode 100644 grab.c create mode 100644 man/git-grab.1 create mode 100644 man/grab.1 create mode 100644 src/da.h create mode 100644 src/grab.c diff --git a/da.h b/da.h deleted file mode 100644 index 1587ece..0000000 --- a/da.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Simple & stupid dynamic array single-header implementation. You can use the - * macros defined in this file with any structure that has the following fields: - * - * struct dyn_array { - * T *buf // Array of items - * N len // Length of array - * N cap // Capacity of array - * } - * - * The type ‘T’ is whatever type you want to store. The type ‘N’ is any numeric - * type, most likely ‘size_t’. - * - * You should include ‘err.h’ and ‘stdlib.h’ along with this file. If you want - * to use da_remove(), include ‘string.h’. The da_remove() macro also doesn’t - * bother with shrinking your array when the length is far lower than the - * capacity. If you care about that, do it yourself. - * - * - * Macro Overview - * ―――――――――――――― - * The argument ‘a’ to all of the below macros is a pointer to the dynamic array - * structure. - * - * da_init(a, n) Initialize the array with a capacity of ‘n’ items. - * da_append(a, x) Append the item ‘x’ to the array - * da_remove(a, x) Remove the item ‘x’ from the array - * da_remove_range(a, x, y) Remove the items between the range [x, y) - */ - -#ifndef MANGO_DA_H -#define MANGO_DA_H - -#define __da_s(a) (sizeof(*(a)->buf)) - -#define da_init(a, n) \ - do { \ - (a)->cap = n; \ - (a)->len = 0; \ - (a)->buf = malloc((a)->cap * __da_s(a)); \ - if ((a)->buf == NULL) \ - err(EXIT_FAILURE, "malloc"); \ - } while (0) - -#define da_append(a, x) \ - do { \ - if ((a)->len >= (a)->cap) { \ - (a)->cap = (a)->cap * 2 + 1; \ - (a)->buf = realloc((a)->buf, (a)->cap * __da_s(a)); \ - if ((a)->buf == NULL) \ - err(EXIT_FAILURE, "realloc"); \ - } \ - (a)->buf[(a)->len++] = (x); \ - } while (0) - -#define da_remove(a, i) da_remove_range((a), (i), (i) + 1) - -#define da_remove_range(a, i, j) \ - do { \ - memmove((a)->buf + (i), (a)->buf + (j), ((a)->len - (j)) * __da_s(a)); \ - (a)->len -= j - i; \ - } while (0) - -#endif /* !MANGO_DA_H */ diff --git a/git-grab.1 b/git-grab.1 deleted file mode 100644 index 64c35cc..0000000 --- a/git-grab.1 +++ /dev/null @@ -1 +0,0 @@ -.so grab.1 diff --git a/grab.1 b/grab.1 deleted file mode 100644 index 8bd2e9e..0000000 --- a/grab.1 +++ /dev/null @@ -1,189 +0,0 @@ -.Dd January 12 2024 -.Dt GRAB 1 -.Os -.Sh NAME -.Nm grab -.Nd search for patterns in files -.Sh SYNOPSIS -.Nm -.Op Fl fnz -.Ar pattern -.Op Ar -.Nm -.Fl h -.Pp -.Nm "git grab" -.Op Fl nz -.Ar pattern -.Op Ar glob ... -.Nm "git grab" -.Fl h -.Sh DESCRIPTION -The -.Nm -utility searches for text in files corresponding to -.Ar pattern -and prints the corresponding matches to the standard output. -Unlike the -.Xr grep 1 -utility, -.Nm -is not strictly line-oriented; -instead of always matching on complete lines, -the user defines the structure of the text they would like to match and -filters on the results. -For more details on the pattern syntax, see -.Sx Pattern Syntax . -.Pp -The -.Nm git-grab -utility is identical to the -.Nm -utility in all ways except for two exceptions. -The first is that if no files -.Pq globs in this case to be precise -are specified, -input is not read from the standard-input but instead all files returned -by an invocation of -.Xr git-ls-files 1 -are processed. -If the user provides one or more globs, -only the files returned by -.Xr git-ls-files 1 -that match one or more of the given globs will be processed. -Secondly, the -.Fl f -option is not available; -its behavior is always assumed and cannot be disabled. -.Pp -.Nm -will read from the files provided on the command-line. -If no files are provided, the standard input will be read instead. -The special filename -.Sq - -can also be provided, -which represents the standard input. -.Pp -The default behavior of -.Nm -is to print pattern matches to the standard-output. -If more than one file argument is provided, -matches will be prefixed by their respective filename and a colon. -Note that this behavior is modified by the -.Fl f -and -.Fl z -options. -.Pp -The options are as follows: -.Bl -tag -width Ds -.It Fl f , Fl Fl filenames -Always prefix matches with the names of the files in which the matches -were made, -even if only 1 file was provided. -.It Fl h , Fl Fl help -Display help information by opening this manual page. -.It Fl n , Fl Fl newline -Don’t match newline characters -.Pq Sq \en -with the dot -.Pq Sq \&. -operator in patterns, -or in negated character-classes. -.It Fl z , Fl Fl zero -Separate output data by null bytes -.Pq Sq \e0 -instead of newlines. -This option can be used to process matches containing newlines. -.Pp -If combined with the -.Fl f -option, -or if two or more files were provided as arguments, -filenames and matches will be separated by null bytes instead of colons. -.El -.Ss Pattern Syntax -A pattern is a sequences of commands optionally separated by whitespace. -A command is an operator followed by a delimiter, a regular expression, -and then terminated by the same delimiter. The last command of a pattern -need not have a terminating delimiter. -.Pp -The supported operators are as follows: -.Bl -tag -compact -.It g -Keep selections that match the given regex. -.It v -Discard selections that match the given regex. -.It x -Select everything that matches the given regex. -.It y -Select everything that doesn’t match the given regex. -.El -.Pp -An example pattern to match all numbers that contain a ‘3’ but aren’t -‘1337’ could be -.Sq x/[0-9]+/ g/3/ v/^1337$/ . -In that pattern, -.Sq x/[0-9]+/ -selects all numbers in the input, -.Sq g/3/ -keeps only those matches that contain the number 3, -and -.Sq v/^1337$/ -filters out the specific number 1337. -.Pp -As you may use whichever delimiter you like, the following is also valid: -.Pp -.Dl x|[0-9]+| g.3. v#^1337# -.Sh ENVIRONMENT -.Bl -tag -width GRAB_COLOR_FNAME -.It Ev GRAB_COLOR_FNAME -The color(s) with which to highlight filenames, -or 35 if unset. -.It Ev GRAB_COLOR_SEPC -The color(s) with which to highlight the colon-separator between -filenames and matches, -or 36 if unset. -.It Ev NO_COLOR -Do not display any colored output when set to a non-empty string, -even if the standard-output is a terminal. -.El -.Pp -The -.Ev GRAB_COLOR_* -environment variables are SGR parameters. -For more information regarding SGR parameters see -.Sx SEE ALSO . -.Sh EXIT STATUS -.Ex -std -.Sh EXAMPLES -List all your systems CPU flags, sorted and without duplicates: -.Pp -.Dl $ grab 'x/^flags.*/ x/\ew+/ v/flags/' | sort | uniq -.Pp -Search for a pattern in multiple files without printing filenames: -.Pp -.Dl $ cat file1 file2 file3 | grab 'x/pattern/' -.Pp -Search for usages of an -.Ql -Vue component — -but only those which are being passed a -.Ql placeholder -property — -searching all files in the current git-repository: -.Pp -.Dl $ git grab 'x/]+>/ g/\ebplaceholder\eb/' '*.vue' -.Sh SEE ALSO -.Xr git-ls-files 1 , -.Xr grep 1 -.Rs -.%A Rob Pike -.%D 1987 -.%T Structural Regular Expressions -.%U https://doc.cat-v.org/bell_labs/structural_regexps/se.pdf -.Re -.Pp -.Lk https://en.wikipedia.org/wiki/ANSI_escape_code#SGR "SGR Parameters" -.Sh AUTHORS -.An Thomas Voss Aq Mt mail@thomasvoss.com diff --git a/grab.c b/grab.c deleted file mode 100644 index 96b5d9f..0000000 --- a/grab.c +++ /dev/null @@ -1,492 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if !GRAB_IS_C23 -# include -# include -# define nullptr NULL -#endif - -#include "da.h" - -#ifndef REG_STARTEND -# error "REG_STARTEND not defined" -#endif - -#define die(...) err(EXIT_FAILURE, __VA_ARGS__); -#define diex(...) errx(EXIT_FAILURE, __VA_ARGS__); -#define warn(...) \ - do { \ - warn(__VA_ARGS__); \ - rv = EXIT_FAILURE; \ - } while (0) -#define streq(a, b) (!strcmp(a, b)) - -#define EEARLY "Input string terminated prematurely" - -struct op { - char c; - regex_t pat; -}; - -struct ops { - struct op *buf; - size_t len, cap; -}; - -struct chars { - char *buf; - size_t len, cap; -}; - -struct sv { - char *p; - size_t len; -}; - -typedef unsigned char uchar; -typedef void (*cmd_func)(struct sv, struct ops, size_t, const char *); - -static void cmdg(struct sv, struct ops, size_t, const char *); -static void cmdx(struct sv, struct ops, size_t, const char *); -static void cmdy(struct sv, struct ops, size_t, const char *); - -static void grab(struct ops, FILE *, const char *); -static void putm(struct sv, const char *); -static regex_t mkregex(char *, size_t); -static struct ops comppat(char *); -static char *env_or_default(const char *, const char *); -#if GIT_GRAB -static FILE *getfstream(int n, char *v[n]); -#endif - -static bool xisspace(char); -static char *xstrchrnul(const char *, char); - -static int filecnt, rv; -static bool color, nflag, zflag; -static bool fflag = -#if GIT_GRAB - true; -#else - false; -#endif - -static const cmd_func op_table[UCHAR_MAX] = { - ['g'] = cmdg, - ['v'] = cmdg, - ['x'] = cmdx, - ['y'] = cmdy, -}; - -static const char esc_table[UCHAR_MAX] = { - ['\\'] = '\\', ['a'] = '\a', ['b'] = '\b', ['f'] = '\f', - ['n'] = '\n', ['r'] = '\r', ['t'] = '\t', ['v'] = '\v', -}; - -static void -usage(const char *s) -{ - fprintf(stderr, -#if GIT_GRAB - "Usage: %s [-nz] pattern [glob ...]\n" -#else - "Usage: %s [-fnz] pattern [file ...]\n" -#endif - " %s -h\n", - s, s); - exit(EXIT_FAILURE); -} - -int -main(int argc, char **argv) -{ - int opt; - struct ops ops; - struct option longopts[] = { - {"filenames", no_argument, 0, 'f'}, - {"help", no_argument, 0, 'h'}, - {"newline", no_argument, 0, 'n'}, - {"zero", no_argument, 0, 'z'}, - }; - -#if GIT_GRAB - char *entry = NULL; - size_t len; - ssize_t nr; - FILE *flist; - const char *opts = "hnz"; -#else - const char *opts = "fhnz"; -#endif - - argv[0] = basename(argv[0]); - if (argc < 2) - usage(argv[0]); - - setlocale(LC_ALL, ""); - - while ((opt = getopt_long(argc, argv, opts, longopts, nullptr)) != -1) { - switch (opt) { -#if !GIT_GRAB - case 'f': - fflag = true; - break; -#endif - case 'h': - execlp("man", "man", "1", argv[0], nullptr); - die("execlp: man 1 %s", argv[0]); - case 'n': - nflag = true; - break; - case 'z': - zflag = true; - break; - default: - usage(argv[0]); - } - } - - argc -= optind; - argv += optind; - filecnt = argc - 1; - - if (isatty(STDOUT_FILENO) == 1 && !env_or_default("NO_COLOR", nullptr)) - color = !streq(env_or_default("TERM", ""), "dumb"); - - ops = comppat(argv[0]); - -#if GIT_GRAB - if ((flist = getfstream(argc - 1, argv + 1)) == nullptr) - die("getfstream"); - while ((nr = getdelim(&entry, &len, '\0', flist)) > 0) { - FILE *fp; - - if ((fp = fopen(entry, "r")) == nullptr) - warn("fopen: %s", entry); - else { - grab(ops, fp, entry); - fclose(fp); - } - } - if (ferror(flist)) - warn("getdelim"); -#else - if (argc == 1) - grab(ops, stdin, "-"); - else { - for (int i = 1; i < argc; i++) { - FILE *fp; - - if (streq(argv[i], "-")) { - grab(ops, stdin, "-"); - } else if ((fp = fopen(argv[i], "r")) == nullptr) { - warn("fopen: %s", argv[i]); - } else { - grab(ops, fp, argv[i]); - fclose(fp); - } - } - } -#endif - -#ifdef GRAB_DEBUG - for (size_t i = 0; i < ops.len; i++) - regfree(&ops.buf[i].pat); - free(ops.buf); -#endif - - return rv; -} - -struct ops -comppat(char *s) -{ -#define skip_ws(p) for (; *(p) && xisspace(*(p)); (p)++) - struct ops ops; - - da_init(&ops, 8); - skip_ws(s); - if (!*s) - diex(EEARLY); - - do { - char delim; - char *p; - struct op op; - - op.c = *s; - if (!op_table[(uchar)op.c]) - diex("Invalid operator ‘%c’", *s); - if (!(delim = *++s)) - diex(EEARLY); - - p = ++s; - s = xstrchrnul(s, delim); - op.pat = mkregex(p, s - p); - da_append(&ops, op); - - if (*s) - s++; - skip_ws(s); - } while (*s && *(s + 1)); - - return ops; -#undef skip_ws -} - -void -grab(struct ops ops, FILE *stream, const char *filename) -{ - size_t n; - struct chars chars = {0}; - - do { - static_assert(sizeof(char) == 1, "sizeof(char) != 1; wtf?"); - chars.cap += BUFSIZ; - if ((chars.buf = realloc(chars.buf, chars.cap)) == nullptr) - die("realloc"); - chars.len += n = fread(chars.buf + chars.len, 1, BUFSIZ, stream); - } while (n == BUFSIZ); - - if (ferror(stream)) - warn("fread: %s", filename); - else { - struct sv sv = { - .p = chars.buf, - .len = chars.len, - }; - op_table[(uchar)ops.buf[0].c](sv, ops, 0, filename); - } - - free(chars.buf); -} - -void -cmdg(struct sv sv, struct ops ops, size_t i, const char *filename) -{ - int r; - regmatch_t pm = { - .rm_so = 0, - .rm_eo = sv.len, - }; - struct op op = ops.buf[i]; - - r = regexec(&op.pat, sv.p, 1, &pm, REG_STARTEND); - if ((r == REG_NOMATCH && op.c == 'g') || (r != REG_NOMATCH && op.c == 'v')) - return; - - if (i + 1 == ops.len) - putm(sv, filename); - else - op_table[(uchar)ops.buf[i + 1].c](sv, ops, i + 1, filename); -} - -void -cmdx(struct sv sv, struct ops ops, size_t i, const char *filename) -{ - regmatch_t pm = { - .rm_so = 0, - .rm_eo = sv.len, - }; - struct op op = ops.buf[i]; - - do { - struct sv nsv; - - if (regexec(&op.pat, sv.p, 1, &pm, REG_STARTEND) == REG_NOMATCH) - break; - nsv = (struct sv){.p = sv.p + pm.rm_so, .len = pm.rm_eo - pm.rm_so}; - if (i + 1 == ops.len) - putm(nsv, filename); - else - op_table[(uchar)ops.buf[i + 1].c](nsv, ops, i + 1, filename); - - if (pm.rm_so == pm.rm_eo) - pm.rm_eo++; - pm = (regmatch_t){ - .rm_so = pm.rm_eo, - .rm_eo = sv.len, - }; - } while (pm.rm_so < pm.rm_eo); -} - -void -cmdy(struct sv sv, struct ops ops, size_t i, const char *filename) -{ - regmatch_t pm = { - .rm_so = 0, - .rm_eo = sv.len, - }; - regmatch_t prev = { - .rm_so = 0, - .rm_eo = 0, - }; - struct op op = ops.buf[i]; - - do { - struct sv nsv; - - if (regexec(&op.pat, sv.p, 1, &pm, REG_STARTEND) == REG_NOMATCH) - break; - - if (prev.rm_so || prev.rm_eo || pm.rm_so) { - nsv = (struct sv){ - .p = sv.p + prev.rm_eo, - .len = pm.rm_so - prev.rm_eo, - }; - if (i + 1 == ops.len) - putm(nsv, filename); - else - op_table[(uchar)ops.buf[i + 1].c](nsv, ops, i + 1, filename); - } - - prev = pm; - if (pm.rm_so == pm.rm_eo) - pm.rm_eo++; - pm = (regmatch_t){ - .rm_so = pm.rm_eo, - .rm_eo = sv.len, - }; - } while (pm.rm_so < pm.rm_eo); - - if (prev.rm_eo < pm.rm_eo) { - struct sv nsv = { - .p = sv.p + pm.rm_so, - .len = pm.rm_eo - pm.rm_so, - }; - if (i + 1 == ops.len) - putm(nsv, filename); - else - op_table[(uchar)ops.buf[i + 1].c](nsv, ops, i + 1, filename); - } -} - -void -putm(struct sv sv, const char *filename) -{ - static const char *fnc, *sepc; - - if (!fnc) { - fnc = env_or_default("GRAB_COLOR_FNAME", "35"); - sepc = env_or_default("GRAB_COLOR_SEP", "36"); - } - - if (fflag || filecnt > 1) { - if (color) { - printf("\33[%sm%s\33[%sm%c\33[0m", fnc, filename, sepc, - zflag ? '\0' : ':'); - } else - printf("%s%c", filename, zflag ? '\0' : ':'); - } - fwrite(sv.p, 1, sv.len, stdout); - putchar(zflag ? '\0' : '\n'); -} - -regex_t -mkregex(char *s, size_t n) -{ - char c = s[n]; - int ret, cflags; - regex_t r; - - for (size_t i = 0; i < n - 1; i++) { - if (s[i] == '\\') { - char c = esc_table[(uchar)s[i + 1]]; - if (c) { - for (size_t j = i; j < n - 1; j++) - s[j] = s[j + 1]; - s[i] = c; - n--; - } - } - } - - s[n] = 0; - cflags = REG_EXTENDED; - if (nflag) - cflags |= REG_NEWLINE; - if ((ret = regcomp(&r, s, cflags)) != 0) { - char emsg[128]; - regerror(ret, &r, emsg, sizeof(emsg)); - diex("Failed to compile regex: %s", emsg); - } - s[n] = c; - - return r; -} - -#if GIT_GRAB -FILE * -getfstream(int argc, char *argv[argc]) -{ - pid_t pid; - int fds[2]; - enum { - FD_R, - FD_W, - }; - - if (pipe(fds) == -1) - die("pipe"); - - switch (pid = fork()) { - case -1: - die("fork"); - case 0:; - size_t len = argc + 5; - char **args; - - close(fds[FD_R]); - if (dup2(fds[FD_W], STDOUT_FILENO) == -1) - die("dup2"); - - if (!(args = malloc(len * sizeof(char *)))) - die("malloc"); - args[0] = "git"; - args[1] = "ls-files"; - args[2] = "-z"; - args[3] = "--"; - memcpy(args + 4, argv, argc * sizeof(char *)); - args[len - 1] = nullptr; - - execvp("git", args); - die("execvp: git ls-files -z"); - } - - close(fds[FD_W]); - return fdopen(fds[FD_R], "r"); -} -#endif - -char * -env_or_default(const char *e, const char *d) -{ - const char *s = getenv(e); - return (char *)(s && *s ? s : d); -} - -bool -xisspace(char c) -{ - return c == ' ' || c == '\t' || c == '\n'; -} - -char * -xstrchrnul(const char *s, char c) -{ - for (; *s; s++) { - if (*s == '\\') - s++; - else if (*s == c) - break; - } - return (char *)s; -} diff --git a/make.c b/make.c index d744ad7..95a6f54 100644 --- a/make.c +++ b/make.c @@ -72,11 +72,11 @@ main(int argc, char **argv) cmdprc(c); cmdadd(&c, "cp", "grab", "git-grab", bin); cmdprc(c); - cmdadd(&c, "cp", "grab.1", "git-grab.1", man); + cmdadd(&c, "cp", "man/grab.1", "man/git-grab.1", man); cmdprc(c); } } else { - if (foutdated("grab", "grab.c", "da.h")) { + if (foutdated("grab", "src/grab.c", "src/da.h")) { for (int i = 0; i < 2; i++) { char buf[] = "-DGIT_GRAB=X"; buf[sizeof(buf) - 2] = i + '0'; @@ -90,7 +90,7 @@ main(int argc, char **argv) cmdadd(&c, CFLAGS_DEBUG); else cmdadd(&c, CFLAGS_RELEASE); - cmdadd(&c, "-o", i == 0 ? "grab" : "git-grab", "grab.c"); + cmdadd(&c, "-o", i == 0 ? "grab" : "git-grab", "src/grab.c"); cmdprc(c); } } diff --git a/man/git-grab.1 b/man/git-grab.1 new file mode 100644 index 0000000..64c35cc --- /dev/null +++ b/man/git-grab.1 @@ -0,0 +1 @@ +.so grab.1 diff --git a/man/grab.1 b/man/grab.1 new file mode 100644 index 0000000..8bd2e9e --- /dev/null +++ b/man/grab.1 @@ -0,0 +1,189 @@ +.Dd January 12 2024 +.Dt GRAB 1 +.Os +.Sh NAME +.Nm grab +.Nd search for patterns in files +.Sh SYNOPSIS +.Nm +.Op Fl fnz +.Ar pattern +.Op Ar +.Nm +.Fl h +.Pp +.Nm "git grab" +.Op Fl nz +.Ar pattern +.Op Ar glob ... +.Nm "git grab" +.Fl h +.Sh DESCRIPTION +The +.Nm +utility searches for text in files corresponding to +.Ar pattern +and prints the corresponding matches to the standard output. +Unlike the +.Xr grep 1 +utility, +.Nm +is not strictly line-oriented; +instead of always matching on complete lines, +the user defines the structure of the text they would like to match and +filters on the results. +For more details on the pattern syntax, see +.Sx Pattern Syntax . +.Pp +The +.Nm git-grab +utility is identical to the +.Nm +utility in all ways except for two exceptions. +The first is that if no files +.Pq globs in this case to be precise +are specified, +input is not read from the standard-input but instead all files returned +by an invocation of +.Xr git-ls-files 1 +are processed. +If the user provides one or more globs, +only the files returned by +.Xr git-ls-files 1 +that match one or more of the given globs will be processed. +Secondly, the +.Fl f +option is not available; +its behavior is always assumed and cannot be disabled. +.Pp +.Nm +will read from the files provided on the command-line. +If no files are provided, the standard input will be read instead. +The special filename +.Sq - +can also be provided, +which represents the standard input. +.Pp +The default behavior of +.Nm +is to print pattern matches to the standard-output. +If more than one file argument is provided, +matches will be prefixed by their respective filename and a colon. +Note that this behavior is modified by the +.Fl f +and +.Fl z +options. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl f , Fl Fl filenames +Always prefix matches with the names of the files in which the matches +were made, +even if only 1 file was provided. +.It Fl h , Fl Fl help +Display help information by opening this manual page. +.It Fl n , Fl Fl newline +Don’t match newline characters +.Pq Sq \en +with the dot +.Pq Sq \&. +operator in patterns, +or in negated character-classes. +.It Fl z , Fl Fl zero +Separate output data by null bytes +.Pq Sq \e0 +instead of newlines. +This option can be used to process matches containing newlines. +.Pp +If combined with the +.Fl f +option, +or if two or more files were provided as arguments, +filenames and matches will be separated by null bytes instead of colons. +.El +.Ss Pattern Syntax +A pattern is a sequences of commands optionally separated by whitespace. +A command is an operator followed by a delimiter, a regular expression, +and then terminated by the same delimiter. The last command of a pattern +need not have a terminating delimiter. +.Pp +The supported operators are as follows: +.Bl -tag -compact +.It g +Keep selections that match the given regex. +.It v +Discard selections that match the given regex. +.It x +Select everything that matches the given regex. +.It y +Select everything that doesn’t match the given regex. +.El +.Pp +An example pattern to match all numbers that contain a ‘3’ but aren’t +‘1337’ could be +.Sq x/[0-9]+/ g/3/ v/^1337$/ . +In that pattern, +.Sq x/[0-9]+/ +selects all numbers in the input, +.Sq g/3/ +keeps only those matches that contain the number 3, +and +.Sq v/^1337$/ +filters out the specific number 1337. +.Pp +As you may use whichever delimiter you like, the following is also valid: +.Pp +.Dl x|[0-9]+| g.3. v#^1337# +.Sh ENVIRONMENT +.Bl -tag -width GRAB_COLOR_FNAME +.It Ev GRAB_COLOR_FNAME +The color(s) with which to highlight filenames, +or 35 if unset. +.It Ev GRAB_COLOR_SEPC +The color(s) with which to highlight the colon-separator between +filenames and matches, +or 36 if unset. +.It Ev NO_COLOR +Do not display any colored output when set to a non-empty string, +even if the standard-output is a terminal. +.El +.Pp +The +.Ev GRAB_COLOR_* +environment variables are SGR parameters. +For more information regarding SGR parameters see +.Sx SEE ALSO . +.Sh EXIT STATUS +.Ex -std +.Sh EXAMPLES +List all your systems CPU flags, sorted and without duplicates: +.Pp +.Dl $ grab 'x/^flags.*/ x/\ew+/ v/flags/' | sort | uniq +.Pp +Search for a pattern in multiple files without printing filenames: +.Pp +.Dl $ cat file1 file2 file3 | grab 'x/pattern/' +.Pp +Search for usages of an +.Ql +Vue component — +but only those which are being passed a +.Ql placeholder +property — +searching all files in the current git-repository: +.Pp +.Dl $ git grab 'x/]+>/ g/\ebplaceholder\eb/' '*.vue' +.Sh SEE ALSO +.Xr git-ls-files 1 , +.Xr grep 1 +.Rs +.%A Rob Pike +.%D 1987 +.%T Structural Regular Expressions +.%U https://doc.cat-v.org/bell_labs/structural_regexps/se.pdf +.Re +.Pp +.Lk https://en.wikipedia.org/wiki/ANSI_escape_code#SGR "SGR Parameters" +.Sh AUTHORS +.An Thomas Voss Aq Mt mail@thomasvoss.com diff --git a/src/da.h b/src/da.h new file mode 100644 index 0000000..1587ece --- /dev/null +++ b/src/da.h @@ -0,0 +1,64 @@ +/* + * Simple & stupid dynamic array single-header implementation. You can use the + * macros defined in this file with any structure that has the following fields: + * + * struct dyn_array { + * T *buf // Array of items + * N len // Length of array + * N cap // Capacity of array + * } + * + * The type ‘T’ is whatever type you want to store. The type ‘N’ is any numeric + * type, most likely ‘size_t’. + * + * You should include ‘err.h’ and ‘stdlib.h’ along with this file. If you want + * to use da_remove(), include ‘string.h’. The da_remove() macro also doesn’t + * bother with shrinking your array when the length is far lower than the + * capacity. If you care about that, do it yourself. + * + * + * Macro Overview + * ―――――――――――――― + * The argument ‘a’ to all of the below macros is a pointer to the dynamic array + * structure. + * + * da_init(a, n) Initialize the array with a capacity of ‘n’ items. + * da_append(a, x) Append the item ‘x’ to the array + * da_remove(a, x) Remove the item ‘x’ from the array + * da_remove_range(a, x, y) Remove the items between the range [x, y) + */ + +#ifndef MANGO_DA_H +#define MANGO_DA_H + +#define __da_s(a) (sizeof(*(a)->buf)) + +#define da_init(a, n) \ + do { \ + (a)->cap = n; \ + (a)->len = 0; \ + (a)->buf = malloc((a)->cap * __da_s(a)); \ + if ((a)->buf == NULL) \ + err(EXIT_FAILURE, "malloc"); \ + } while (0) + +#define da_append(a, x) \ + do { \ + if ((a)->len >= (a)->cap) { \ + (a)->cap = (a)->cap * 2 + 1; \ + (a)->buf = realloc((a)->buf, (a)->cap * __da_s(a)); \ + if ((a)->buf == NULL) \ + err(EXIT_FAILURE, "realloc"); \ + } \ + (a)->buf[(a)->len++] = (x); \ + } while (0) + +#define da_remove(a, i) da_remove_range((a), (i), (i) + 1) + +#define da_remove_range(a, i, j) \ + do { \ + memmove((a)->buf + (i), (a)->buf + (j), ((a)->len - (j)) * __da_s(a)); \ + (a)->len -= j - i; \ + } while (0) + +#endif /* !MANGO_DA_H */ diff --git a/src/grab.c b/src/grab.c new file mode 100644 index 0000000..96b5d9f --- /dev/null +++ b/src/grab.c @@ -0,0 +1,492 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !GRAB_IS_C23 +# include +# include +# define nullptr NULL +#endif + +#include "da.h" + +#ifndef REG_STARTEND +# error "REG_STARTEND not defined" +#endif + +#define die(...) err(EXIT_FAILURE, __VA_ARGS__); +#define diex(...) errx(EXIT_FAILURE, __VA_ARGS__); +#define warn(...) \ + do { \ + warn(__VA_ARGS__); \ + rv = EXIT_FAILURE; \ + } while (0) +#define streq(a, b) (!strcmp(a, b)) + +#define EEARLY "Input string terminated prematurely" + +struct op { + char c; + regex_t pat; +}; + +struct ops { + struct op *buf; + size_t len, cap; +}; + +struct chars { + char *buf; + size_t len, cap; +}; + +struct sv { + char *p; + size_t len; +}; + +typedef unsigned char uchar; +typedef void (*cmd_func)(struct sv, struct ops, size_t, const char *); + +static void cmdg(struct sv, struct ops, size_t, const char *); +static void cmdx(struct sv, struct ops, size_t, const char *); +static void cmdy(struct sv, struct ops, size_t, const char *); + +static void grab(struct ops, FILE *, const char *); +static void putm(struct sv, const char *); +static regex_t mkregex(char *, size_t); +static struct ops comppat(char *); +static char *env_or_default(const char *, const char *); +#if GIT_GRAB +static FILE *getfstream(int n, char *v[n]); +#endif + +static bool xisspace(char); +static char *xstrchrnul(const char *, char); + +static int filecnt, rv; +static bool color, nflag, zflag; +static bool fflag = +#if GIT_GRAB + true; +#else + false; +#endif + +static const cmd_func op_table[UCHAR_MAX] = { + ['g'] = cmdg, + ['v'] = cmdg, + ['x'] = cmdx, + ['y'] = cmdy, +}; + +static const char esc_table[UCHAR_MAX] = { + ['\\'] = '\\', ['a'] = '\a', ['b'] = '\b', ['f'] = '\f', + ['n'] = '\n', ['r'] = '\r', ['t'] = '\t', ['v'] = '\v', +}; + +static void +usage(const char *s) +{ + fprintf(stderr, +#if GIT_GRAB + "Usage: %s [-nz] pattern [glob ...]\n" +#else + "Usage: %s [-fnz] pattern [file ...]\n" +#endif + " %s -h\n", + s, s); + exit(EXIT_FAILURE); +} + +int +main(int argc, char **argv) +{ + int opt; + struct ops ops; + struct option longopts[] = { + {"filenames", no_argument, 0, 'f'}, + {"help", no_argument, 0, 'h'}, + {"newline", no_argument, 0, 'n'}, + {"zero", no_argument, 0, 'z'}, + }; + +#if GIT_GRAB + char *entry = NULL; + size_t len; + ssize_t nr; + FILE *flist; + const char *opts = "hnz"; +#else + const char *opts = "fhnz"; +#endif + + argv[0] = basename(argv[0]); + if (argc < 2) + usage(argv[0]); + + setlocale(LC_ALL, ""); + + while ((opt = getopt_long(argc, argv, opts, longopts, nullptr)) != -1) { + switch (opt) { +#if !GIT_GRAB + case 'f': + fflag = true; + break; +#endif + case 'h': + execlp("man", "man", "1", argv[0], nullptr); + die("execlp: man 1 %s", argv[0]); + case 'n': + nflag = true; + break; + case 'z': + zflag = true; + break; + default: + usage(argv[0]); + } + } + + argc -= optind; + argv += optind; + filecnt = argc - 1; + + if (isatty(STDOUT_FILENO) == 1 && !env_or_default("NO_COLOR", nullptr)) + color = !streq(env_or_default("TERM", ""), "dumb"); + + ops = comppat(argv[0]); + +#if GIT_GRAB + if ((flist = getfstream(argc - 1, argv + 1)) == nullptr) + die("getfstream"); + while ((nr = getdelim(&entry, &len, '\0', flist)) > 0) { + FILE *fp; + + if ((fp = fopen(entry, "r")) == nullptr) + warn("fopen: %s", entry); + else { + grab(ops, fp, entry); + fclose(fp); + } + } + if (ferror(flist)) + warn("getdelim"); +#else + if (argc == 1) + grab(ops, stdin, "-"); + else { + for (int i = 1; i < argc; i++) { + FILE *fp; + + if (streq(argv[i], "-")) { + grab(ops, stdin, "-"); + } else if ((fp = fopen(argv[i], "r")) == nullptr) { + warn("fopen: %s", argv[i]); + } else { + grab(ops, fp, argv[i]); + fclose(fp); + } + } + } +#endif + +#ifdef GRAB_DEBUG + for (size_t i = 0; i < ops.len; i++) + regfree(&ops.buf[i].pat); + free(ops.buf); +#endif + + return rv; +} + +struct ops +comppat(char *s) +{ +#define skip_ws(p) for (; *(p) && xisspace(*(p)); (p)++) + struct ops ops; + + da_init(&ops, 8); + skip_ws(s); + if (!*s) + diex(EEARLY); + + do { + char delim; + char *p; + struct op op; + + op.c = *s; + if (!op_table[(uchar)op.c]) + diex("Invalid operator ‘%c’", *s); + if (!(delim = *++s)) + diex(EEARLY); + + p = ++s; + s = xstrchrnul(s, delim); + op.pat = mkregex(p, s - p); + da_append(&ops, op); + + if (*s) + s++; + skip_ws(s); + } while (*s && *(s + 1)); + + return ops; +#undef skip_ws +} + +void +grab(struct ops ops, FILE *stream, const char *filename) +{ + size_t n; + struct chars chars = {0}; + + do { + static_assert(sizeof(char) == 1, "sizeof(char) != 1; wtf?"); + chars.cap += BUFSIZ; + if ((chars.buf = realloc(chars.buf, chars.cap)) == nullptr) + die("realloc"); + chars.len += n = fread(chars.buf + chars.len, 1, BUFSIZ, stream); + } while (n == BUFSIZ); + + if (ferror(stream)) + warn("fread: %s", filename); + else { + struct sv sv = { + .p = chars.buf, + .len = chars.len, + }; + op_table[(uchar)ops.buf[0].c](sv, ops, 0, filename); + } + + free(chars.buf); +} + +void +cmdg(struct sv sv, struct ops ops, size_t i, const char *filename) +{ + int r; + regmatch_t pm = { + .rm_so = 0, + .rm_eo = sv.len, + }; + struct op op = ops.buf[i]; + + r = regexec(&op.pat, sv.p, 1, &pm, REG_STARTEND); + if ((r == REG_NOMATCH && op.c == 'g') || (r != REG_NOMATCH && op.c == 'v')) + return; + + if (i + 1 == ops.len) + putm(sv, filename); + else + op_table[(uchar)ops.buf[i + 1].c](sv, ops, i + 1, filename); +} + +void +cmdx(struct sv sv, struct ops ops, size_t i, const char *filename) +{ + regmatch_t pm = { + .rm_so = 0, + .rm_eo = sv.len, + }; + struct op op = ops.buf[i]; + + do { + struct sv nsv; + + if (regexec(&op.pat, sv.p, 1, &pm, REG_STARTEND) == REG_NOMATCH) + break; + nsv = (struct sv){.p = sv.p + pm.rm_so, .len = pm.rm_eo - pm.rm_so}; + if (i + 1 == ops.len) + putm(nsv, filename); + else + op_table[(uchar)ops.buf[i + 1].c](nsv, ops, i + 1, filename); + + if (pm.rm_so == pm.rm_eo) + pm.rm_eo++; + pm = (regmatch_t){ + .rm_so = pm.rm_eo, + .rm_eo = sv.len, + }; + } while (pm.rm_so < pm.rm_eo); +} + +void +cmdy(struct sv sv, struct ops ops, size_t i, const char *filename) +{ + regmatch_t pm = { + .rm_so = 0, + .rm_eo = sv.len, + }; + regmatch_t prev = { + .rm_so = 0, + .rm_eo = 0, + }; + struct op op = ops.buf[i]; + + do { + struct sv nsv; + + if (regexec(&op.pat, sv.p, 1, &pm, REG_STARTEND) == REG_NOMATCH) + break; + + if (prev.rm_so || prev.rm_eo || pm.rm_so) { + nsv = (struct sv){ + .p = sv.p + prev.rm_eo, + .len = pm.rm_so - prev.rm_eo, + }; + if (i + 1 == ops.len) + putm(nsv, filename); + else + op_table[(uchar)ops.buf[i + 1].c](nsv, ops, i + 1, filename); + } + + prev = pm; + if (pm.rm_so == pm.rm_eo) + pm.rm_eo++; + pm = (regmatch_t){ + .rm_so = pm.rm_eo, + .rm_eo = sv.len, + }; + } while (pm.rm_so < pm.rm_eo); + + if (prev.rm_eo < pm.rm_eo) { + struct sv nsv = { + .p = sv.p + pm.rm_so, + .len = pm.rm_eo - pm.rm_so, + }; + if (i + 1 == ops.len) + putm(nsv, filename); + else + op_table[(uchar)ops.buf[i + 1].c](nsv, ops, i + 1, filename); + } +} + +void +putm(struct sv sv, const char *filename) +{ + static const char *fnc, *sepc; + + if (!fnc) { + fnc = env_or_default("GRAB_COLOR_FNAME", "35"); + sepc = env_or_default("GRAB_COLOR_SEP", "36"); + } + + if (fflag || filecnt > 1) { + if (color) { + printf("\33[%sm%s\33[%sm%c\33[0m", fnc, filename, sepc, + zflag ? '\0' : ':'); + } else + printf("%s%c", filename, zflag ? '\0' : ':'); + } + fwrite(sv.p, 1, sv.len, stdout); + putchar(zflag ? '\0' : '\n'); +} + +regex_t +mkregex(char *s, size_t n) +{ + char c = s[n]; + int ret, cflags; + regex_t r; + + for (size_t i = 0; i < n - 1; i++) { + if (s[i] == '\\') { + char c = esc_table[(uchar)s[i + 1]]; + if (c) { + for (size_t j = i; j < n - 1; j++) + s[j] = s[j + 1]; + s[i] = c; + n--; + } + } + } + + s[n] = 0; + cflags = REG_EXTENDED; + if (nflag) + cflags |= REG_NEWLINE; + if ((ret = regcomp(&r, s, cflags)) != 0) { + char emsg[128]; + regerror(ret, &r, emsg, sizeof(emsg)); + diex("Failed to compile regex: %s", emsg); + } + s[n] = c; + + return r; +} + +#if GIT_GRAB +FILE * +getfstream(int argc, char *argv[argc]) +{ + pid_t pid; + int fds[2]; + enum { + FD_R, + FD_W, + }; + + if (pipe(fds) == -1) + die("pipe"); + + switch (pid = fork()) { + case -1: + die("fork"); + case 0:; + size_t len = argc + 5; + char **args; + + close(fds[FD_R]); + if (dup2(fds[FD_W], STDOUT_FILENO) == -1) + die("dup2"); + + if (!(args = malloc(len * sizeof(char *)))) + die("malloc"); + args[0] = "git"; + args[1] = "ls-files"; + args[2] = "-z"; + args[3] = "--"; + memcpy(args + 4, argv, argc * sizeof(char *)); + args[len - 1] = nullptr; + + execvp("git", args); + die("execvp: git ls-files -z"); + } + + close(fds[FD_W]); + return fdopen(fds[FD_R], "r"); +} +#endif + +char * +env_or_default(const char *e, const char *d) +{ + const char *s = getenv(e); + return (char *)(s && *s ? s : d); +} + +bool +xisspace(char c) +{ + return c == ' ' || c == '\t' || c == '\n'; +} + +char * +xstrchrnul(const char *s, char c) +{ + for (; *s; s++) { + if (*s == '\\') + s++; + else if (*s == c) + break; + } + return (char *)s; +} -- cgit v1.2.3