aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Voss <thomas.voss@humanwave.nl> 2024-01-18 13:07:57 +0100
committerThomas Voss <thomas.voss@humanwave.nl> 2024-01-18 13:07:57 +0100
commit70ce7c85c43bef33c4f29dcd015e6b66f81f4596 (patch)
tree10450631f08183ad2ae85614c6e263f0db6fa4eb
parent719b3e8ea5d35555be31b3fedcb3495b19a5856c (diff)
Add support for UTF-8 and Unicode
-rw-r--r--src/grab.c43
1 files changed, 28 insertions, 15 deletions
diff --git a/src/grab.c b/src/grab.c
index 0686bf6..15eb3eb 100644
--- a/src/grab.c
+++ b/src/grab.c
@@ -12,6 +12,9 @@
# include <pcre2posix.h>
#else
# include <regex.h>
+
+# define REG_UCP 0
+# define REG_UTF 0
# ifndef REG_STARTEND
# error "REG_STARTEND not defined"
# endif
@@ -75,7 +78,7 @@ static bool xisspace(char);
static char *xstrchrnul(const char *, char);
static int filecnt, rv;
-static bool cflag, nflag, zflag;
+static bool cflag, nflag, Uflag, zflag;
static bool fflag =
#if GIT_GRAB
true;
@@ -95,9 +98,9 @@ usage(const char *s)
{
fprintf(stderr,
#if GIT_GRAB
- "Usage: %s [-cnz] pattern [glob ...]\n"
+ "Usage: %s [-cnUz] pattern [glob ...]\n"
#else
- "Usage: %s [-cfnz] pattern [file ...]\n"
+ "Usage: %s [-cfnUz] pattern [file ...]\n"
#endif
" %s -h\n",
s, s);
@@ -110,14 +113,15 @@ main(int argc, char **argv)
int opt;
struct ops ops;
struct option longopts[] = {
- {"color", no_argument, 0, 'c'},
+ {"color", no_argument, 0, 'c'},
#if GIT_GRAB
- {"filenames", no_argument, 0, 'f'},
+ {"filenames", no_argument, 0, 'f'},
#endif
- {"help", no_argument, 0, 'h'},
- {"newline", no_argument, 0, 'n'},
- {"zero", no_argument, 0, 'z'},
- {nullptr, 0, 0, 0 },
+ {"help", no_argument, 0, 'h'},
+ {"newline", no_argument, 0, 'n'},
+ {"no-unicode", no_argument, 0, 'U'},
+ {"zero", no_argument, 0, 'z'},
+ {nullptr, 0, 0, 0 },
};
#if GIT_GRAB
@@ -125,9 +129,9 @@ main(int argc, char **argv)
size_t len;
ssize_t nr;
FILE *flist;
- const char *opts = "chnz";
+ const char *opts = "chnUz";
#else
- const char *opts = "cfhnz";
+ const char *opts = "cfhnUz";
#endif
argv[0] = basename(argv[0]);
@@ -146,15 +150,22 @@ main(int argc, char **argv)
fflag = true;
break;
#endif
- case 'h':
- execlp("man", "man", "1", argv[0], nullptr);
- die("execlp: man 1 %s", argv[0]);
case 'n':
nflag = true;
break;
+ case 'U':
+#if GRAB_DO_PCRE
+ Uflag = true;
+ break;
+#else
+ errx(2, "program not built with PCRE support");
+#endif
case 'z':
zflag = true;
break;
+ case 'h':
+ execlp("man", "man", "1", argv[0], nullptr);
+ die("execlp: man 1 %s", argv[0]);
default:
usage(argv[0]);
}
@@ -452,9 +463,11 @@ mkregex(char *s, size_t n)
regex_t r;
s[n] = 0;
- cflags = REG_EXTENDED;
+ cflags = REG_EXTENDED | REG_UTF;
if (nflag)
cflags |= REG_NEWLINE;
+ if (!Uflag)
+ cflags |= REG_UCP;
if ((ret = regcomp(&r, s, cflags)) != 0) {
char emsg[256];
regerror(ret, &r, emsg, sizeof(emsg));