/* This is a simple program that runs in a loop until end of input.  It reads
 * lines from standard input in the form "str1:str2", compares the two strings
 * according to the locale (set by gawk), and prints out whichever string
 * comes first in sorted order.
 *
 * The reason we use this instead of just sort(1) or something is because
 * spawning one process for each comparison is super slow.  This lets us do
 * multiple comparisons of string-pairs in only a single process.
 *
 * Every input line produces exactly one output line, even when the input is
 * malformed, so that requests and replies always stay paired up:
 *   - a line without a ':' is echoed back (minus its newline);
 *   - a line too long for the buffer is truncated and the excess is dropped.
 *
 * Only ISO C library functions are used, so no feature-test macro is needed.
 */

#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Strip the trailing newline from `line`, which was just read from `fp` with
 * fgets().  If there is no newline, the line may have been longer than the
 * buffer: consume and discard the rest of it so that the leftover is not
 * mistaken for the next request.  At end-of-file this reads nothing.
 */
static void
chomp_or_drain(char *line, FILE *fp)
{
	size_t len = strcspn(line, "\n");

	if (line[len] == '\n') {
		line[len] = '\0';
		return;
	}

	for (int ch; (ch = getc(fp)) != EOF && ch != '\n';)
		;
}

/* Split "str1:str2" in place at the first ':'.  On success `*s1` and `*s2`
 * point at the two NUL-terminated halves inside `line` and true is returned.
 * If `line` contains no ':' it is left untouched and false is returned.
 */
static bool
split_pair(char *line, char **s1, char **s2)
{
	char *sep = strchr(line, ':');

	if (sep == NULL)
		return false;

	*sep = '\0';
	*s1 = line;
	*s2 = sep + 1;
	return true;
}

/* Read "str1:str2" requests from standard input until end-of-file and answer
 * each one with the string that collates first.  Returns EXIT_FAILURE if
 * writing a reply fails, EXIT_SUCCESS otherwise.
 */
int
main(void)
{
	char line[256];

	/* We need line buffering to make gawk interact with this properly */
	setvbuf(stdout, NULL, _IOLBF, 0);
	setlocale(LC_ALL, "");

	while (fgets(line, sizeof(line), stdin) != NULL) {
		char *s1, *s2;
		const char *first = line;

		chomp_or_drain(line, stdin);

		/* When both strings collate equal, the second one is printed */
		if (split_pair(line, &s1, &s2))
			first = strcoll(s1, s2) < 0 ? s1 : s2;

		/* Nobody is reading our replies anymore; no point in going on */
		if (puts(first) == EOF)
			return EXIT_FAILURE;
	}

	return EXIT_SUCCESS;
}