Make sorting stuff much faster

author: Thomas Voss <mail@thomasvoss.com> 2023-06-25 18:54:22 +0200
committer: Thomas Voss <mail@thomasvoss.com> 2023-06-25 18:54:22 +0200
commit: a4ce08bde0bcef664706b78146077ad7053a468b (patch)
tree: 6671037e22591b98def5e99a7e2fbf3a5a0c7ff0 /bilingual_sort.c
parent: 529114f2c769c359f2c0353631a183670d3dbebe (diff)
1 files changed, 41 insertions, 0 deletions
diff --git a/bilingual_sort.c b/bilingual_sort.c
new file mode 100644
index 0000000..0ea5e0c
--- /dev/null
+++ b/bilingual_sort.c
@@ -0,0 +1,41 @@
+/* This is a simple program that loops until standard input reaches EOF.  It
+ * reads lines of the form “str1:str2”, compares the two strings according to
+ * the locale (set by gawk), and prints whichever string collates first.
+ *
+ * The reason we use this instead of just sort(1) or something is that
+ * spawning one process for each comparison is super slow.  This lets us do
+ * multiple comparisons of string-pairs in only a single process.
+ */
+
+#define _GNU_SOURCE
+
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* For each "str1:str2" line on stdin, print the string that collates first. */
+int
+main(void)
+{
+	char *s1, *s2, line[256];
+
+	/* We need line buffering to make gawk interact with this properly */
+	setvbuf(stdout, NULL, _IOLBF, 0);
+	setlocale(LC_ALL, "");
+
+	while (fgets(line, sizeof(line), stdin) != NULL) {
+		/* Strip the newline kept by fgets().  A line without ':' makes
+		 * strchr() return NULL; compare it with itself so it is echoed. */
+		line[strcspn(line, "\n")] = '\0';
+		s1 = line;
+		s2 = strchr(line, ':');
+		if (s2 != NULL)
+			*s2++ = '\0';
+		else
+			s2 = s1;
+		puts(strcoll(s1, s2) < 0 ? s1 : s2);
+	}
+	return EXIT_SUCCESS;
+}
author	Thomas Voss <mail@thomasvoss.com>	2023-06-25 18:54:22 +0200
committer	Thomas Voss <mail@thomasvoss.com>	2023-06-25 18:54:22 +0200
commit	a4ce08bde0bcef664706b78146077ad7053a468b (patch)
tree	6671037e22591b98def5e99a7e2fbf3a5a0c7ff0 /bilingual_sort.c
parent	529114f2c769c359f2c0353631a183670d3dbebe (diff)