summaryrefslogtreecommitdiffhomepage
path: root/bilingual_sort.c
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2023-06-25 18:54:22 +0200
committerThomas Voss <mail@thomasvoss.com> 2023-06-25 18:54:22 +0200
commita4ce08bde0bcef664706b78146077ad7053a468b (patch)
tree6671037e22591b98def5e99a7e2fbf3a5a0c7ff0 /bilingual_sort.c
parent529114f2c769c359f2c0353631a183670d3dbebe (diff)
Make sorting stuff much faster
Diffstat (limited to 'bilingual_sort.c')
-rw-r--r--bilingual_sort.c41
1 files changed, 41 insertions, 0 deletions
diff --git a/bilingual_sort.c b/bilingual_sort.c
new file mode 100644
index 0000000..0ea5e0c
--- /dev/null
+++ b/bilingual_sort.c
@@ -0,0 +1,41 @@
+/* This is a simple program that loops until end-of-input.  It reads lines
+ * from standard input in the form of “str1:str2”, compares the two strings
+ * according to the locale (set by gawk), and prints whichever sorts first.
+ *
+ * We use this instead of just sort(1) or similar because spawning one
+ * process for each comparison is super slow.  This lets us do multiple
+ * comparisons of string-pairs in only a single process.
+ */
+
+#define _GNU_SOURCE
+
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Answer each “str1:str2” line on stdin with whichever string collates first */
+int
+main(void)
+{
+	char *s1, *s2, line[256];
+
+	/* We need line buffering to make gawk interact with this properly */
+	setvbuf(stdout, NULL, _IOLBF, 0);
+	setlocale(LC_ALL, "");
+
+	while (fgets(line, sizeof(line), stdin) != NULL) {
+		line[strcspn(line, "\n")] = '\0';	/* drop the trailing newline */
+		s1 = line;
+		s2 = strchr(line, ':');
+		if (s2 == NULL) {
+			/* No ':' so there is nothing to split; echo the line back */
+			puts(s1);
+			continue;
+		}
+		*s2++ = '\0';	/* terminate s1 in place; s2 starts after the ':' */
+		puts(strcoll(s1, s2) < 0 ? s1 : s2);	/* on a tie, s2 is printed */
+	}
+	return EXIT_SUCCESS;
+}