summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorThomas Voss <mail@thomasvoss.com> 2023-06-25 18:54:22 +0200
committerThomas Voss <mail@thomasvoss.com> 2023-06-25 18:54:22 +0200
commita4ce08bde0bcef664706b78146077ad7053a468b (patch)
tree6671037e22591b98def5e99a7e2fbf3a5a0c7ff0
parent529114f2c769c359f2c0353631a183670d3dbebe (diff)
Make sorting stuff much faster
-rw-r--r--.gitignore1
-rw-r--r--Makefile6
-rw-r--r--bilingual_sort.c41
-rwxr-xr-xbuild22
-rw-r--r--scripts/bilingual_sort.awk6
5 files changed, 64 insertions, 12 deletions
diff --git a/.gitignore b/.gitignore
index ed46773..e3baee2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
fonts/
done/
out/
+bilingual_sort
server
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e917176
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,6 @@
+# `all` and `clean` name actions rather than files, so mark them phony;
+# otherwise a stray file called `all` or `clean` would make the rule a no-op.
+.PHONY: all clean
+
+# Build the Go server and the collation helper used by the build script.
+all:
+	go build server.go
+	${CC} -O3 bilingual_sort.c -o bilingual_sort
+
+# Remove the binaries produced by `all`.
+clean:
+	rm -f bilingual_sort server
diff --git a/bilingual_sort.c b/bilingual_sort.c
new file mode 100644
index 0000000..0ea5e0c
--- /dev/null
+++ b/bilingual_sort.c
@@ -0,0 +1,41 @@
+/* This is a simple program that runs in an infinite loop. It reads strings
+ * from standard input in the form of “str1:str2”, sorts them according to the
+ * locale (set by gawk), and then prints out the first string in sorted order.
+ *
+ * The reason we use this instead of just sort(1) or something is because
+ * spawning one process for each comparison is super slow. This lets us do
+ * multiple comparisons of string-pairs in only a single process.
+ */
+
+#define _GNU_SOURCE
+
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Serve comparison requests until standard input reaches EOF.
+ *
+ * Each request is one line of the form “str1:str2”.  The line is split at
+ * the first ‘:’, the two halves are compared with strcoll(3) under the locale
+ * taken from the environment, and the half that collates first is written
+ * back on its own line (str2 is chosen when the two collate equally).
+ *
+ * Exactly one line is written per line read, even for malformed requests, so
+ * that a coprocess peer that writes a request and then blocks reading the
+ * reply is never left waiting.
+ *
+ * Returns EXIT_SUCCESS on a clean EOF, and EXIT_FAILURE if reading standard
+ * input or writing standard output fails.
+ */
+int
+main(void)
+{
+	char *line = NULL;
+	size_t cap = 0;
+	int status = EXIT_SUCCESS;
+
+	/* We need line buffering to make gawk interact with this properly */
+	setvbuf(stdout, NULL, _IOLBF, 0);
+	setlocale(LC_ALL, "");
+
+	/* getline(3) grows the buffer as required, so an overlong request is
+	 * never split into several bogus requests the way a fixed-size
+	 * fgets(3) buffer would split it.
+	 */
+	while (getline(&line, &cap, stdin) != -1) {
+		const char *first;
+		char *sep;
+
+		/* Drop the trailing newline, if any (the last line may lack one) */
+		line[strcspn(line, "\n")] = '\0';
+
+		sep = strchr(line, ':');
+		if (sep == NULL) {
+			/* Malformed request: there is nothing to compare.  Report
+			 * it and echo the line back instead of dereferencing a
+			 * null pointer, keeping the request/reply protocol in sync.
+			 */
+			fprintf(stderr, "bilingual_sort: missing ':' in \"%s\"\n",
+			        line);
+			first = line;
+		} else {
+			const char *s1 = line;
+			const char *s2 = sep + 1;
+
+			*sep = '\0';
+			first = strcoll(s1, s2) < 0 ? s1 : s2;
+		}
+
+		if (puts(first) == EOF) {
+			/* The reader went away; there is no point in continuing */
+			status = EXIT_FAILURE;
+			break;
+		}
+	}
+
+	if (ferror(stdin))
+		status = EXIT_FAILURE;
+
+	free(line);
+	return status;
+}
diff --git a/build b/build
index 66cf511..1a4d07e 100755
--- a/build
+++ b/build
@@ -221,10 +221,13 @@ compile_for_lang() {
}
END {
- if (lang == "en")
- asort(col)
- else
- asort(col, col, "bilingual_sort")
+ locale = lang == "pt" \
+ ? "pt_BR.UTF-8" \
+ : lang "_" toupper(lang) ".UTF-8"
+
+ CMD = "LC_ALL=" locale " ./bilingual_sort"
+ asort(col, col, "bilingual_sort")
+ close(CMD)
for (i = 1; i <= 3; i++) {
if (i == 3 && denom == 5) {
@@ -289,10 +292,13 @@ compile_for_lang() {
}
END {
- if (lang == "en")
- asort(col)
- else
- asort(col, col, "bilingual_sort")
+ locale = lang == "pt" \
+ ? "pt_BR.UTF-8" \
+ : lang "_" toupper(lang) ".UTF-8"
+
+ CMD = "LC_ALL=" locale " ./bilingual_sort"
+ asort(col, col, "bilingual_sort")
+ close(CMD)
for (i = 1; i <= 2; i++) {
if (i == 1)
diff --git a/scripts/bilingual_sort.awk b/scripts/bilingual_sort.awk
index 6f6f53a..7e50136 100644
--- a/scripts/bilingual_sort.awk
+++ b/scripts/bilingual_sort.awk
@@ -3,10 +3,8 @@ function bilingual_sort(i1, v1, i2, v2, l, r, f)
l = cc2name[v1[0]]
r = cc2name[v2[0]]
- printf "%s\n%s\n", l, r |& "sort"
- close("sort", "to")
- "sort" |& getline f
- close("sort", "from")
+ print l ":" r |& CMD
+ CMD |& getline f
return f == l ? -1 : +1
}