diff options
author | Thomas Voss <mail@thomasvoss.com> | 2023-06-25 18:54:22 +0200 |
---|---|---|
committer | Thomas Voss <mail@thomasvoss.com> | 2023-06-25 18:54:22 +0200 |
commit | a4ce08bde0bcef664706b78146077ad7053a468b (patch) | |
tree | 6671037e22591b98def5e99a7e2fbf3a5a0c7ff0 | |
parent | 529114f2c769c359f2c0353631a183670d3dbebe (diff) |
Make sorting stuff much faster
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | bilingual_sort.c | 58 | ||||
-rwxr-xr-x | build | 22 | ||||
-rw-r--r-- | scripts/bilingual_sort.awk | 6 |
5 files changed, 81 insertions, 12 deletions
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 fonts/
 done/
 out/
+bilingual_sort
 server
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e917176
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,6 @@
+all:
+	go build server.go
+	${CC} -O3 bilingual_sort.c -o bilingual_sort
+
+clean:
+	rm -f bilingual_sort server
diff --git a/bilingual_sort.c b/bilingual_sort.c
new file mode 100644
index 0000000..0ea5e0c
--- /dev/null
+++ b/bilingual_sort.c
@@ -0,0 +1,58 @@
+/* This is a simple program that runs until standard input is closed.  It reads
+ * lines from standard input in the form of “str1:str2”, compares the two
+ * strings according to the locale (set by gawk), and then prints out whichever
+ * string sorts first.  Exactly one line is printed for every line read.
+ *
+ * The reason we use this instead of just sort(1) or something is because
+ * spawning one process for each comparison is super slow.  This lets us do
+ * multiple comparisons of string-pairs in only a single process.
+ */
+
+#define _GNU_SOURCE
+
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+main(void)
+{
+	char *s1, *s2,
+	     *line = NULL;
+	size_t cap = 0;
+	ssize_t len;
+
+	/* We need line buffering to make gawk interact with this properly */
+	setvbuf(stdout, NULL, _IOLBF, 0);
+
+	/* If the requested locale is unavailable this call fails and we stay in
+	 * the "C" locale, where strcoll() is a plain bytewise comparison. */
+	setlocale(LC_ALL, "");
+
+	while (true) {
+		/* getline() grows the buffer as needed, so a long line is never
+		 * split up and misread as several separate requests. */
+		len = getline(&line, &cap, stdin);
+		if (len == -1)
+			break;
+		if (len > 0 && line[len - 1] == '\n')
+			line[len - 1] = '\0';
+
+		s1 = line;
+		s2 = strchr(line, ':');
+		if (s2 == NULL) {
+			/* Malformed request; echo it back so that the caller,
+			 * which reads one reply per request, does not hang. */
+			puts(line);
+			continue;
+		}
+		*s2++ = '\0';
+
+		puts(strcoll(s1, s2) < 0 ? s1 : s2);
+	}
+
+	free(line);
+	return EXIT_SUCCESS;
+}
diff --git a/build b/build
--- a/build
+++ b/build
@@ -221,10 +221,13 @@ compile_for_lang() {
 		}
 
 		END {
-			if (lang == "en")
-				asort(col)
-			else
-				asort(col, col, "bilingual_sort")
+			locale = lang == "pt" \
+				? "pt_BR.UTF-8" \
+				: lang "_" toupper(lang) ".UTF-8"
+
+			CMD = "LC_ALL=" locale " ./bilingual_sort"
+			asort(col, col, "bilingual_sort")
+			close(CMD)
 
 			for (i = 1; i <= 3; i++) {
 				if (i == 3 && denom == 5) {
@@ -289,10 +292,13 @@ compile_for_lang() {
 		}
 
 		END {
-			if (lang == "en")
-				asort(col)
-			else
-				asort(col, col, "bilingual_sort")
+			locale = lang == "pt" \
+				? "pt_BR.UTF-8" \
+				: lang "_" toupper(lang) ".UTF-8"
+
+			CMD = "LC_ALL=" locale " ./bilingual_sort"
+			asort(col, col, "bilingual_sort")
+			close(CMD)
 
 			for (i = 1; i <= 2; i++) {
 				if (i == 1)
diff --git a/scripts/bilingual_sort.awk b/scripts/bilingual_sort.awk
index 6f6f53a..7e50136 100644
--- a/scripts/bilingual_sort.awk
+++ b/scripts/bilingual_sort.awk
@@ -3,10 +3,8 @@ function bilingual_sort(i1, v1, i2, v2, l, r, f)
 	l = cc2name[v1[0]]
 	r = cc2name[v2[0]]
 
-	printf "%s\n%s\n", l, r |& "sort"
-	close("sort", "to")
-	"sort" |& getline f
-	close("sort", "from")
+	print l ":" r |& CMD
+	CMD |& getline f
 
 	return f == l ? -1 : +1
 }