diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2023-06-25 18:54:22 +0200 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2023-06-25 18:54:22 +0200 | 
| commit | a4ce08bde0bcef664706b78146077ad7053a468b (patch) | |
| tree | 6671037e22591b98def5e99a7e2fbf3a5a0c7ff0 | |
| parent | 529114f2c769c359f2c0353631a183670d3dbebe (diff) | |
Make sorting stuff much faster
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | bilingual_sort.c | 41 | ||||
| -rwxr-xr-x | build | 22 | ||||
| -rw-r--r-- | scripts/bilingual_sort.awk | 6 | 
5 files changed, 64 insertions, 12 deletions
@@ -1,4 +1,5 @@
 fonts/
 done/
 out/
+bilingual_sort
 server
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e917176
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,6 @@
+all:
+	go build server.go
+	${CC} -O3 bilingual_sort.c -o bilingual_sort
+
+clean:
+	rm -f bilingual_sort server
diff --git a/bilingual_sort.c b/bilingual_sort.c
new file mode 100644
index 0000000..0ea5e0c
--- /dev/null
+++ b/bilingual_sort.c
@@ -0,0 +1,41 @@
+/* This is a simple program that runs in an infinite loop.  It reads strings
+ * from standard input in the form of “str1:str2”, sorts them according to the
+ * locale (set by gawk), and then prints out the first string in sorted order.
+ *
+ * The reason we use this instead of just sort(1) or something is because
+ * spawning one process for each comparison is super slow.  This lets us do
+ * multiple comparisons of string-pairs in only a single process.
+ */
+
+#define _GNU_SOURCE
+
+#include <locale.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+main(void)
+{
+	char *s1, *s2,
+	     line[256];
+
+	/* We need line buffering to make gawk interact with this properly */
+	setvbuf(stdout, NULL, _IOLBF, 0);
+	setlocale(LC_ALL, "");
+
+	while (fgets(line, sizeof(line), stdin) != NULL) {
+		line[strcspn(line, "\n")] = '\0';
+
+		/* No ‘:’ leaves s2 empty; we still reply so gawk never blocks */
+		s1 = line;
+		s2 = strchrnul(line, ':');
+		if (*s2 != '\0')
+			*s2++ = '\0';
+
+		puts(strcoll(s1, s2) < 0 ? s1 : s2);
+	}
+
+	return EXIT_SUCCESS;
+}
@@ -221,10 +221,13 @@ compile_for_lang() {
 			}
 
 			END {
-				if (lang == "en")
-					asort(col)
-				else
-					asort(col, col, "bilingual_sort")
+				locale = lang == "pt" \
+					? "pt_BR.UTF-8"   \
+					: lang "_" toupper(lang) ".UTF-8"
+
+				CMD = "LC_ALL=" locale " ./bilingual_sort"
+				asort(col, col, "bilingual_sort")
+				close(CMD)
 
 				for (i = 1; i <= 3; i++) {
 					if (i == 3 && denom == 5) {
@@ -289,10 +292,13 @@ compile_for_lang() {
 			}
 
 			END {
-				if (lang == "en")
-					asort(col)
-				else
-					asort(col, col, "bilingual_sort")
+				locale = lang == "pt" \
+					? "pt_BR.UTF-8"   \
+					: lang "_" toupper(lang) ".UTF-8"
+
+				CMD = "LC_ALL=" locale " ./bilingual_sort"
+				asort(col, col, "bilingual_sort")
+				close(CMD)
 
 				for (i = 1; i <= 2; i++) {
 					if (i == 1)
diff --git a/scripts/bilingual_sort.awk b/scripts/bilingual_sort.awk
index 6f6f53a..7e50136 100644
--- a/scripts/bilingual_sort.awk
+++ b/scripts/bilingual_sort.awk
@@ -3,10 +3,8 @@ function bilingual_sort(i1, v1, i2, v2,    l, r, f)
 	l = cc2name[v1[0]]
 	r = cc2name[v2[0]]
 
-	printf "%s\n%s\n", l, r |& "sort"
-	close("sort", "to")
-	"sort" |& getline f
-	close("sort", "from")
+	print l ":" r |& CMD
+	CMD |& getline f
 
 	return f == l ? -1 : +1
 }