diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-10-30 10:57:28 +0100 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-10-30 10:57:28 +0100 | 
| commit | 566cef5c77d4a884f054857c7aa4d3e76d19479e (patch) | |
| tree | 160cbcbbcc356a638cb3be06e1a5cd379834084c | |
| parent | bf4ffc33598c2b05f7dbb07f688a2eb33b58150b (diff) | |
Improve file reading
| -rw-r--r-- | make.c | 3 |
| -rw-r--r-- | src/work.c | 108 |
2 files changed, 79 insertions, 32 deletions
@@ -38,6 +38,9 @@ static char *cflags_req[] = {  	"-Ivendor/mlib/include",  	"-pipe",  	"-std=c23", +#ifdef __linux__ +	"-D_FILE_OFFSET_BITS=64", +#endif  #ifdef __GLIBC__  	"-D_GNU_SOURCE",  #endif @@ -1,6 +1,8 @@  #include <sys/mman.h>  #include <sys/stat.h> +#include <errno.h> +#include <fcntl.h>  #include <stdatomic.h>  #include <stdckdint.h>  #include <stddef.h> @@ -68,54 +70,96 @@ extern typeof(pcre2_match) *pcre2_match_fn;  void  process_file(const char *locl_filename, unsigned char **locl_buf)  { +	ptrdiff_t baselen; +	static thread_local off_t basecap; +  	filename = locl_filename;  	buf = locl_buf; -	FILE *fp = streq(filename, "-") ? stdin : fopen(filename, "r"); -	if (fp == nullptr) { -		warn("fopen: %s:", filename); -		atomic_store(&rv, EXIT_WARNING); -		return; +	int fd = streq(filename, "-") ? STDIN_FILENO : open(filename, O_RDONLY); +	if (fd == -1) { +		warn("open: %s:", filename); +		goto err;  	} -	allocator_t mem = init_heap_allocator(nullptr); -	if (baseptr == nullptr) -		baseptr = array_new(mem, char8_t, 0x1000); -	size_t bufsz = array_cap(baseptr); -	last_match = baseptr; - -	do { -		static_assert(sizeof(char8_t) == 1, "sizeof(char8_t) != 1; wtf?"); -		baseptr = array_resz(baseptr, bufsz += BUFSIZ); /* TODO: Bounds checking */ -		size_t n = fread(baseptr + array_len(baseptr), 1, BUFSIZ, fp); -		array_hdr(baseptr)->len += n; -	} while (!feof(fp)); - -	if (ferror(fp)) { -		warn("fread: %s:", filename); -		atomic_store(&rv, EXIT_WARNING); -		goto out; +	(void)posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL | POSIX_FADV_WILLNEED); + +	struct stat st; +	if (fstat(fd, &st) == -1) { +		warn("fstat: %s:", filename); +		goto err; +	} + +	if (S_ISREG(st.st_mode)) { +#if __linux__ +		(void)readahead(fd, 0, st.st_size); +#endif +		if (st.st_size > basecap) { +			basecap = st.st_size; +			if ((baseptr = realloc(baseptr, st.st_size)) == nullptr) +				cerr(EXIT_FATAL, "realloc:"); +		} +		(void)madvise(baseptr, st.st_size, POSIX_MADV_SEQUENTIAL); + +		ptrdiff_t 
nw = 0; +		for (;;) { +			ssize_t nr = read(fd, baseptr + nw, st.st_size - nw); +			if (nr == -1) { +				if (errno == EINTR) +					continue; +				warn("read: %s:", filename); +				goto err; +			} +			if (nr == 0) +				break; +			nw += nr; +		} +		baselen = st.st_size; +	} else { +		ptrdiff_t nw = 0; +		for (;;) { +			if (nw + st.st_blksize > basecap) { +				if (ckd_mul(&basecap, basecap, 2)) { +					errno = EOVERFLOW; +					cerr(EXIT_FATAL, "realloc:"); +				} +				if ((baseptr = realloc(baseptr, basecap)) == nullptr) +					cerr(EXIT_FATAL, "realloc:"); +			} +			ssize_t nr = read(fd, baseptr + nw, st.st_blksize); +			if (nr == -1) { +				if (errno == EINTR) +					continue; +				warn("read: %s:", filename); +				goto err; +			} +			if (nr == 0) +				break; +			nw += nr; +		} +		baselen = nw;  	}  	/* Shouldn’t need more than 32 ever… */ +	allocator_t mem = init_heap_allocator(nullptr);  	static thread_local u8view_t *hl = nullptr;  	if (hl == nullptr)  		hl = array_new(mem, typeof(*hl), 32); -	operator_dispatch(0, (u8view_t){baseptr, array_len(baseptr)}, &hl); +	operator_dispatch(0, (u8view_t){baseptr, baselen}, &hl); + +	if (fd != -1) +		(void)close(fd);  #if DEBUG -	array_free(baseptr); +	free(baseptr);  	baseptr = nullptr; -	array_free(hl); -	hl = nullptr; -#else -	array_hdr(baseptr)->len = 0; -	array_hdr(hl)->len = 0;  #endif +	return; -out: -	if (fp != stdin) -		(void)fclose(fp); +err: +	if (fd != -1) +		(void)close(fd); +	atomic_store(&rv, EXIT_WARNING);  }  |