From 00974c325bfdb24c6027a87ce391cfda0ab58fcb Mon Sep 17 00:00:00 2001
From: Mango0x45 <thomasvoss@live.com>
Date: Thu, 9 Sep 2021 02:42:25 +0200
Subject: Genesis commit

---
 .gitignore                  |   6 +
 Makefile                    |  15 ++
 UNLICENSE                   |  24 ++
 extras/sizeof_struct_stat.c |  12 +
 macros.m4                   |   2 +
 smallbrain.s                | 538 ++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 597 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 UNLICENSE
 create mode 100644 extras/sizeof_struct_stat.c
 create mode 100644 macros.m4
 create mode 100644 smallbrain.s

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c430098
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+c-example/
+tests/
+*.bf
+*.out
+out.s
+smallbrain
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..475bf51
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,15 @@
+target	= smallbrain
+
+CC	= cc
+CFLAGS	= -g -no-pie
+
+all: ${target}
+${target}: ${target}.s
+	m4 macros.m4 $< >out.s
+	${CC} ${CFLAGS} -o $@ out.s
+
+.PHONY: bench clean
+bench:
+	hyperfine -S sh -w 200 -r 100000 "./smallbrain tests/towers-of-hanoi.bf"
+clean:
+	rm -f ${target} out.s
diff --git a/UNLICENSE b/UNLICENSE
new file mode 100644
index 0000000..68a49da
--- /dev/null
+++ b/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/extras/sizeof_struct_stat.c b/extras/sizeof_struct_stat.c
new file mode 100644
index 0000000..67b6b9c
--- /dev/null
+++ b/extras/sizeof_struct_stat.c
@@ -0,0 +1,12 @@
+#include <sys/stat.h>
+
+#include <stddef.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+	printf("Size of stat struct:\t%zu bytes\nOffset of st_size:\t%zu\n", sizeof(struct stat),
+	       offsetof(struct stat, st_size));
+	return 0;
+}
diff --git a/macros.m4 b/macros.m4
new file mode 100644
index 0000000..4f15035
--- /dev/null
+++ b/macros.m4
@@ -0,0 +1,2 @@
+define(`COUNTER', 0)
+define(`ENUM', `.equ $1, define(`COUNTER', incr(COUNTER))COUNTER')
diff --git a/smallbrain.s b/smallbrain.s
new file mode 100644
index 0000000..86e3114
--- /dev/null
+++ b/smallbrain.s
@@ -0,0 +1,538 @@
+.equ EOF, -1
+.equ O_RDONLY, 0
+.equ EXIT_FAILURE, 1
+
+# Opcodes
+ENUM(OP_ADD)
+ENUM(OP_SUB)
+ENUM(OP_RIGHT)
+ENUM(OP_LEFT)
+ENUM(OP_LOOP_START)
+ENUM(OP_LOOP_END)
+ENUM(OP_READ)
+ENUM(OP_WRITE)
+ENUM(OP_ZERO)
+ENUM(OP_COPY)
+
+.global main
+
+.data
+zero_pattern:
+	.asciz "[-]"
+die_fmt:
+	.asciz "%s: %s\n"
+usage_fmt:
+	.asciz "Usage: %s script\n"
+func_open:
+	.asciz "open"
+func_fstat:
+	.asciz "fstat"
+func_read:
+	.asciz "read"
+func_malloc:
+	.asciz "malloc"
+memory:
+	.zero 30000
+
+.bss
+bytecode:
+	.quad 0
+program:
+	.quad 0
+
+.text
+# ==================
+# Description:
+#	The entry point of the program.
+#
+# Args:
+#	%rdi: The number of command line arguments
+#	%rsi: An array of command line arguments ((%rsi) is the program name)
+#
+# Return:
+#	%rax: The programs exit code
+# ==================
+main:
+	# Make sure that the right number of arguments were passed
+	cmpl	$2, %edi
+	jne	usage
+
+	movq	8(%rsi), %rdi # Move the specified script filename into %rdi
+	call	read_file     # Read the file
+	call	compile       # Compile into bytecode
+	#call	execute       # Execute the program
+
+	# Return successfully
+	xorl	%eax, %eax
+	ret
+
+# ==================
+# Description:
+#	Read the specified file into a program buffer which the user must later free themselves.
+#
+# Args:
+#	%rdi: The scripts filename
+# ==================
+read_file:
+	pushq	%rbp
+	movq	%rsp, %rbp
+
+	# Allocate space for local variables
+	# 	4 bytes for the file descriptor
+	#	144 bytes for the struct stat (check `extras/sizeof_struct_stat.c` for numbers)
+	#	12 bytes of padding to align %rsp on a 16 byte boundary
+	subq	$160, %rsp
+
+	# Open the file in read-only mode with the open(2) syscall
+	movl	$O_RDONLY, %esi
+	call	open
+
+	cmpl	$-1, %eax      # Check if open(1) returned -1 (it failed)
+	je	open_die       # If so, exit the program
+	movl	%eax, -4(%rbp) # If not store the file descriptor
+
+	movl	%eax, %edi       # Set the first argument to the file descriptor
+	leaq	-160(%rbp), %rsi # Set the second argument to the address of the struct stat
+	call	fstat            # Call fstat(2) to populate the struct stat
+
+	# Error check fstat(2) just like open(2)
+	cmpl	$-1, %eax
+	je	fstat_die
+
+	# Allocate a buffer for the programs contents
+	movq	-112(%rbp), %rdi # Put st_size (the filesize) in %rdi
+	incq	%rdi             # Make space for the NUL byte
+	call	malloc           # Allocate the memory
+	testq	%rax, %rax       # Check if %rax is NULL
+	je	malloc_die       # If it is then malloc failed and we exit
+	movq	%rax, (program)  # Store the address of the allocated memory in (program)
+	addq	-112(%rbp), %rax # Point to the last element of the program array
+	movb	$0, (%rax)       # NULL terminate the program
+
+	# Allocate a buffer for the bytecode
+	movq	-112(%rbp), %rdi # Put st_size (the filesize) in %rdi
+	incq	%rdi             # Make space for the NUL byte
+	shlq	$4, %rdi         # Multiply the size of the buffer by 16 to make space for opcodes
+	call	malloc           # Allocate the memory
+	movq	%rax, (bytecode) # Store the address of the allocated memory in (bytecode)
+	testq	%rax, %rax       # Check if %rax is NULL
+	je	malloc_die       # If it is then malloc failed and we exit
+
+	# Read the program into the program buffer we allocated with read(2)
+	movq	-112(%rbp), %rdx # Set the buffersize
+	movq	(program), %rsi  # Set the buffer
+	movl	-4(%rbp), %edi   # Set the file descriptor
+	call	read
+
+	# Error check read(2) just like open(2)
+	cmpl	$-1, %eax
+	je	read_die
+
+	# Close the file, don't error check this
+	movl	-4(%rbp), %eax
+	call	close
+
+	leave
+	ret
+
+# ==================
+# Description:
+#	Compile the program into a bytecode which is an optimized version of the raw program. Each
+#	opcode is a "struct" where the higher 8 bytes are an opcode and the lower 8 are an option
+#	but of data.
+#
+# ==================
+compile:
+	movq	(program), %r15  # Store the address of the program pointer into %r15
+	movq	(bytecode), %r14 # Store the address of the bytecode pointer into %r14
+compile_loop:
+	# Load the current command into %rax
+	movq	(%r15), %rax
+
+	# Jump to a different label depending on which instruction we hit
+	cmpb	$'+', %al
+	je	compile_add
+	cmpb	$'-', %al
+	je	compile_sub
+	cmpb	$'>', %al
+	je	compile_right
+	cmpb	$'<', %al
+	je	compile_left
+	cmpb	$'[', %al
+	je	compile_loop_start
+	cmpb	$']', %al
+	je	compile_loop_end
+	cmpb	$',', %al
+	je	compile_read
+	cmpb	$'.', %al
+	je	compile_write
+
+	# COMMENTS GET HERE
+
+	incq	%r15        # Increment the instruction pointer
+	subq	$16, %r14   # Negate the effect of the later `addq`
+	jmp	compile_out
+
+compile_add:
+	movq	$OP_ADD, (%r14) # Specify the ADD opcode
+	movq	$1, 8(%r14)     # Write the count of '+'s to the data portion
+compile_add_loop:
+	incq	%r15             # Move to the next instruction
+	cmpb	$'+', (%r15)     # Check if there is another +
+	jne	compile_out      # If not, check for a newline
+	incq	8(%r14)          # Increment the accumulator
+	jmp	compile_add_loop # Loop again
+
+compile_sub:
+	movq	$OP_SUB, (%r14) # Specify the SUB opcode
+	movq	$1, 8(%r14)     # Write the count of '-'s to the data portion
+compile_sub_loop:
+	incq	%r15             # Move to the next instruction
+	cmpb	$'-', (%r15)     # Check if there is another -
+	jne	compile_out      # If not, exit this loop
+	incq	8(%r14)          # Increment the accumulator
+	jmp	compile_sub_loop # Loop again
+
+compile_right:
+	movq	$OP_RIGHT, (%r14) # Specify the RIGHT opcode
+	movq	$1, 8(%r14)       # Write the count of '>'s to the data portion
+compile_right_loop:
+	incq	%r15               # Move to the next instruction
+	cmpb	$'>', (%r15)       # Check if there is another >
+	jne	compile_out        # If not, exit this loop
+	incq	8(%r14)            # Increment the accumulator
+	jmp	compile_right_loop # Loop again
+
+compile_left:
+	movq	$OP_LEFT, (%r14) # Specify the RIGHT opcode
+	movq	$1, 8(%r14)      # Write the count of '<'s to the data portion
+compile_left_loop:
+	incq	%r15              # Move to the next instruction
+	cmpb	$'<', (%r15)      # Check if there is another <
+	jne	compile_out       # If not, exit this loop
+	incq	8(%r14)           # Increment the accumulator
+	jmp	compile_left_loop # Loop again
+
+compile_loop_start:
+	# When we reach a '[' the first thing we want to do is check to see if it matches the
+	# pattern '[-]'. This pattern is one that sets a memory cell to 0, so we can optimize that.
+	movq	%r15, %rdi          # Compare the current position in the program string
+	movq	$zero_pattern, %rsi # Compare it against the zero pattern '[-]'
+	movl	$3, %edx            # We want to compare 3 bytes
+	call	strncmp             # Compare them with strncmp(3)
+	testl	%eax, %eax          # Check if the comparison returned 0
+	jz	compile_zero        # If it did, we matched the pattern
+
+	movq	%r15, %rdi                # Move the current instruction pointer into %rdi
+	call	copy_loop_checker         # Call the copy loop checker
+	testq	%rax, %rax                # Check to see if we hit a copy loop
+	jz	compile_loop_start_normal # If we didn't, this is a regular loop
+	movq	%rax, %r15                # Otherwise, set %r15 to the new location
+	incq	%r15                      # Then point to the next instruction
+	jmp	compile_out
+
+compile_loop_start_normal:
+	# Push the address of the loop start to the stack for the next ']'
+	pushq	%r14
+
+	movq	$OP_LOOP_START, (%r14) # Specify the LOOP_START opcode
+	movq	$0, 8(%r14)            # Zero the data section
+	incq	%r15                   # Increment the instruction pointer
+	jmp	compile_out
+
+compile_loop_end:
+	popq	8(%r14)              # Pop the address of the previous loop start to the data section
+	movq	$OP_LOOP_END, (%r14) # Push the address of the loop end to the stack
+	incq	%r15                 # Increment the instruction pointer
+	jmp	compile_out
+
+compile_read:
+	movq	$OP_READ, (%r14) # Specify the READ opcode
+	movq	$0, 8(%r14)      # Zero the data section
+	incq	%r15             # Increment the instruction pointer
+	jmp	compile_out
+
+compile_write:
+	movq	$OP_WRITE, (%r14) # Specify the WRITE opcode
+	movq	$0, 8(%r14)       # Zero the data section
+	incq	%r15              # Increment the instruction pointer
+	jmp	compile_out
+
+compile_zero:
+	movq	$OP_ZERO, (%r14) # Specify the ZERO opcode
+	movq	$0, 8(%r14)      # Zero the data section
+	addq	$3, %r15         # '[-]' is a 3 byte instruction
+	# FALLTHROUGH
+
+compile_out:
+	addq	$16, %r14    # Move to the next opcode
+	movb	(%r15), %al  # Move the current instruction into %al
+	testb	%al, %al     # Check if we have reached the NUL byte
+	jne	compile_loop # If we haven't, loop
+	movq	$0, (%r14)   # Otherwise, NUL terminate the bytecode
+
+	# Now that we have traversed the entire program, we do a 2nd pass backwards so that we can
+	# set the jump addresses for the '[' commands now that the ']' commands have the addresses
+	# set.
+compile_backwards:
+	# We are at the NUL terminator, so move backwards
+	subq	$16, %r14
+
+	cmpq	$OP_LOOP_END, (%r14)   # Check if we hit a ']'
+	jne	compile_backwards_next # If we didn't move to the nex check
+	pushq	%r14                   # Otherwise push the address of the opcode to the stack
+	jmp	compile_backwards_out
+
+compile_backwards_next:
+	cmpq	$OP_LOOP_START, (%r14) # Check if we hit a '['
+	jne	compile_backwards_out  # If we didn't just keep looping
+	popq	8(%r14)                # If we did then pop the corresponding ']' address
+
+compile_backwards_out:
+	cmpq	%r14, (bytecode)  # Check if we've seen every opcode
+	jne	compile_backwards # If not keep looping
+	ret                       # Otherwise return
+
+# ==================
+# Description:
+#	Try to figure out if we are at a copy loop and optimize it. A copy loop follows the pattern
+#	of a loop ([]) beginning with a '-' followed by N occurances of '>' followed by a '+' and N
+#	occurances of '<'. This sequence copies the current cell to the cell at offset N and clears
+#	the current cell afterwards.
+#
+# Args:
+#	%rdi: A pointer to the first '[' of the potential copy loop
+#
+# Return:
+#	0 if not a copy loop, otherwise the new position of the instruction pointer.
+# ==================
+copy_loop_checker:
+	# Skip '['
+	incq	%rdi
+
+	# All copy loops must begin with a '-'
+	cmpb	$'-', (%rdi)
+	jne	copy_loop_fail
+	incq	%rdi
+
+	# Zero %rax so we can use it to count the copy offset
+	xorl	%eax, %eax
+copy_loop_count_offset:
+	cmpb	$'>', (%rdi)           # Check for '>'
+	jne	copy_loop_next         # If we don't match anymore then move to the next step
+	incq	%rax                   # Increment the offset counter
+	incq	%rdi                   # Increment the instruction pointer
+	jmp	copy_loop_count_offset # Loop again
+
+copy_loop_next:
+	cmpb	$'+', (%rdi)   # Check if we see the mandatory '+'
+	jne	copy_loop_fail # If we don't then fail
+	incq	%rdi           # Otherwise move to the next instruction
+
+	# The following code is the exact same as what we just did to count the offset but we are
+	# now using %rcx and decrementing for each '<'. This is so we can make sure that the copy
+	# loop is a working one.
+	cmpb	$'<', (%rdi)
+	jne	copy_loop_fail
+	movq	%rax, %rcx
+copy_loop_verify_offset:
+	cmpb	$'<', (%rdi)
+	jne	copy_loop_next_2
+	decq	%rcx
+	incq	%rdi
+
+copy_loop_next_2:
+	cmpb	$']', (%rdi)   # Ensure this is the end of the loop
+	jne	copy_loop_fail # If its not then fail
+	testq	%rcx, %rcx     # Otherwise make sure that our offsets line up
+	jnz	copy_loop_fail # If they don't then fail
+
+	movq	$OP_COPY, (%r14)   # Create an OP_COPY opcode
+	movq	%rax, 8(%r14)      # Set the offset to copy to
+	movq	$OP_ZERO, 16(%r14) # Create an OP_ZERO opcode
+	movq	$0, 24(%r14)       # Set an empty data section
+	addq	$16, %r14          # Increment the opcode pointer
+
+	# Return the address of the instruction pointer
+	movq	%rdi, %rax
+	ret
+copy_loop_fail:
+	xorl	%eax, %eax
+	ret
+
+# ==================
+# Description:
+#	Execute the brainfuck program. This is done the slow and naive way by just looping over
+#	command and executing it (although the '[-]' pattern does get optimized for that towers of
+#	hanoi speed!). I have written a *much* faster C version which compiles the brainfuck into a
+#	more optimized bytecode, but porting that to ASM was just too frustrating.
+# ==================
+execute:
+	movq	(bytecode), %r15 # Store the address of the program pointer into %r15
+	movq	$memory, %r14    # Store the address of the first memory cell into %r14 # TODO make sure this handles overflows normally
+execute_loop:
+	# Load the current command into %rax
+	movq	(%r15), %rax
+
+	# Jump to a different label depending on which instruction we hit
+	cmpq	$OP_ADD, %rax
+	je	execute_add
+	cmpq	$OP_SUB, %rax
+	je	execute_sub
+	cmpq	$OP_RIGHT, %rax
+	je	execute_right
+	cmpq	$OP_LEFT, %rax
+	je	execute_left
+	cmpq	$OP_LOOP_START, %rax
+	je	execute_loop_start
+	cmpq	$OP_LOOP_END, %rax
+	je	execute_loop_end
+	cmpq	$OP_READ, %rax
+	je	execute_read
+	cmpq	$OP_WRITE, %rax
+	je	execute_write
+	cmpq	$OP_ZERO, %rax
+	je	execute_zero
+	cmpq	$OP_COPY, %rax
+	je	execute_copy
+
+execute_add:
+	# Increment the current memory cell
+	movq	8(%r15), %rax
+	addb	%al, (%r14)
+	jmp	execute_out
+
+execute_sub:
+	# Decrement the current memory cell
+	movq	8(%r15), %rax
+	subb	%al, (%r14)
+	jmp	execute_out
+
+execute_right:
+	# Move the memory pointer right by 1
+	addq	8(%r15), %r14
+	jmp	execute_out
+
+execute_left:
+	# Move the memory pointer left by 1
+	subq	8(%r15), %r14
+	jmp	execute_out
+
+execute_loop_start:
+	cmpb	$0, (%r14)
+	cmovzq	8(%r15), %r15
+	jmp	execute_out
+
+execute_loop_end:
+	# If the current memory cell is 0 move to the next command
+	cmpb	$0, (%r14)
+	cmovnzq	8(%r15), %r15
+	jmp	execute_out
+
+execute_read:
+	call	getchar          # Read a character with getchar(3)
+	cmpb	$EOF, %al        # Check if the EOF was read
+	je	execute_read_eof # If EOF was read, jump to a special handler for that
+	movb	%al, (%r14)      # Otherwise move the read character into the current memory cell
+	jmp	execute_out
+
+execute_read_eof:
+	# If EOF was read, set the current cell to 0
+	movb	$0, (%r14)
+	jmp	execute_out
+
+execute_write:
+	movl	(%r14), %edi # Move the current memory cell into %edi
+	call	putchar      # Print it with putchar(3)
+	jmp	execute_out
+
+execute_zero:
+	# This is the one optimzation we actually do. If the string [-] is matched then just zero
+	# the current cell.
+	movb	$0, (%r14)
+	jmp	execute_out
+
+execute_copy:
+	movq	8(%r15), %rax
+	movb	(%r14), %cl
+	movb	%cl, (%rax, %r14, 1)
+	# FALLTHROUGH
+
+execute_out:
+	addq	$16 , %r15   # Increment the instruction pointer
+	movq	(%r15), %rax # Move the current instruction into %rax
+	testq	%rax, %rax   # Check if we have reached the NUL byte
+	jne	execute_loop # If we haven't, loop
+	ret                  # Otherwise, return
+
+# ==================
+# Description:
+#	The following die functions all work the same. They simply take the name of the
+#	corresponding function and store it in %rdi. Then the die function is called to print a
+#	message to stderr and terminate the program.
+# ==================
+
+# Set the function name to "open" and call die
+open_die:
+	movq	$func_open, %rdi
+	jmp	die
+
+# Same as open_die but for fstat(2)
+fstat_die:
+	movq	$func_fstat, %rdi
+	jmp	die
+
+# Same as open_die but for read(2)
+read_die:
+	movq	$func_read, %rdi
+	jmp	die
+
+# Same as open_die but for malloc(3)
+malloc_die:
+	movq	$func_malloc, %rdi
+	jmp	die
+
+
+# ==================
+# Description:
+#	Print out an error message in the format "<func name>: <err msg>" then exit via `_exit`
+#
+# Args:
+#	%rdi: The function name
+# ==================
+die:
+	# Store the function name temporarily in %r15
+	movq	%rdi, %r15
+
+	call	__errno_location # Call __errno_location to get a pointer to errno
+	movq	(%rax), %rdi     # Move errno into %rdi
+	call	strerror         # Get the error string with strerror(3)
+
+	movq	%rax, %rcx     # Set the error string
+	xorl	%eax, %eax     # Zero %rax
+	movq	%r15, %rdx     # Set the function name
+	movq	$die_fmt, %rsi # Set the format string
+	movq	stderr, %rdi   # Get stderr 
+	call	fprintf        # Print the message to stderr
+
+	jmp	_exit
+
+# ==================
+# Description:
+#	Print a usage message to standard error and exit the program via `_exit`
+# ==================
+usage:
+	xorl	%eax, %eax       # Set rax to 0
+	movq	(%rsi), %rdx     # Get argv[0]
+	movq	$usage_fmt, %rsi # Set the format string
+	movq	stderr, %rdi     # Get stderr 
+	call	fprintf          # Print the error
+	# FALLTHROUGH
+
+# ==================
+# Description:
+#	Exit the program with the return code EXIT_FAILURE
+# ==================
+_exit:
+	movl	$EXIT_FAILURE, %eax
+	call	exit
-- 
cgit v1.2.3