.equ EOF, -1 .equ O_RDONLY, 0 .equ EXIT_FAILURE, 1 # Opcodes ENUM(OP_ADD) ENUM(OP_SUB) ENUM(OP_RIGHT) ENUM(OP_LEFT) ENUM(OP_LOOP_START) ENUM(OP_LOOP_END) ENUM(OP_READ) ENUM(OP_WRITE) ENUM(OP_ZERO) ENUM(OP_COPY) .global main .data zero_pattern: .asciz "[-]" read_mode: .asciz "r" die_fmt: .asciz "%s: %s\n" usage_fmt: .asciz "Usage: %s script\n" func_open: .asciz "open" func_fstat: .asciz "fstat" func_read: .asciz "read" func_malloc: .asciz "malloc" memory: .zero 30000 .bss bytecode: .quad 0 program: .quad 0 .text # ================== # Description: # The entry point of the program. # # Args: # %rdi: The number of command line arguments # %rsi: An array of command line arguments ((%rsi) is the program name) # # Return: # %rax: The programs exit code # ================== main: # Make sure that the right number of arguments were passed cmpl $2, %edi jne usage movq 8(%rsi), %rdi # Move the specified script filename into %rdi call read_file # Read the file call compile # Compile into bytecode call execute # Execute the program # Return successfully xorl %eax, %eax ret # ================== # Description: # Read the specified file into a program buffer which the user must later free themselves. # # Args: # %rdi: The scripts filename # ================== read_file: pushq %rbp movq %rsp, %rbp # Allocate space for local variables # 4 bytes for the file descriptor # 144 bytes for the struct stat (check `extras/sizeof_struct_stat.c` for numbers) # 12 bytes of padding to align %rsp on a 16 byte boundary subq $160, %rsp # Open the file in read-only mode with the open(2) syscall movl $O_RDONLY, %esi call open cmpl $-1, %eax # Check if open(1) returned -1 (it failed) je open_die # If so, exit the program movl %eax, -4(%rbp) # If not store the file descriptor movl %eax, %edi # Set the first argument to the file descriptor leaq -160(%rbp), %rsi # Set the second argument to the address of the struct stat call fstat # Call fstat(2) to populate the struct stat # Error check fstat(2) just like open(2) cmpl $-1, %eax je fstat_die # Allocate a buffer for the programs contents movq -112(%rbp), %rdi # Put st_size (the filesize) in %rdi incq %rdi # Make space for the NUL byte pushq %rdi # Store the value of %rdi for the next 2 malloc calls call malloc # Allocate the memory testq %rax, %rax # Check if %rax is NULL je malloc_die # If it is then malloc failed and we exit movq %rax, (program) # Store the address of the allocated memory in (program) addq -112(%rbp), %rax # Point to the last element of the program array movb $0, (%rax) # NULL terminate the program # Allocate a buffer for the bytecode popq %rdi # Retrieve %rdi from the stack shlq $4, %rdi # Multiply the size of the buffer by 16 to make space for opcodes call malloc # Allocate the memory movq %rax, (bytecode) # Store the address of the allocated memory in (bytecode) testq %rax, %rax # Check if %rax is NULL je malloc_die # If it is then malloc failed and we exit # Get a FILE* from the file descriptor with fdopen(3) movl -4(%rbp), %edi movq $read_mode, %rsi call fdopen pushq %rax # Push the FILE* to the stack for the later call to fclose(3) movq (program), %r15 # Move the program buffer into %r15 # cmpjeq - Compare Jump Equal Quadword # ==================================== # Compare the value of 'val' to %rax and if they match jump to 'jump'. .macro cmpjeq val, jump cmpq \val, %rax je \jump .endm # cmpjeb - Compare Jump Equal Byte # ================================ # Compare the value of 'val' to %al and if they match jump to 'jump'. .macro cmpjeb val, jump cmpb \val, %al je \jump .endm read_file_loop: movq -168(%rbp), %rdi # Move the FILE* into %rdi call fgetc # Read a character from the file cmpl $EOF, %eax # Check if we reached EOF je read_file_eof # If we did then end the loop # If we match any non-comment character jump to read_file_not_comment cmpjeb $'+', read_file_not_comment cmpjeb $'-', read_file_not_comment cmpjeb $'>', read_file_not_comment cmpjeb $'<', read_file_not_comment cmpjeb $'[', read_file_not_comment cmpjeb $']', read_file_not_comment cmpjeb $',', read_file_not_comment cmpjeb $'.', read_file_not_comment # DEFAULT CASE (its a comment we can ignore) jmp read_file_loop read_file_not_comment: movb %al, (%r15) # Read the character into the program buffer incq %r15 # Point to the next empty slot in the buffer jmp read_file_loop # Loop again read_file_eof: # NUL terminate the buffer movb $0, (%r15) # Close the file, don't error check this movq -168(%rbp), %rdi call fclose leave ret # ================== # Description: # Compile the program into a bytecode which is an optimized version of the raw program. Each # opcode is a "struct" where the higher 8 bytes are an opcode and the lower 8 are an option # but of data. # ================== compile: movq (program), %r15 # Store the address of the program pointer into %r15 movq (bytecode), %r14 # Store the address of the bytecode pointer into %r14 compile_loop: # Load the current command into %rax movq (%r15), %rax # Jump to a different label depending on which instruction we hit cmpjeb $'+', compile_add cmpjeb $'-', compile_sub cmpjeb $'>', compile_right cmpjeb $'<', compile_left cmpjeb $'[', compile_loop_start cmpjeb $']', compile_loop_end cmpjeb $',', compile_read cmpjeb $'.', compile_write compile_add: movq $OP_ADD, (%r14) # Specify the ADD opcode movq $1, 8(%r14) # Write the count of '+'s to the data portion compile_add_loop: incq %r15 # Move to the next instruction cmpb $'+', (%r15) # Check if there is another + jne compile_out # If not, exit this loop incq 8(%r14) # Increment the accumulator jmp compile_add_loop # Loop again compile_sub: movq $OP_SUB, (%r14) # Specify the SUB opcode movq $1, 8(%r14) # Write the count of '-'s to the data portion compile_sub_loop: incq %r15 # Move to the next instruction cmpb $'-', (%r15) # Check if there is another - jne compile_out # If not, exit this loop incq 8(%r14) # Increment the accumulator jmp compile_sub_loop # Loop again compile_right: movq $OP_RIGHT, (%r14) # Specify the RIGHT opcode movq $1, 8(%r14) # Write the count of '>'s to the data portion compile_right_loop: incq %r15 # Move to the next instruction cmpb $'>', (%r15) # Check if there is another > jne compile_out # If not, exit this loop incq 8(%r14) # Increment the accumulator jmp compile_right_loop # Loop again compile_left: movq $OP_LEFT, (%r14) # Specify the LEFT opcode movq $1, 8(%r14) # Write the count of '<'s to the data portion compile_left_loop: incq %r15 # Move to the next instruction cmpb $'<', (%r15) # Check if there is another < jne compile_out # If not, exit this loop incq 8(%r14) # Increment the accumulator jmp compile_left_loop # Loop again compile_loop_start: # When we reach a '[' the first thing we want to do is check to see if it matches the # pattern '[-]'. This pattern is one that sets a memory cell to 0, so we can optimize that. movq %r15, %rdi # Compare the current position in the program string movq $zero_pattern, %rsi # Compare it against the zero pattern '[-]' movl $4, %ecx # We want to compare 3 bytes (the instruction requires +1) repe cmpsb # Keep looping CMPSB while bytes match jrcxz compile_zero # Jump to compile_zero if the strings matched movq %r15, %rdi # Move the current instruction pointer into %rdi call copy_loop_checker # Call the copy loop checker testq %rax, %rax # Check to see if we hit a copy loop jz compile_loop_start_normal # If we didn't, this is a regular loop movq %rax, %r15 # Otherwise, set %r15 to the new location incq %r15 # Then point to the next instruction jmp compile_out compile_loop_start_normal: # Push the address of the loop start to the stack for the next ']' pushq %r14 movq $OP_LOOP_START, (%r14) # Specify the LOOP_START opcode movq $0, 8(%r14) # Zero the data section incq %r15 # Increment the instruction pointer jmp compile_out compile_loop_end: popq 8(%r14) # Pop the address of the previous loop start to the data section movq $OP_LOOP_END, (%r14) # Push the address of the loop end to the stack incq %r15 # Increment the instruction pointer jmp compile_out compile_read: movq $OP_READ, (%r14) # Specify the READ opcode movq $0, 8(%r14) # Zero the data section incq %r15 # Increment the instruction pointer jmp compile_out compile_write: movq $OP_WRITE, (%r14) # Specify the WRITE opcode movq $0, 8(%r14) # Zero the data section incq %r15 # Increment the instruction pointer jmp compile_out compile_zero: movq $OP_ZERO, (%r14) # Specify the ZERO opcode movq $0, 8(%r14) # Zero the data section addq $3, %r15 # '[-]' is a 3 byte instruction # FALLTHROUGH compile_out: addq $16, %r14 # Move to the next opcode movb (%r15), %al # Move the current instruction into %al testb %al, %al # Check if we have reached the NUL byte jne compile_loop # If we haven't, loop movq $0, (%r14) # Otherwise, NUL terminate the bytecode # Now that we have traversed the entire program, we do a 2nd pass backwards so that we can # set the jump addresses for the '[' commands now that the ']' commands have the addresses # set. compile_backwards: # We are at the NUL terminator, so move backwards subq $16, %r14 cmpq $OP_LOOP_END, (%r14) # Check if we hit a ']' jne compile_backwards_next # If we didn't move to the next check pushq %r14 # Otherwise push the address of the opcode to the stack jmp compile_backwards_out compile_backwards_next: cmpq $OP_LOOP_START, (%r14) # Check if we hit a '[' jne compile_backwards_out # If we didn't just keep looping popq 8(%r14) # If we did then pop the corresponding ']' address compile_backwards_out: cmpq %r14, (bytecode) # Check if we've seen every opcode jne compile_backwards # If not keep looping movq (program), %rdi # Otherwise, move the program buffer to %rdi call free # Free it ret # And return # ================== # Description: # Try to figure out if we are at a copy loop and optimize it. A copy loop follows the pattern # of a loop ([]) beginning with a '-' followed by N occurances of '>' followed by a '+' and N # occurances of '<'. This sequence copies the current cell to the cell at offset N and clears # the current cell afterwards. # # Args: # %rdi: A pointer to the first '[' of the potential copy loop # # Return: # 0 if not a copy loop, otherwise the new position of the instruction pointer. # ================== copy_loop_checker: # Skip '[' incq %rdi # All copy loops must begin with a '-' cmpb $'-', (%rdi) jne copy_loop_fail incq %rdi # Zero %rax so we can use it to count the copy offset xorl %eax, %eax copy_loop_count_offset: cmpb $'>', (%rdi) # Check for '>' jne copy_loop_next # If we don't match anymore then move to the next step incq %rax # Increment the offset counter incq %rdi # Increment the instruction pointer jmp copy_loop_count_offset # Loop again copy_loop_next: cmpb $'+', (%rdi) # Check if we see the mandatory '+' jne copy_loop_fail # If we don't then fail incq %rdi # Otherwise move to the next instruction # The following code is the exact same as what we just did to count the offset but we are # now using %rcx and decrementing for each '<'. This is so we can make sure that the copy # loop is a working one. cmpb $'<', (%rdi) jne copy_loop_fail movq %rax, %rcx copy_loop_verify_offset: cmpb $'<', (%rdi) jne copy_loop_next_2 decq %rcx incq %rdi copy_loop_next_2: cmpb $']', (%rdi) # Ensure this is the end of the loop jne copy_loop_fail # If its not then fail testq %rcx, %rcx # Otherwise make sure that our offsets line up jnz copy_loop_fail # If they don't then fail movq $OP_COPY, (%r14) # Create an OP_COPY opcode movq %rax, 8(%r14) # Set the offset to copy to movq $OP_ZERO, 16(%r14) # Create an OP_ZERO opcode movq $0, 24(%r14) # Set an empty data section addq $16, %r14 # Increment the opcode pointer # Return the address of the instruction pointer movq %rdi, %rax ret copy_loop_fail: xorl %eax, %eax ret # ================== # Description: # Execute the brainfuck bytecode. # ================== execute: movq (bytecode), %r15 # Store the address of the program pointer into %r15 movq $memory, %r14 # Store the address of the first memory cell into %r14 # TODO make sure this handles overflows normally execute_loop: # Load the current command into %rax movq (%r15), %rax # Jump to a different label depending on which instruction we hit cmpjeq $OP_ADD, execute_add cmpjeq $OP_SUB, execute_sub cmpjeq $OP_RIGHT, execute_right cmpjeq $OP_LEFT, execute_left cmpjeq $OP_LOOP_START, execute_loop_start cmpjeq $OP_LOOP_END, execute_loop_end cmpjeq $OP_READ, execute_read cmpjeq $OP_WRITE, execute_write cmpjeq $OP_ZERO, execute_zero # OP_COPY jmp execute_copy execute_add: # Increment the current memory cell movq 8(%r15), %rax addb %al, (%r14) jmp execute_out execute_sub: # Decrement the current memory cell movq 8(%r15), %rax subb %al, (%r14) jmp execute_out execute_right: # Move the memory pointer right addq 8(%r15), %r14 jmp execute_out execute_left: # Move the memory pointer left subq 8(%r15), %r14 jmp execute_out execute_loop_start: # If the current memory cell is 0 move to the next '[' cmpb $0, (%r14) cmovzq 8(%r15), %r15 jmp execute_out execute_loop_end: # If the current memory cell is not 0 move to the next ']' cmpb $0, (%r14) cmovnzq 8(%r15), %r15 jmp execute_out execute_read: # Set the current cell to the character read from stdin call getchar # Read a character with getchar(3) cmpb $EOF, %al # Check if the EOF was read je execute_read_eof # If EOF was read, jump to a special handler for that movb %al, (%r14) # Otherwise move the read character into the current memory cell jmp execute_out execute_read_eof: # If EOF was read, set the current cell to 0 movb $0, (%r14) jmp execute_out execute_write: # Print the character at the current memory cell movl (%r14), %edi # Move the current memory cell into %edi call putchar # Print it with putchar(3) jmp execute_out execute_zero: # Zero the current cell movb $0, (%r14) jmp execute_out execute_copy: # Copy the current memory cells contents elsewhere movq 8(%r15), %rax movb (%r14), %cl movb %cl, (%rax, %r14, 1) # FALLTHROUGH execute_out: addq $16 , %r15 # Increment the instruction pointer movq (%r15), %rax # Move the current instruction into %rax testq %rax, %rax # Check if we have reached the NUL byte jne execute_loop # If we haven't, loop ret # Otherwise, return # ================== # Description: # The following die functions all work the same. They simply take the name of the # corresponding function and store it in %rdi. Then the die function is called to print a # message to stderr and terminate the program. # ================== .macro fdie s movq \s, %rdi jmp die .endm open_die: fdie $func_open fstat_die: fdie $func_fstat read_die: fdie $func_read malloc_die: fdie $func_malloc # ================== # Description: # Print out an error message in the format ": " then exit via `_exit` # # Args: # %rdi: The function name # ================== die: # Store the function name temporarily in %r15 movq %rdi, %r15 call __errno_location # Call __errno_location to get a pointer to errno movq (%rax), %rdi # Move errno into %rdi call strerror # Get the error string with strerror(3) movq %rax, %rcx # Set the error string xorl %eax, %eax # Zero %rax movq %r15, %rdx # Set the function name movq $die_fmt, %rsi # Set the format string movq stderr, %rdi # Get stderr call fprintf # Print the message to stderr jmp _exit # ================== # Description: # Print a usage message to standard error and exit the program via `_exit` # ================== usage: xorl %eax, %eax # Set rax to 0 movq (%rsi), %rdx # Get argv[0] movq $usage_fmt, %rsi # Set the format string movq stderr, %rdi # Get stderr call fprintf # Print the error # FALLTHROUGH # ================== # Description: # Exit the program with the return code EXIT_FAILURE # ================== _exit: movl $EXIT_FAILURE, %eax call exit