1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
|
.equ EOF, -1
.equ O_RDONLY, 0
.equ EXIT_FAILURE, 1
// Opcodes
ENUM(OP_ADD)
ENUM(OP_SUB)
ENUM(OP_RIGHT)
ENUM(OP_LEFT)
ENUM(OP_LOOP_START)
ENUM(OP_LOOP_END)
ENUM(OP_READ)
ENUM(OP_WRITE)
ENUM(OP_ZERO)
ENUM(OP_COPY)
.global main
.data
zero_pattern: .asciz "[-]"
read_mode: .asciz "r"
die_fmt: .asciz "%s: %s\n"
usage_fmt: .asciz "Usage: %s script\n"
func_open: .asciz "open"
func_fstat: .asciz "fstat"
func_read: .asciz "read"
func_malloc: .asciz "malloc"
memory: .zero 30000
.bss
bytecode: .quad 0
program: .quad 0
.text
// ==================
// Description:
// The entry point of the program.
//
// Args:
// %rdi: The number of command line arguments
// %rsi: An array of command line arguments ((%rsi) is the program name)
//
// Return:
// %rax: The programs exit code
// ==================
main:
// Make sure that the right number of arguments were passed
cmpl $2, %edi
jne usage
movq 8(%rsi), %rdi // Move the specified script filename into %rdi
call read_file // Read the file
call compile // Compile into bytecode
call execute // Execute the program
// Return successfully
xorl %eax, %eax
ret
// ==================
// Description:
// Read the specified file into a program buffer which the user must later free themselves.
//
// Args:
// %rdi: The scripts filename
// ==================
read_file:
pushq %rbp
movq %rsp, %rbp
// Allocate space for local variables
// 4 bytes for the file descriptor
// 144 bytes for the struct stat (check `extras/sizeof_struct_stat.c` for numbers)
// 12 bytes of padding to align %rsp on a 16 byte boundary
subq $160, %rsp
// Open the file in read-only mode with the open(2) syscall
movl $O_RDONLY, %esi
call open
cmpl $-1, %eax // Check if open(1) returned -1 (it failed)
je open_die // If so, exit the program
movl %eax, -4(%rbp) // If not store the file descriptor
movl %eax, %edi // Set the first argument to the file descriptor
leaq -160(%rbp), %rsi // Set the second argument to the address of the struct stat
call fstat // Call fstat(2) to populate the struct stat
// Error check fstat(2) just like open(2)
cmpl $-1, %eax
je fstat_die
// Allocate a buffer for the programs contents
movq -112(%rbp), %rdi // Put st_size (the filesize) in %rdi
incq %rdi // Make space for the NUL byte
pushq %rdi // Store the value of %rdi for the next 2 malloc calls
call malloc // Allocate the memory
testq %rax, %rax // Check if %rax is NULL
je malloc_die // If it is then malloc failed and we exit
movq %rax, (program) // Store the address of the allocated memory in (program)
addq -112(%rbp), %rax // Point to the last element of the program array
movb $0, (%rax) // NULL terminate the program
// Allocate a buffer for the bytecode
popq %rdi // Retrieve %rdi from the stack
shlq $4, %rdi // Multiply the size of the buffer by 16 to make space for opcodes
call malloc // Allocate the memory
movq %rax, (bytecode) // Store the address of the allocated memory in (bytecode)
testq %rax, %rax // Check if %rax is NULL
je malloc_die // If it is then malloc failed and we exit
// Get a FILE* from the file descriptor with fdopen(3)
movl -4(%rbp), %edi
movq $read_mode, %rsi
call fdopen
pushq %rax // Push the FILE* to the stack for the later call to fclose(3)
movq (program), %r15 // Move the program buffer into %r15
// cmpjeq - Compare Jump Equal Quadword
// ====================================
// Compare the value of 'val' to %rax and if they match jump to 'jump'.
.macro cmpjeq val, jump
cmpq \val, %rax
je \jump
.endm
// cmpjeb - Compare Jump Equal Byte
// ================================
// Compare the value of 'val' to %al and if they match jump to 'jump'.
.macro cmpjeb val, jump
cmpb \val, %al
je \jump
.endm
read_file_loop:
movq -168(%rbp), %rdi // Move the FILE* into %rdi
call fgetc // Read a character from the file
cmpl $EOF, %eax // Check if we reached EOF
je read_file_eof // If we did then end the loop
// If we match any non-comment character jump to read_file_not_comment
cmpjeb $'+', read_file_not_comment
cmpjeb $'-', read_file_not_comment
cmpjeb $'>', read_file_not_comment
cmpjeb $'<', read_file_not_comment
cmpjeb $'[', read_file_not_comment
cmpjeb $']', read_file_not_comment
cmpjeb $',', read_file_not_comment
cmpjeb $'.', read_file_not_comment
// DEFAULT CASE (its a comment we can ignore)
jmp read_file_loop
read_file_not_comment:
movb %al, (%r15) // Read the character into the program buffer
incq %r15 // Point to the next empty slot in the buffer
jmp read_file_loop // Loop again
read_file_eof:
// NUL terminate the buffer
movb $0, (%r15)
// Close the file, don't error check this
movq -168(%rbp), %rdi
call fclose
leave
ret
// ==================
// Description:
// Compile the program into a bytecode which is an optimized version of the raw program. Each
// opcode is a "struct" where the higher 8 bytes are an opcode and the lower 8 are data for
// the instruction.
// ==================
compile:
movq (program), %r15 // Store the address of the program pointer into %r15
movq (bytecode), %r14 // Store the address of the bytecode pointer into %r14
compile_loop:
// Load the current command into %rax
movq (%r15), %rax
// Jump to a different label depending on which instruction we hit
cmpjeb $'+', compile_add
cmpjeb $'-', compile_sub
cmpjeb $'>', compile_right
cmpjeb $'<', compile_left
cmpjeb $'[', compile_loop_start
cmpjeb $']', compile_loop_end
cmpjeb $',', compile_read
cmpjeb $'.', compile_write
compile_add:
movq $OP_ADD, (%r14) // Specify the ADD opcode
movq $1, 8(%r14) // Write the count of '+'s to the data portion
compile_add_loop:
incq %r15 // Move to the next instruction
cmpb $'+', (%r15) // Check if there is another +
jne compile_out // If not, exit this loop
incq 8(%r14) // Increment the accumulator
jmp compile_add_loop // Loop again
compile_sub:
movq $OP_SUB, (%r14) // Specify the SUB opcode
movq $1, 8(%r14) // Write the count of '-'s to the data portion
compile_sub_loop:
incq %r15 // Move to the next instruction
cmpb $'-', (%r15) // Check if there is another -
jne compile_out // If not, exit this loop
incq 8(%r14) // Increment the accumulator
jmp compile_sub_loop // Loop again
compile_right:
movq $OP_RIGHT, (%r14) // Specify the RIGHT opcode
movq $1, 8(%r14) // Write the count of '>'s to the data portion
compile_right_loop:
incq %r15 // Move to the next instruction
cmpb $'>', (%r15) // Check if there is another >
jne compile_out // If not, exit this loop
incq 8(%r14) // Increment the accumulator
jmp compile_right_loop // Loop again
compile_left:
movq $OP_LEFT, (%r14) // Specify the LEFT opcode
movq $1, 8(%r14) // Write the count of '<'s to the data portion
compile_left_loop:
incq %r15 // Move to the next instruction
cmpb $'<', (%r15) // Check if there is another <
jne compile_out // If not, exit this loop
incq 8(%r14) // Increment the accumulator
jmp compile_left_loop // Loop again
compile_loop_start:
// When we reach a '[' the first thing we want to do is check to see if it matches the
// pattern '[-]'. This pattern is one that sets a memory cell to 0, so we can optimize that.
movq %r15, %rdi // Compare the current position in the program string
movq $zero_pattern, %rsi // Compare it against the zero pattern '[-]'
movl $4, %ecx // We want to compare 3 bytes (the instruction requires +1)
repe cmpsb // Keep looping CMPSB while bytes match
jrcxz compile_zero // Jump to compile_zero if the strings matched
movq %r15, %rdi // Move the current instruction pointer into %rdi
call copy_loop_checker // Call the copy loop checker
testq %rax, %rax // Check to see if we hit a copy loop
jz compile_loop_start_normal // If we didn't, this is a regular loop
movq %rax, %r15 // Otherwise, set %r15 to the new location
incq %r15 // Then point to the next instruction
jmp compile_out
compile_loop_start_normal:
// Push the address of the loop start to the stack for the next ']'
pushq %r14
movq $OP_LOOP_START, (%r14) // Specify the LOOP_START opcode
movq $0, 8(%r14) // Zero the data section
incq %r15 // Increment the instruction pointer
jmp compile_out
compile_loop_end:
popq 8(%r14) // Pop the address of the previous loop start to the data section
movq $OP_LOOP_END, (%r14) // Push the address of the loop end to the stack
incq %r15 // Increment the instruction pointer
jmp compile_out
compile_read:
movq $OP_READ, (%r14) // Specify the READ opcode
movq $0, 8(%r14) // Zero the data section
incq %r15 // Increment the instruction pointer
jmp compile_out
compile_write:
movq $OP_WRITE, (%r14) // Specify the WRITE opcode
movq $0, 8(%r14) // Zero the data section
incq %r15 // Increment the instruction pointer
jmp compile_out
compile_zero:
movq $OP_ZERO, (%r14) // Specify the ZERO opcode
movq $0, 8(%r14) // Zero the data section
addq $3, %r15 // '[-]' is a 3 byte instruction
// FALLTHROUGH
compile_out:
addq $16, %r14 // Move to the next opcode
movb (%r15), %al // Move the current instruction into %al
testb %al, %al // Check if we have reached the NUL byte
jne compile_loop // If we haven't, loop
movq $0, (%r14) // Otherwise, NUL terminate the bytecode
// Now that we have traversed the entire program, we do a 2nd pass backwards so that we can
// set the jump addresses for the '[' commands now that the ']' commands have the addresses
// set.
compile_backwards:
// We are at the NUL terminator, so move backwards
subq $16, %r14
cmpq $OP_LOOP_END, (%r14) // Check if we hit a ']'
jne compile_backwards_next // If we didn't move to the next check
pushq %r14 // Otherwise push the address of the opcode to the stack
jmp compile_backwards_out
compile_backwards_next:
cmpq $OP_LOOP_START, (%r14) // Check if we hit a '['
jne compile_backwards_out // If we didn't just keep looping
popq 8(%r14) // If we did then pop the corresponding ']' address
compile_backwards_out:
cmpq %r14, (bytecode) // Check if we've seen every opcode
jne compile_backwards // If not keep looping
movq (program), %rdi // Otherwise, move the program buffer to %rdi
call free // Free it
ret // And return
// ==================
// Description:
// Try to figure out if we are at a copy loop and optimize it. A copy loop follows the pattern
// of a loop ([]) beginning with a '-' followed by N occurances of '>' followed by a '+' and N
// occurances of '<'. This sequence copies the current cell to the cell at offset N and clears
// the current cell afterwards.
//
// Args:
// %rdi: A pointer to the first '[' of the potential copy loop
//
// Return:
// 0 if not a copy loop, otherwise the new position of the instruction pointer.
// ==================
copy_loop_checker:
// Skip '['
incq %rdi
// All copy loops must begin with a '-'
cmpb $'-', (%rdi)
jne copy_loop_fail
incq %rdi
// Zero %rax so we can use it to count the copy offset
xorl %eax, %eax
copy_loop_count_offset:
cmpb $'>', (%rdi) // Check for '>'
jne copy_loop_next // If we don't match anymore then move to the next step
incq %rax // Increment the offset counter
incq %rdi // Increment the instruction pointer
jmp copy_loop_count_offset // Loop again
copy_loop_next:
cmpb $'+', (%rdi) // Check if we see the mandatory '+'
jne copy_loop_fail // If we don't then fail
incq %rdi // Otherwise move to the next instruction
// The following code is the exact same as what we just did to count the offset but we are
// now using %rcx and decrementing for each '<'. This is so we can make sure that the copy
// loop is a working one.
cmpb $'<', (%rdi)
jne copy_loop_fail
movq %rax, %rcx
copy_loop_verify_offset:
cmpb $'<', (%rdi)
jne copy_loop_next_2
decq %rcx
incq %rdi
copy_loop_next_2:
cmpb $']', (%rdi) // Ensure this is the end of the loop
jne copy_loop_fail // If its not then fail
testq %rcx, %rcx // Otherwise make sure that our offsets line up
jnz copy_loop_fail // If they don't then fail
movq $OP_COPY, (%r14) // Create an OP_COPY opcode
movq %rax, 8(%r14) // Set the offset to copy to
movq $OP_ZERO, 16(%r14) // Create an OP_ZERO opcode
movq $0, 24(%r14) // Set an empty data section
addq $16, %r14 // Increment the opcode pointer
// Return the address of the instruction pointer
movq %rdi, %rax
ret
copy_loop_fail:
xorl %eax, %eax
ret
// ==================
// Description:
// Execute the brainfuck bytecode.
// ==================
execute:
movq (bytecode), %r15 // Store the address of the program pointer into %r15
movq $memory, %r14 // Store the address of the first memory cell into %r14 # TODO make sure this handles overflows normally
execute_loop:
// Load the current command into %rax
movq (%r15), %rax
// Jump to a different label depending on which instruction we hit
cmpjeq $OP_ADD, execute_add
cmpjeq $OP_SUB, execute_sub
cmpjeq $OP_RIGHT, execute_right
cmpjeq $OP_LEFT, execute_left
cmpjeq $OP_LOOP_START, execute_loop_start
cmpjeq $OP_LOOP_END, execute_loop_end
cmpjeq $OP_READ, execute_read
cmpjeq $OP_WRITE, execute_write
cmpjeq $OP_ZERO, execute_zero
// OP_COPY
jmp execute_copy
execute_add:
// Increment the current memory cell
movq 8(%r15), %rax
addb %al, (%r14)
jmp execute_out
execute_sub:
// Decrement the current memory cell
movq 8(%r15), %rax
subb %al, (%r14)
jmp execute_out
execute_right:
// Move the memory pointer right
addq 8(%r15), %r14
jmp execute_out
execute_left:
// Move the memory pointer left
subq 8(%r15), %r14
jmp execute_out
execute_loop_start:
// If the current memory cell is 0 move to the next ']'
cmpb $0, (%r14)
cmovzq 8(%r15), %r15
jmp execute_out
execute_loop_end:
// If the current memory cell is not 0 move to the last '['
cmpb $0, (%r14)
cmovnzq 8(%r15), %r15
jmp execute_out
execute_read:
// Set the current cell to the character read from stdin
call getchar // Read a character with getchar(3)
cmpb $EOF, %al // Check if the EOF was read
je execute_read_eof // If EOF was read, jump to a special handler for that
movb %al, (%r14) // Otherwise move the read character into the current memory cell
jmp execute_out
execute_read_eof:
// If EOF was read, set the current cell to 0
movb $0, (%r14)
jmp execute_out
execute_write:
// Print the character at the current memory cell
movl (%r14), %edi // Move the current memory cell into %edi
call putchar // Print it with putchar(3)
jmp execute_out
execute_zero:
// Zero the current cell
movb $0, (%r14)
jmp execute_out
execute_copy:
// Copy the current memory cells contents elsewhere
movq 8(%r15), %rax
movb (%r14), %cl
movb %cl, (%rax, %r14, 1)
// FALLTHROUGH
execute_out:
addq $16 , %r15 // Increment the instruction pointer
movq (%r15), %rax // Move the current instruction into %rax
testq %rax, %rax // Check if we have reached the NUL byte
jne execute_loop // If we haven't, loop
ret // Otherwise, return
// ==================
// Description:
// The following die functions all work the same. They simply take the name of the
// corresponding function and store it in %rdi. Then the die function is called to print a
// message to stderr and terminate the program.
// ==================
.macro fdie s
movq \s, %rdi
jmp die
.endm
open_die: fdie $func_open
fstat_die: fdie $func_fstat
read_die: fdie $func_read
malloc_die: fdie $func_malloc
// ==================
// Description:
// Print out an error message in the format "<func name>: <err msg>" then exit via `_exit`
//
// Args:
// %rdi: The function name
// ==================
die:
// Store the function name temporarily in %r15
movq %rdi, %r15
call __errno_location // Call __errno_location to get a pointer to errno
movq (%rax), %rdi // Move errno into %rdi
call strerror // Get the error string with strerror(3)
movq %rax, %rcx // Set the error string
xorl %eax, %eax // Zero %rax
movq %r15, %rdx // Set the function name
movq $die_fmt, %rsi // Set the format string
movq stderr, %rdi // Get stderr
call fprintf // Print the message to stderr
jmp _exit
// ==================
// Description:
// Print a usage message to standard error and exit the program via `_exit`
// ==================
usage:
xorl %eax, %eax // Set rax to 0
movq (%rsi), %rdx // Get argv[0]
movq $usage_fmt, %rsi // Set the format string
movq stderr, %rdi // Get stderr
call fprintf // Print the error
// FALLTHROUGH
// ==================
// Description:
// Exit the program with the return code EXIT_FAILURE
// ==================
_exit:
movl $EXIT_FAILURE, %eax
call exit
|