Merge pull request #3 from birth-software/progress

Introduce the general structure of the compiler
David 2023-07-29 11:05:34 -06:00 committed by GitHub
commit 30931dc6f1
8 changed files with 937 additions and 327 deletions
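
At a glance, the commit wires the new stages into a single pipeline: read the source file, lex it, parse it, lower it to IR, and emit machine code. A minimal sketch of that flow, based on behaviorTest in src/main.zig further down (assuming an Allocator named allocator is in scope):

const file = try fs.readFile(allocator, fs.first);
var lexer_result = try lexer.runTest(allocator, file);
defer lexer_result.free(allocator);
var parser_result = try parser.runTest(allocator, &lexer_result);
defer parser_result.free(allocator);
var ir_result = try ir.runTest(allocator, &parser_result);
defer ir_result.free(allocator);
var emit_result = try emit.runTest(allocator, &ir_result);
defer emit_result.free(allocator);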

src/data_structures.zig Normal file

@@ -0,0 +1,3 @@
const std = @import("std");
pub const ArrayList = std.ArrayListUnmanaged;

src/emit.zig Normal file

@@ -0,0 +1,398 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const log = std.log;
const page_size = std.mem.page_size;
const assert = std.debug.assert;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
const ir = @import("ir.zig");
pub const Result = struct {
pub fn free(result: *Result, allocator: Allocator) void {
_ = allocator;
_ = result;
}
};
const Section = struct {
content: []align(page_size) u8,
index: usize = 0,
};
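// Executable image under construction: one mmapped page per section; only the text section is mapped executable.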
const Image = struct {
sections: struct {
text: Section,
rodata: Section,
data: Section,
},
entry_point: u32 = 0,
fn create() !Image {
return Image{
.sections = .{
.text = .{ .content = try mmap(page_size, .{ .executable = true }) },
.rodata = .{ .content = try mmap(page_size, .{ .executable = false }) },
.data = .{ .content = try mmap(page_size, .{ .executable = false }) },
},
};
}
fn destroy(image: *Image) void {
inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| {
std.os.munmap(@field(image.sections, field_name).content);
}
}
inline fn mmap(size: usize, flags: packed struct {
executable: bool,
}) ![]align(page_size) u8 {
const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0;
const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE;
return std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
}
fn appendCode(image: *Image, code: []const u8) void {
const destination = image.sections.text.content[image.sections.text.index..][0..code.len];
@memcpy(destination, code);
image.sections.text.index += code.len;
}
fn appendCodeByte(image: *Image, code_byte: u8) void {
image.sections.text.content[image.sections.text.index] = code_byte;
image.sections.text.index += 1;
}
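// Reinterprets the emitted bytes at entry_point in the text section as a function pointer of the given type.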
fn getEntryPoint(image: *const Image, comptime Function: type) *const Function {
comptime {
assert(@typeInfo(Function) == .Fn);
}
assert(image.sections.text.content.len > 0);
return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point]));
}
};
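// REX prefix: 0x40 plus the b/x/r/w bits (b/x/r extend the ModR/M and SIB register fields, w selects 64-bit operand size).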
const Rex = enum(u8) {
b = upper_4_bits | (1 << 0),
x = upper_4_bits | (1 << 1),
r = upper_4_bits | (1 << 2),
w = upper_4_bits | (1 << 3),
const upper_4_bits = 0b100_0000;
};
const GPRegister = enum(u4) {
a = 0,
c = 1,
d = 2,
b = 3,
sp = 4,
bp = 5,
si = 6,
di = 7,
r8 = 8,
r9 = 9,
r10 = 10,
r11 = 11,
r12 = 12,
r13 = 13,
r14 = 14,
r15 = 15,
};
pub const BasicGPRegister = enum(u3) {
a = 0,
c = 1,
d = 2,
b = 3,
sp = 4,
bp = 5,
si = 6,
di = 7,
};
const prefix_lock = 0xf0;
const prefix_repne_nz = 0xf2;
const prefix_rep = 0xf3;
const prefix_rex_w = [1]u8{@intFromEnum(Rex.w)};
const prefix_16_bit_operand = [1]u8{0x66};
const ret = 0xc3;
const mov_a_imm = [1]u8{0xb8};
const mov_reg_imm8: u8 = 0xb0;
inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
comptime {
assert(@typeInfo(@TypeOf(integer)) == .Int);
}
return @as([@sizeOf(@TypeOf(integer))]u8, @bitCast(integer));
}
inline fn movAImm(image: *Image, integer: anytype) void {
const T = @TypeOf(integer);
image.appendCode(&(switch (T) {
u8, i8 => .{mov_reg_imm8 | @intFromEnum(GPRegister.a)},
u16, i16 => prefix_16_bit_operand ++ mov_a_imm,
u32, i32 => mov_a_imm,
u64, i64 => prefix_rex_w ++ mov_a_imm,
else => @compileError("Unsupported"),
} ++ intToArrayOfBytes(integer)));
}
test "ret void" {
var image = try Image.create();
defer image.destroy();
image.appendCodeByte(ret);
const function_pointer = image.getEntryPoint(fn () callconv(.C) void);
function_pointer();
}
const integer_types_to_test = [_]type{ u8, u16, u32, u64, i8, i16, i32, i64 };
fn getMaxInteger(comptime T: type) T {
comptime {
assert(@typeInfo(T) == .Int);
}
return switch (@typeInfo(T).Int.signedness) {
.unsigned => std.math.maxInt(T),
.signed => std.math.minInt(T),
};
}
test "ret integer" {
inline for (integer_types_to_test) |Int| {
var image = try Image.create();
defer image.destroy();
const expected_number = getMaxInteger(Int);
movAImm(&image, expected_number);
image.appendCodeByte(ret);
const function_pointer = image.getEntryPoint(fn () callconv(.C) Int);
const result = function_pointer();
try expect(result == expected_number);
}
}
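// ModR/M byte with mod = 0b11 (register-direct): dst lands in the r/m field, src in the reg field.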
const LastByte = packed struct(u8) {
dst: BasicGPRegister,
src: BasicGPRegister,
always_on: u2 = 0b11,
};
fn movRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
dstRmSrcR(image, T, .mov, dst, src);
}
fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void {
const last_byte: u8 = @bitCast(LastByte{
.dst = dst,
.src = src,
});
const opcode_byte = @intFromEnum(opcode);
const bytes = switch (T) {
u8, i8 => blk: {
const base = [_]u8{ opcode_byte - 1, last_byte };
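// sp/bp/si/di (encodings 4..7) need a bare REX prefix (0x40) to be addressed as the byte registers spl/bpl/sil/dil.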
if (@intFromEnum(dst) >= @intFromEnum(BasicGPRegister.sp) or @intFromEnum(src) >= @intFromEnum(BasicGPRegister.sp)) {
image.appendCodeByte(0x40);
}
break :blk base;
},
u16, i16 => prefix_16_bit_operand ++ .{ opcode_byte, last_byte },
u32, i32 => .{ opcode_byte, last_byte },
u64, i64 => prefix_rex_w ++ .{ opcode_byte, last_byte },
else => @compileError("Not supported"),
};
image.appendCode(&bytes);
}
test "ret integer argument" {
inline for (integer_types_to_test) |Int| {
var image = try Image.create();
defer image.destroy();
const number = getMaxInteger(Int);
movRmR(&image, Int, .a, .di);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn (Int) callconv(.C) Int);
const result = functionPointer(number);
try expectEqual(number, result);
}
}
var r = std.rand.Pcg.init(0xffffffffffffffff);
fn getRandomNumberRange(comptime T: type, min: T, max: T) T {
const random = r.random();
return switch (@typeInfo(T).Int.signedness) {
.signed => random.intRangeAtMost(T, min, max),
.unsigned => random.uintAtMost(T, max),
};
}
fn subRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
dstRmSrcR(image, T, .sub, dst, src);
}
test "ret sub arguments" {
inline for (integer_types_to_test) |Int| {
var image = try Image.create();
defer image.destroy();
const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2);
const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a);
movRmR(&image, Int, .a, .di);
subRmR(&image, Int, .a, .si);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn (Int, Int) callconv(.C) Int);
const result = functionPointer(a, b);
try expectEqual(a - b, result);
}
}
const OpcodeRmR = enum(u8) {
add = 0x01,
@"or" = 0x09,
@"and" = 0x21,
sub = 0x29,
xor = 0x31,
@"test" = 0x85,
mov = 0x89,
};
test "test binary operations" {
inline for (integer_types_to_test) |T| {
const test_cases = [_]TestIntegerBinaryOperation(T){
.{
.opcode = .add,
.callback = struct {
fn callback(a: T, b: T) T {
return @addWithOverflow(a, b)[0];
}
}.callback,
},
.{
.opcode = .sub,
.callback = struct {
fn callback(a: T, b: T) T {
return @subWithOverflow(a, b)[0];
}
}.callback,
},
.{
.opcode = .@"or",
.callback = struct {
fn callback(a: T, b: T) T {
return a | b;
}
}.callback,
},
.{
.opcode = .@"and",
.callback = struct {
fn callback(a: T, b: T) T {
return a & b;
}
}.callback,
},
.{
.opcode = .xor,
.callback = struct {
fn callback(a: T, b: T) T {
return a ^ b;
}
}.callback,
},
};
for (test_cases) |test_case| {
try test_case.runTest();
}
}
}
fn TestIntegerBinaryOperation(comptime T: type) type {
const should_log = false;
return struct {
callback: *const fn (a: T, b: T) T,
opcode: OpcodeRmR,
pub fn runTest(test_case: @This()) !void {
for (0..10) |_| {
var image = try Image.create();
defer image.destroy();
errdefer image.destroy();
const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2);
const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a);
movRmR(&image, T, .a, .di);
dstRmSrcR(&image, T, test_case.opcode, .a, .si);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn (T, T) callconv(.C) T);
const expected = test_case.callback(a, b);
const result = functionPointer(a, b);
if (should_log) {
log.err("{s} {}, {} ({})", .{ @tagName(test_case.opcode), a, b, T });
}
try expectEqual(expected, result);
}
}
};
}
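// 0xe8 is call rel32; the 4-byte displacement that follows is relative to the end of the call instruction, which is what the patches below compute.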
test "call after" {
var image = try Image.create();
defer image.destroy();
errdefer image.destroy();
const jump_patch_offset = image.sections.text.index + 1;
image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 });
const jump_source = image.sections.text.index;
image.appendCodeByte(ret);
const jump_target = image.sections.text.index;
@as(*align(1) u32, @ptrCast(&image.sections.text.content[jump_patch_offset])).* = @intCast(jump_target - jump_source);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn () callconv(.C) void);
functionPointer();
}
test "call before" {
var image = try Image.create();
defer image.destroy();
errdefer image.destroy();
const first_jump_patch_offset = image.sections.text.index + 1;
const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 };
image.appendCode(&first_call);
const first_jump_source = image.sections.text.index;
image.appendCodeByte(ret);
const second_jump_target = image.sections.text.index;
image.appendCodeByte(ret);
const first_jump_target = image.sections.text.index;
@as(*align(1) i32, @ptrCast(&image.sections.text.content[first_jump_patch_offset])).* = @intCast(first_jump_target - first_jump_source);
const second_call = .{0xe8} ++ @as([4]u8, @bitCast(@as(i32, @intCast(@as(i64, @intCast(second_jump_target)) - @as(i64, @intCast(image.sections.text.index + 5))))));
image.appendCode(&second_call);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn () callconv(.C) void);
functionPointer();
}
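// Emission from IR is not implemented yet: runTest only walks the IR functions and returns an empty Result.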
pub fn runTest(allocator: Allocator, ir_result: *const ir.Result) !Result {
_ = allocator;
for (ir_result.functions.items) |*function| {
_ = function;
}
return Result{};
}

src/fs.zig Normal file

@@ -0,0 +1,9 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
pub const first = "src/test/main.b";
pub fn readFile(allocator: Allocator, file_relative_path: []const u8) ![]const u8 {
const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize));
return file;
}

src/ir.zig Normal file

@@ -0,0 +1,142 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const equal = std.mem.eql;
const data_structures = @import("data_structures.zig");
const ArrayList = data_structures.ArrayList;
const parser = @import("parser.zig");
const void_type = Type{
.id = .void,
};
const Type = struct {
id: Id,
inline fn isPrimitive(T: Type) bool {
return switch (T.id) {
.void => true,
};
}
const Id = enum {
void,
};
};
const Error = error{
type_mismatch,
internal,
arguments_not_used,
};
const TopLevelDeclaration = struct {
type: Id,
index: u31,
const Id = enum {
function,
expression,
};
};
const Instruction = struct {
id: Id,
index: u16,
const Id = enum {
ret_void,
};
};
const ret_void = Instruction{
.id = .ret_void,
.index = 0,
};
const ret = struct {
is_type: bool,
};
const Function = struct {
instructions: ArrayList(Instruction),
return_type: Type,
};
pub const Result = struct {
top_level_declarations: ArrayList(TopLevelDeclaration),
functions: ArrayList(Function),
pub fn free(result: *Result, allocator: Allocator) void {
for (result.functions.items) |*function| {
function.instructions.clearAndFree(allocator);
}
result.functions.clearAndFree(allocator);
result.top_level_declarations.clearAndFree(allocator);
}
};
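// Lowers the parsed functions to IR. Only empty bodies are accepted for now: arguments must be unused and the return type must be the primitive void, producing a single ret_void instruction per function.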
const Analyzer = struct {
parser: *const parser.Result,
top_level_declarations: ArrayList(TopLevelDeclaration),
functions: ArrayList(Function),
allocator: Allocator,
fn analyze(allocator: Allocator, parser_result: *const parser.Result) Error!Result {
var analyzer = Analyzer{
.parser = parser_result,
.top_level_declarations = ArrayList(TopLevelDeclaration){},
.allocator = allocator,
.functions = ArrayList(Function){},
};
for (parser_result.functions.items) |ast_function| {
if (ast_function.statements.items.len != 0) {
for (ast_function.statements.items) |statement| {
_ = statement;
@panic("TODO: statement");
}
} else {
if (ast_function.arguments.items.len != 0) {
return Error.arguments_not_used;
}
try analyzer.expectPrimitiveType(void_type, ast_function.return_type);
const function_index = analyzer.functions.items.len;
var function = Function{
.instructions = ArrayList(Instruction){},
.return_type = void_type,
};
function.instructions.append(allocator, ret_void) catch return Error.internal;
analyzer.top_level_declarations.append(allocator, TopLevelDeclaration{
.type = .function,
.index = @intCast(function_index),
}) catch return Error.internal;
analyzer.functions.append(allocator, function) catch return Error.internal;
}
}
return .{
.top_level_declarations = analyzer.top_level_declarations,
.functions = analyzer.functions,
};
}
fn expectPrimitiveType(analyzer: *Analyzer, comptime type_value: Type, type_identifier_id: u32) Error!void {
assert(type_value.isPrimitive());
const type_identifier = analyzer.parser.strings.get(type_identifier_id) orelse return Error.internal;
if (!equal(u8, @tagName(type_value.id), type_identifier)) {
return Error.type_mismatch;
}
}
};
pub fn runTest(allocator: Allocator, parser_result: *const parser.Result) !Result {
return Analyzer.analyze(allocator, parser_result);
}

src/lexer.zig Normal file

@@ -0,0 +1,156 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const log = std.log;
const data_structures = @import("data_structures.zig");
const ArrayList = data_structures.ArrayList;
const fs = @import("fs.zig");
pub inline fn rdtsc() u64 {
var edx: u32 = undefined;
var eax: u32 = undefined;
asm volatile (
\\rdtsc
: [eax] "={eax}" (eax),
[edx] "={edx}" (edx),
);
return @as(u64, edx) << 32 | eax;
}
inline fn rdtscFast() u32 {
return asm volatile (
\\rdtsc
: [eax] "={eax}" (-> u32),
:
: "edx"
);
}
const vector_byte_count = 16;
// These two actually take less space due to how Zig handles bool as u1
const VBool = @Vector(vector_byte_count, bool);
const VU1 = @Vector(vector_byte_count, u1);
const VU8 = @Vector(vector_byte_count, u8);
inline fn vand(v1: VBool, v2: VBool) VBool {
return @bitCast(@as(VU1, @bitCast(v1)) & @as(VU1, @bitCast(v2)));
}
inline fn byteMask(n: u8) VU8 {
return @splat(n);
}
inline fn endOfIdentifier(ch: u8) bool {
// TODO: complete
return ch == ' ' or ch == '(' or ch == ')';
}
const Identifier = struct {
start: u32,
end: u32,
};
pub const TokenId = enum {
identifier,
special_character,
};
pub const SpecialCharacter = enum(u8) {
arrow = 0,
left_parenthesis = '(',
right_parenthesis = ')',
left_brace = '{',
right_brace = '}',
};
pub const Result = struct {
identifiers: ArrayList(Identifier),
special_characters: ArrayList(SpecialCharacter),
ids: ArrayList(TokenId),
file: []const u8,
time: u64 = 0,
pub fn free(result: *Result, allocator: Allocator) void {
result.identifiers.clearAndFree(allocator);
result.special_characters.clearAndFree(allocator);
result.ids.clearAndFree(allocator);
allocator.free(result.file);
}
};
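// Tokens are stored as parallel lists: ids records each token's kind, while identifiers and special_characters hold the payloads for their kinds in order.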
fn lex(allocator: Allocator, text: []const u8) !Result {
const time_start = std.time.Instant.now() catch unreachable;
var index: usize = 0;
var result = Result{
.identifiers = try ArrayList(Identifier).initCapacity(allocator, text.len),
.special_characters = try ArrayList(SpecialCharacter).initCapacity(allocator, text.len),
.ids = try ArrayList(TokenId).initCapacity(allocator, text.len),
.file = text,
};
defer {
const time_end = std.time.Instant.now() catch unreachable;
result.time = time_end.since(time_start);
}
while (index < text.len) {
const first_char = text[index];
switch (first_char) {
'a'...'z', 'A'...'Z', '_' => {
const start = index;
// SIMD this
while (!endOfIdentifier(text[index])) {
index += 1;
}
result.identifiers.appendAssumeCapacity(.{
.start = @intCast(start),
.end = @intCast(index),
});
result.ids.appendAssumeCapacity(.identifier);
},
'(', ')', '{', '}' => |special_character| {
result.special_characters.appendAssumeCapacity(@enumFromInt(special_character));
result.ids.appendAssumeCapacity(.special_character);
index += 1;
},
' ', '\n' => index += 1,
'-' => {
if (text[index + 1] == '>') {
result.special_characters.appendAssumeCapacity(.arrow);
result.ids.appendAssumeCapacity(.special_character);
index += 2;
} else {
@panic("TODO");
}
},
else => {
index += 1;
},
}
}
return result;
}
pub fn runTest(allocator: Allocator, file: []const u8) !Result {
const result = try lex(allocator, file);
return result;
}
test "lexer" {
const allocator = std.testing.allocator;
const file_path = fs.first;
const file = try fs.readFile(allocator, file_path);
var result = try runTest(allocator, file);
defer result.free(allocator);
}

src/main.zig

@@ -1,341 +1,45 @@
The previous contents of main.zig (the machine-code emission helpers and their tests) move to src/emit.zig above; the file is reduced to the driver shown here.
const std = @import("std");
const Allocator = std.mem.Allocator;
const fs = @import("fs.zig");
const lexer = @import("lexer.zig");
const parser = @import("parser.zig");
const ir = @import("ir.zig");
const emit = @import("emit.zig");
pub const seed = std.math.maxInt(u64);
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
try behaviorTest(allocator, fs.first);
}
fn behaviorTest(allocator: Allocator, file_relative_path: []const u8) !void {
const file = try fs.readFile(allocator, file_relative_path);
var lexer_result = try lexer.runTest(allocator, file);
defer lexer_result.free(allocator);
var parser_result = parser.runTest(allocator, &lexer_result) catch |err| {
std.log.err("Lexer took {} ns", .{lexer_result.time});
return err;
};
defer parser_result.free(allocator);
var ir_result = try ir.runTest(allocator, &parser_result);
defer ir_result.free(allocator);
var emit_result = try emit.runTest(allocator, &ir_result);
defer emit_result.free(allocator);
}
test {
_ = lexer;
_ = parser;
_ = ir;
_ = emit;
}
test "behavior test 1" {
const allocator = std.testing.allocator;
try behaviorTest(allocator, fs.first);
}

src/parser.zig Normal file

@@ -0,0 +1,195 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const data_structures = @import("data_structures.zig");
const ArrayList = data_structures.ArrayList;
const lexer = @import("lexer.zig");
pub const Result = struct {
functions: ArrayList(Function),
strings: StringMap,
pub fn free(result: *Result, allocator: Allocator) void {
result.functions.clearAndFree(allocator);
result.strings.clearAndFree(allocator);
}
};
const PeekResult = union(lexer.TokenId) {
special_character: lexer.SpecialCharacter,
identifier: []const u8,
};
const Function = struct {
name: u32,
return_type: u32,
arguments: ArrayList(Argument),
statements: ArrayList(Statement),
const Argument = struct {
foo: u32 = 0,
};
};
const Statement = struct {
foo: u32 = 0,
};
const StringMap = std.AutoHashMapUnmanaged(u32, []const u8);
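// Single-pass parser over the lexer's token stream: ids, identifiers and special_characters are walked in lockstep via the three index fields below.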
const Parser = struct {
id_index: u32 = 0,
identifier_index: u32 = 0,
special_character_index: u32 = 0,
strings: StringMap,
allocator: Allocator,
functions: ArrayList(Function),
fn parse(parser: *Parser, lexer_result: *const lexer.Result) !Result {
while (parser.id_index < lexer_result.ids.items.len) {
try parser.parseTopLevelDeclaration(lexer_result);
}
return Result{
.functions = parser.functions,
.strings = parser.strings,
};
}
fn parseFunction(parser: *Parser, lexer_result: *const lexer.Result, name: u32) !Function {
assert(lexer_result.special_characters.items[parser.special_character_index] == .left_parenthesis);
parser.consume(lexer_result, .special_character);
while (true) {
if (parser.expectSpecialCharacter(lexer_result, .right_parenthesis)) {
break;
} else |_| {}
return error.not_implemented;
}
try parser.expectSpecialCharacter(lexer_result, .arrow);
const return_type_identifier = try parser.expectIdentifier(lexer_result);
try parser.expectSpecialCharacter(lexer_result, .left_brace);
while (true) {
if (parser.expectSpecialCharacter(lexer_result, .right_brace)) {
break;
} else |_| {}
return error.not_implemented;
}
return Function{
.name = name,
.statements = ArrayList(Statement){},
.arguments = ArrayList(Function.Argument){},
.return_type = return_type_identifier,
};
}
inline fn consume(parser: *Parser, lexer_result: *const lexer.Result, comptime token_id: lexer.TokenId) void {
assert(lexer_result.ids.items[parser.id_index] == token_id);
parser.id_index += 1;
switch (token_id) {
.special_character => parser.special_character_index += 1,
.identifier => parser.identifier_index += 1,
}
}
fn parseTopLevelDeclaration(parser: *Parser, lexer_result: *const lexer.Result) !void {
const top_level_identifier = try parser.expectIdentifier(lexer_result);
const next_token = parser.peek(lexer_result);
switch (next_token) {
.special_character => |special_character| switch (special_character) {
.left_parenthesis => {
const function = try parser.parseFunction(lexer_result, top_level_identifier);
try parser.functions.append(parser.allocator, function);
},
else => return error.not_implemented,
},
.identifier => |identifier| {
_ = identifier;
return error.not_implemented;
},
}
}
inline fn peek(parser: *const Parser, lexer_result: *const lexer.Result) PeekResult {
return switch (lexer_result.ids.items[parser.id_index]) {
.special_character => .{
.special_character = lexer_result.special_characters.items[parser.special_character_index],
},
.identifier => .{
.identifier = blk: {
const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
break :blk lexer_result.file[identifier_range.start..identifier_range.end];
},
},
};
}
fn expectSpecialCharacter(parser: *Parser, lexer_result: *const lexer.Result, expected: lexer.SpecialCharacter) !void {
const token_id = lexer_result.ids.items[parser.id_index];
if (token_id != .special_character) {
return error.expected_special_character;
}
defer parser.id_index += 1;
const special_character = lexer_result.special_characters.items[parser.special_character_index];
if (special_character != expected) {
return error.expected_different_special_character;
}
parser.special_character_index += 1;
}
fn acceptSpecialCharacter() void {}
fn expectIdentifier(parser: *Parser, lexer_result: *const lexer.Result) !u32 {
const token_id = lexer_result.ids.items[parser.id_index];
if (token_id != .identifier) {
return Error.expected_identifier;
}
parser.id_index += 1;
const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
parser.identifier_index += 1;
const identifier = lexer_result.file[identifier_range.start..identifier_range.end];
const Hash = std.hash.Wyhash;
const seed = @intFromPtr(identifier.ptr);
var hasher = Hash.init(seed);
std.hash.autoHash(&hasher, identifier.ptr);
const hash = hasher.final();
const truncated_hash: u32 = @truncate(hash);
try parser.strings.put(parser.allocator, truncated_hash, identifier);
return truncated_hash;
}
const Error = error{
expected_identifier,
expected_special_character,
expected_different_special_character,
not_implemented,
};
};
pub fn runTest(allocator: Allocator, lexer_result: *const lexer.Result) !Result {
var parser = Parser{
.allocator = allocator,
.strings = StringMap{},
.functions = ArrayList(Function){},
};
return parser.parse(lexer_result) catch |err| {
std.log.err("error: {}", .{err});
return err;
};
}

src/test/main.b Normal file

@@ -0,0 +1,3 @@
main() -> void {
}
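
This is the program the behavior test feeds through the pipeline: a single function with no arguments and a void return type, the only shape ir.zig currently lowers (a lone ret_void instruction).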