diff --git a/src/data_structures.zig b/src/data_structures.zig new file mode 100644 index 0000000..7696d38 --- /dev/null +++ b/src/data_structures.zig @@ -0,0 +1,3 @@ +const std = @import("std"); + +pub const ArrayList = std.ArrayListUnmanaged; diff --git a/src/emit.zig b/src/emit.zig new file mode 100644 index 0000000..6be2371 --- /dev/null +++ b/src/emit.zig @@ -0,0 +1,398 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const log = std.log; +const page_size = std.mem.page_size; +const assert = std.debug.assert; +const expect = std.testing.expect; +const expectEqual = std.testing.expectEqual; + +const ir = @import("ir.zig"); + +pub const Result = struct { + pub fn free(result: *Result, allocator: Allocator) void { + _ = allocator; + _ = result; + } +}; + +const Section = struct { + content: []align(page_size) u8, + index: usize = 0, +}; + +const Image = struct { + sections: struct { + text: Section, + rodata: Section, + data: Section, + }, + entry_point: u32 = 0, + + fn create() !Image { + return Image{ + .sections = .{ + .text = .{ .content = try mmap(page_size, .{ .executable = true }) }, + .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) }, + .data = .{ .content = try mmap(page_size, .{ .executable = false }) }, + }, + }; + } + + fn destroy(image: *Image) void { + inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| { + std.os.munmap(@field(image.sections, field_name).content); + } + } + + inline fn mmap(size: usize, flags: packed struct { + executable: bool, + }) ![]align(page_size) u8 { + const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0; + const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE; + + return std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0); + } + + fn appendCode(image: *Image, code: []const u8) void { + const destination = image.sections.text.content[image.sections.text.index..][0..code.len]; + @memcpy(destination, code); + image.sections.text.index += code.len; + } + + fn appendCodeByte(image: *Image, code_byte: u8) void { + image.sections.text.content[image.sections.text.index] = code_byte; + image.sections.text.index += 1; + } + + fn getEntryPoint(image: *const Image, comptime Function: type) *const Function { + comptime { + assert(@typeInfo(Function) == .Fn); + } + + assert(image.sections.text.content.len > 0); + return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point])); + } +}; + +const Rex = enum(u8) { + b = upper_4_bits | (1 << 0), + x = upper_4_bits | (1 << 1), + r = upper_4_bits | (1 << 2), + w = upper_4_bits | (1 << 3), + + const upper_4_bits = 0b100_0000; +}; + +const GPRegister = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, +}; + +pub const BasicGPRegister = enum(u3) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, +}; + +const prefix_lock = 0xf0; +const prefix_repne_nz = 0xf2; +const prefix_rep = 0xf3; +const prefix_rex_w = [1]u8{@intFromEnum(Rex.w)}; +const prefix_16_bit_operand = [1]u8{0x66}; + +const ret = 0xc3; +const mov_a_imm = [1]u8{0xb8}; +const mov_reg_imm8: u8 = 0xb0; + +inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 { + comptime { + assert(@typeInfo(@TypeOf(integer)) == .Int); + } + + return @as([@sizeOf(@TypeOf(integer))]u8, @bitCast(integer)); +} + +inline fn movAImm(image: *Image, integer: anytype) void { + const T = @TypeOf(integer); + image.appendCode(&(switch (T) { + u8, i8 => .{mov_reg_imm8 | @intFromEnum(GPRegister.a)}, + u16, i16 => prefix_16_bit_operand ++ mov_a_imm, + u32, i32 => mov_a_imm, + u64, i64 => prefix_rex_w ++ mov_a_imm, + else => @compileError("Unsupported"), + } ++ intToArrayOfBytes(integer))); +} + +test "ret void" { + var image = try Image.create(); + defer image.destroy(); + image.appendCodeByte(ret); + + const function_pointer = image.getEntryPoint(fn () callconv(.C) void); + function_pointer(); +} + +const integer_types_to_test = [_]type{ u8, u16, u32, u64, i8, i16, i32, i64 }; + +fn getMaxInteger(comptime T: type) T { + comptime { + assert(@typeInfo(T) == .Int); + } + + return switch (@typeInfo(T).Int.signedness) { + .unsigned => std.math.maxInt(T), + .signed => std.math.minInt(T), + }; +} + +test "ret integer" { + inline for (integer_types_to_test) |Int| { + var image = try Image.create(); + defer image.destroy(); + const expected_number = getMaxInteger(Int); + + movAImm(&image, expected_number); + image.appendCodeByte(ret); + + const function_pointer = image.getEntryPoint(fn () callconv(.C) Int); + const result = function_pointer(); + try expect(result == expected_number); + } +} + +const LastByte = packed struct(u8) { + dst: BasicGPRegister, + src: BasicGPRegister, + always_on: u2 = 0b11, +}; + +fn movRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void { + dstRmSrcR(image, T, .mov, dst, src); +} + +fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void { + const last_byte: u8 = @bitCast(LastByte{ + .dst = dst, + .src = src, + }); + const opcode_byte = @intFromEnum(opcode); + + const bytes = switch (T) { + u8, i8 => blk: { + const base = [_]u8{ opcode_byte - 1, last_byte }; + if (@intFromEnum(dst) >= @intFromEnum(BasicGPRegister.sp) or @intFromEnum(src) >= @intFromEnum(BasicGPRegister.sp)) { + image.appendCodeByte(0x40); + } + + break :blk base; + }, + u16, i16 => prefix_16_bit_operand ++ .{ opcode_byte, last_byte }, + u32, i32 => .{ opcode_byte, last_byte }, + u64, i64 => prefix_rex_w ++ .{ opcode_byte, last_byte }, + else => @compileError("Not supported"), + }; + + image.appendCode(&bytes); +} + +test "ret integer argument" { + inline for (integer_types_to_test) |Int| { + var image = try Image.create(); + defer image.destroy(); + const number = getMaxInteger(Int); + + movRmR(&image, Int, .a, .di); + image.appendCodeByte(ret); + + const functionPointer = image.getEntryPoint(fn (Int) callconv(.C) Int); + const result = functionPointer(number); + try expectEqual(number, result); + } +} + +var r = std.rand.Pcg.init(0xffffffffffffffff); + +fn getRandomNumberRange(comptime T: type, min: T, max: T) T { + const random = r.random(); + return switch (@typeInfo(T).Int.signedness) { + .signed => random.intRangeAtMost(T, min, max), + .unsigned => random.uintAtMost(T, max), + }; +} + +fn subRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void { + dstRmSrcR(image, T, .sub, dst, src); +} + +test "ret sub arguments" { + inline for (integer_types_to_test) |Int| { + var image = try Image.create(); + defer image.destroy(); + const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2); + const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a); + + movRmR(&image, Int, .a, .di); + subRmR(&image, Int, .a, .si); + image.appendCodeByte(ret); + + const functionPointer = image.getEntryPoint(fn (Int, Int) callconv(.C) Int); + const result = functionPointer(a, b); + try expectEqual(a - b, result); + } +} + +const OpcodeRmR = enum(u8) { + add = 0x01, + @"or" = 0x09, + @"and" = 0x21, + sub = 0x29, + xor = 0x31, + @"test" = 0x85, + mov = 0x89, +}; + +test "test binary operations" { + inline for (integer_types_to_test) |T| { + const test_cases = [_]TestIntegerBinaryOperation(T){ + .{ + .opcode = .add, + .callback = struct { + fn callback(a: T, b: T) T { + return @addWithOverflow(a, b)[0]; + } + }.callback, + }, + .{ + .opcode = .sub, + .callback = struct { + fn callback(a: T, b: T) T { + return @subWithOverflow(a, b)[0]; + } + }.callback, + }, + .{ + .opcode = .@"or", + .callback = struct { + fn callback(a: T, b: T) T { + return a | b; + } + }.callback, + }, + .{ + .opcode = .@"and", + .callback = struct { + fn callback(a: T, b: T) T { + return a & b; + } + }.callback, + }, + .{ + .opcode = .xor, + .callback = struct { + fn callback(a: T, b: T) T { + return a ^ b; + } + }.callback, + }, + }; + + for (test_cases) |test_case| { + try test_case.runTest(); + } + } +} + +fn TestIntegerBinaryOperation(comptime T: type) type { + const should_log = false; + return struct { + callback: *const fn (a: T, b: T) T, + opcode: OpcodeRmR, + + pub fn runTest(test_case: @This()) !void { + for (0..10) |_| { + var image = try Image.create(); + defer image.destroy(); + errdefer image.destroy(); + const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2); + const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a); + movRmR(&image, T, .a, .di); + dstRmSrcR(&image, T, test_case.opcode, .a, .si); + image.appendCodeByte(ret); + + const functionPointer = image.getEntryPoint(fn (T, T) callconv(.C) T); + const expected = test_case.callback(a, b); + const result = functionPointer(a, b); + if (should_log) { + log.err("{s} {}, {} ({})", .{ @tagName(test_case.opcode), a, b, T }); + } + try expectEqual(expected, result); + } + } + }; +} + +test "call after" { + var image = try Image.create(); + defer image.destroy(); + errdefer image.destroy(); + const jump_patch_offset = image.sections.text.index + 1; + image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 }); + const jump_source = image.sections.text.index; + image.appendCodeByte(ret); + const jump_target = image.sections.text.index; + @as(*align(1) u32, @ptrCast(&image.sections.text.content[jump_patch_offset])).* = @intCast(jump_target - jump_source); + image.appendCodeByte(ret); + + const functionPointer = image.getEntryPoint(fn () callconv(.C) void); + functionPointer(); +} + +test "call before" { + var image = try Image.create(); + defer image.destroy(); + errdefer image.destroy(); + const first_jump_patch_offset = image.sections.text.index + 1; + const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 }; + image.appendCode(&first_call); + const first_jump_source = image.sections.text.index; + image.appendCodeByte(ret); + const second_jump_target = image.sections.text.index; + image.appendCodeByte(ret); + const first_jump_target = image.sections.text.index; + @as(*align(1) i32, @ptrCast(&image.sections.text.content[first_jump_patch_offset])).* = @intCast(first_jump_target - first_jump_source); + const second_call = .{0xe8} ++ @as([4]u8, @bitCast(@as(i32, @intCast(@as(i64, @intCast(second_jump_target)) - @as(i64, @intCast(image.sections.text.index + 5)))))); + image.appendCode(&second_call); + image.appendCodeByte(ret); + + const functionPointer = image.getEntryPoint(fn () callconv(.C) void); + functionPointer(); +} + +pub fn runTest(allocator: Allocator, ir_result: *const ir.Result) !Result { + _ = allocator; + + for (ir_result.functions.items) |*function| { + _ = function; + } + + return Result{}; +} diff --git a/src/fs.zig b/src/fs.zig new file mode 100644 index 0000000..c8c5963 --- /dev/null +++ b/src/fs.zig @@ -0,0 +1,9 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; + +pub const first = "src/test/main.b"; + +pub fn readFile(allocator: Allocator, file_relative_path: []const u8) ![]const u8 { + const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize)); + return file; +} diff --git a/src/ir.zig b/src/ir.zig new file mode 100644 index 0000000..c5d7992 --- /dev/null +++ b/src/ir.zig @@ -0,0 +1,142 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const equal = std.mem.eql; + +const data_structures = @import("data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const parser = @import("parser.zig"); + +const void_type = Type{ + .id = .void, +}; + +const Type = struct { + id: Id, + + inline fn isPrimitive(T: Type) bool { + return switch (T.id) { + .void => true, + }; + } + const Id = enum { + void, + }; +}; + +const Error = error{ + type_mismatch, + internal, + arguments_not_used, +}; + +const TopLevelDeclaration = struct { + type: Id, + index: u31, + + const Id = enum { + function, + expression, + }; +}; + +const Instruction = struct { + id: Id, + index: u16, + + const Id = enum { + ret_void, + }; +}; + +const ret_void = Instruction{ + .id = .ret_void, + .index = 0, +}; + +const ret = struct { + is_type: bool, +}; + +const Function = struct { + instructions: ArrayList(Instruction), + return_type: Type, +}; + +pub const Result = struct { + top_level_declarations: ArrayList(TopLevelDeclaration), + functions: ArrayList(Function), + + pub fn free(result: *Result, allocator: Allocator) void { + for (result.functions.items) |*function| { + function.instructions.clearAndFree(allocator); + } + result.functions.clearAndFree(allocator); + result.top_level_declarations.clearAndFree(allocator); + } +}; + +const Analyzer = struct { + parser: *const parser.Result, + top_level_declarations: ArrayList(TopLevelDeclaration), + functions: ArrayList(Function), + allocator: Allocator, + + fn analyze(allocator: Allocator, parser_result: *const parser.Result) Error!Result { + var analyzer = Analyzer{ + .parser = parser_result, + .top_level_declarations = ArrayList(TopLevelDeclaration){}, + .allocator = allocator, + .functions = ArrayList(Function){}, + }; + + for (parser_result.functions.items) |ast_function| { + if (ast_function.statements.items.len != 0) { + for (ast_function.statements.items) |statement| { + _ = statement; + @panic("TODO: statement"); + } + } else { + if (ast_function.arguments.items.len != 0) { + return Error.arguments_not_used; + } + + try analyzer.expectPrimitiveType(void_type, ast_function.return_type); + + const function_index = analyzer.functions.items.len; + + var function = Function{ + .instructions = ArrayList(Instruction){}, + .return_type = void_type, + }; + + function.instructions.append(allocator, ret_void) catch return Error.internal; + + analyzer.top_level_declarations.append(allocator, TopLevelDeclaration{ + .type = .function, + .index = @intCast(function_index), + }) catch return Error.internal; + + analyzer.functions.append(allocator, function) catch return Error.internal; + } + } + + return .{ + .top_level_declarations = analyzer.top_level_declarations, + .functions = analyzer.functions, + }; + } + + fn expectPrimitiveType(analyzer: *Analyzer, comptime type_value: Type, type_identifier_id: u32) Error!void { + assert(type_value.isPrimitive()); + const type_identifier = analyzer.parser.strings.get(type_identifier_id) orelse return Error.internal; + + if (!equal(u8, @tagName(type_value.id), type_identifier)) { + return Error.type_mismatch; + } + } +}; + +pub fn runTest(allocator: Allocator, parser_result: *const parser.Result) !Result { + return Analyzer.analyze(allocator, parser_result); +} diff --git a/src/lexer.zig b/src/lexer.zig new file mode 100644 index 0000000..be18d11 --- /dev/null +++ b/src/lexer.zig @@ -0,0 +1,156 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const log = std.log; + +const data_structures = @import("data_structures.zig"); +const ArrayList = data_structures.ArrayList; + +const fs = @import("fs.zig"); + +pub inline fn rdtsc() u64 { + var edx: u32 = undefined; + var eax: u32 = undefined; + + asm volatile ( + \\rdtsc + : [eax] "={eax}" (eax), + [edx] "={edx}" (edx), + ); + + return @as(u64, edx) << 32 | eax; +} + +inline fn rdtscFast() u32 { + return asm volatile ( + \\rdtsc + : [eax] "={eax}" (-> u32), + : + : "edx" + ); +} + +const vector_byte_count = 16; +// These two actually take less space due to how Zig handles bool as u1 +const VBool = @Vector(vector_byte_count, bool); +const VU1 = @Vector(vector_byte_count, u1); + +const VU8 = @Vector(vector_byte_count, u8); + +inline fn vand(v1: VBool, v2: VBool) VBool { + return @bitCast(@as(VU1, @bitCast(v1)) & @as(VU1, @bitCast(v2))); +} + +inline fn byteMask(n: u8) VU8 { + return @splat(n); +} + +inline fn endOfIdentifier(ch: u8) bool { + // TODO: complete + return ch == ' ' or ch == '(' or ch == ')'; +} + +const Identifier = struct { + start: u32, + end: u32, +}; + +pub const TokenId = enum { + identifier, + special_character, +}; + +pub const SpecialCharacter = enum(u8) { + arrow = 0, + left_parenthesis = '(', + right_parenthesis = ')', + left_brace = '{', + right_brace = '}', +}; + +pub const Result = struct { + identifiers: ArrayList(Identifier), + special_characters: ArrayList(SpecialCharacter), + ids: ArrayList(TokenId), + file: []const u8, + time: u64 = 0, + + pub fn free(result: *Result, allocator: Allocator) void { + result.identifiers.clearAndFree(allocator); + result.special_characters.clearAndFree(allocator); + result.ids.clearAndFree(allocator); + allocator.free(result.file); + } +}; + +fn lex(allocator: Allocator, text: []const u8) !Result { + const time_start = std.time.Instant.now() catch unreachable; + + var index: usize = 0; + + var result = Result{ + .identifiers = try ArrayList(Identifier).initCapacity(allocator, text.len), + .special_characters = try ArrayList(SpecialCharacter).initCapacity(allocator, text.len), + .ids = try ArrayList(TokenId).initCapacity(allocator, text.len), + .file = text, + }; + + defer { + const time_end = std.time.Instant.now() catch unreachable; + result.time = time_end.since(time_start); + } + + while (index < text.len) { + const first_char = text[index]; + switch (first_char) { + 'a'...'z', 'A'...'Z', '_' => { + const start = index; + // SIMD this + while (!endOfIdentifier(text[index])) { + index += 1; + } + + result.identifiers.appendAssumeCapacity(.{ + .start = @intCast(start), + .end = @intCast(index), + }); + + result.ids.appendAssumeCapacity(.identifier); + }, + '(', ')', '{', '}' => |special_character| { + result.special_characters.appendAssumeCapacity(@enumFromInt(special_character)); + result.ids.appendAssumeCapacity(.special_character); + index += 1; + }, + ' ', '\n' => index += 1, + '-' => { + if (text[index + 1] == '>') { + result.special_characters.appendAssumeCapacity(.arrow); + result.ids.appendAssumeCapacity(.special_character); + index += 2; + } else { + @panic("TODO"); + } + }, + else => { + index += 1; + }, + } + } + + return result; +} + +pub fn runTest(allocator: Allocator, file: []const u8) !Result { + const result = try lex(allocator, file); + + return result; +} + +test "lexer" { + const allocator = std.testing.allocator; + const file_path = fs.first; + const file = try fs.readFile(allocator, file_path); + var result = try runTest(allocator, file); + defer result.free(allocator); +} diff --git a/src/main.zig b/src/main.zig index 5daae5d..17ac21d 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,341 +1,45 @@ const std = @import("std"); -const log = std.log; -const page_size = std.mem.page_size; +const Allocator = std.mem.Allocator; const assert = std.debug.assert; -const expect = std.testing.expect; -const expectEqual = std.testing.expectEqual; -const Section = struct { - content: []align(page_size) u8, - index: usize = 0, -}; +const fs = @import("fs.zig"); -const Image = struct { - sections: struct { - text: Section, - rodata: Section, - data: Section, - }, - entry_point: u32 = 0, +const lexer = @import("lexer.zig"); +const parser = @import("parser.zig"); +const ir = @import("ir.zig"); +const emit = @import("emit.zig"); - fn create() !Image { - return Image{ - .sections = .{ - .text = .{ .content = try mmap(page_size, .{ .executable = true }) }, - .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) }, - .data = .{ .content = try mmap(page_size, .{ .executable = false }) }, - }, - }; - } +pub const seed = std.math.maxInt(u64); - fn destroy(image: *Image) void { - inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| { - std.os.munmap(@field(image.sections, field_name).content); - } - } - - inline fn mmap(size: usize, flags: packed struct { - executable: bool, - }) ![]align(page_size) u8 { - const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0; - const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE; - - return std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0); - } - - fn appendCode(image: *Image, code: []const u8) void { - const destination = image.sections.text.content[image.sections.text.index..][0..code.len]; - @memcpy(destination, code); - image.sections.text.index += code.len; - } - - fn appendCodeByte(image: *Image, code_byte: u8) void { - image.sections.text.content[image.sections.text.index] = code_byte; - image.sections.text.index += 1; - } - - fn getEntryPoint(image: *const Image, comptime Function: type) *const Function { - comptime { - assert(@typeInfo(Function) == .Fn); - } - - assert(image.sections.text.content.len > 0); - return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point])); - } -}; - -const Rex = enum(u8) { - b = upper_4_bits | (1 << 0), - x = upper_4_bits | (1 << 1), - r = upper_4_bits | (1 << 2), - w = upper_4_bits | (1 << 3), - - const upper_4_bits = 0b100_0000; -}; - -const GPRegister = enum(u4) { - a = 0, - c = 1, - d = 2, - b = 3, - sp = 4, - bp = 5, - si = 6, - di = 7, - r8 = 8, - r9 = 9, - r10 = 10, - r11 = 11, - r12 = 12, - r13 = 13, - r14 = 14, - r15 = 15, -}; - -pub const BasicGPRegister = enum(u3) { - a = 0, - c = 1, - d = 2, - b = 3, - sp = 4, - bp = 5, - si = 6, - di = 7, -}; - -const prefix_lock = 0xf0; -const prefix_repne_nz = 0xf2; -const prefix_rep = 0xf3; -const prefix_rex_w = [1]u8{@intFromEnum(Rex.w)}; -const prefix_16_bit_operand = [1]u8{0x66}; - -const ret = 0xc3; -const mov_a_imm = [1]u8{0xb8}; -const mov_reg_imm8: u8 = 0xb0; - -inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 { - comptime { - assert(@typeInfo(@TypeOf(integer)) == .Int); - } - - return @as([@sizeOf(@TypeOf(integer))]u8, @bitCast(integer)); +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + const allocator = gpa.allocator(); + try behaviorTest(allocator, fs.first); } -inline fn movAImm(image: *Image, integer: anytype) void { - const T = @TypeOf(integer); - image.appendCode(&(switch (T) { - u8, i8 => .{mov_reg_imm8 | @intFromEnum(GPRegister.a)}, - u16, i16 => prefix_16_bit_operand ++ mov_a_imm, - u32, i32 => mov_a_imm, - u64, i64 => prefix_rex_w ++ mov_a_imm, - else => @compileError("Unsupported"), - } ++ intToArrayOfBytes(integer))); -} - -test "ret void" { - var image = try Image.create(); - defer image.destroy(); - image.appendCodeByte(ret); - - const function_pointer = image.getEntryPoint(fn () callconv(.C) void); - function_pointer(); -} - -const integer_types_to_test = [_]type{ u8, u16, u32, u64, i8, i16, i32, i64 }; - -fn getMaxInteger(comptime T: type) T { - comptime { - assert(@typeInfo(T) == .Int); - } - - return switch (@typeInfo(T).Int.signedness) { - .unsigned => std.math.maxInt(T), - .signed => std.math.minInt(T), +fn behaviorTest(allocator: Allocator, file_relative_path: []const u8) !void { + const file = try fs.readFile(allocator, file_relative_path); + var lexer_result = try lexer.runTest(allocator, file); + defer lexer_result.free(allocator); + var parser_result = parser.runTest(allocator, &lexer_result) catch |err| { + std.log.err("Lexer took {} ns", .{lexer_result.time}); + return err; }; + defer parser_result.free(allocator); + var ir_result = try ir.runTest(allocator, &parser_result); + defer ir_result.free(allocator); + var emit_result = try emit.runTest(allocator, &ir_result); + defer emit_result.free(allocator); } -test "ret integer" { - inline for (integer_types_to_test) |Int| { - var image = try Image.create(); - defer image.destroy(); - const expected_number = getMaxInteger(Int); - - movAImm(&image, expected_number); - image.appendCodeByte(ret); - - const function_pointer = image.getEntryPoint(fn () callconv(.C) Int); - const result = function_pointer(); - try expect(result == expected_number); - } +test { + _ = lexer; + _ = parser; + _ = ir; + _ = emit; } -const LastByte = packed struct(u8) { - dst: BasicGPRegister, - src: BasicGPRegister, - always_on: u2 = 0b11, -}; - -fn movRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void { - dstRmSrcR(image, T, .mov, dst, src); -} - -fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void { - const last_byte: u8 = @bitCast(LastByte{ - .dst = dst, - .src = src, - }); - const opcode_byte = @intFromEnum(opcode); - - const bytes = switch (T) { - u8, i8 => blk: { - const base = [_]u8{ opcode_byte - 1, last_byte }; - if (@intFromEnum(dst) >= @intFromEnum(BasicGPRegister.sp) or @intFromEnum(src) >= @intFromEnum(BasicGPRegister.sp)) { - image.appendCodeByte(0x40); - } - - break :blk base; - }, - u16, i16 => prefix_16_bit_operand ++ .{ opcode_byte, last_byte }, - u32, i32 => .{ opcode_byte, last_byte }, - u64, i64 => prefix_rex_w ++ .{ opcode_byte, last_byte }, - else => @compileError("Not supported"), - }; - - image.appendCode(&bytes); -} - -test "ret integer argument" { - inline for (integer_types_to_test) |Int| { - var image = try Image.create(); - defer image.destroy(); - const number = getMaxInteger(Int); - - movRmR(&image, Int, .a, .di); - image.appendCodeByte(ret); - - const functionPointer = image.getEntryPoint(fn (Int) callconv(.C) Int); - const result = functionPointer(number); - try expectEqual(number, result); - } -} - -var r = std.rand.Pcg.init(0xffffffffffffffff); - -fn getRandomNumberRange(comptime T: type, min: T, max: T) T { - const random = r.random(); - return switch (@typeInfo(T).Int.signedness) { - .signed => random.intRangeAtMost(T, min, max), - .unsigned => random.uintAtMost(T, max), - }; -} - -fn subRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void { - dstRmSrcR(image, T, .sub, dst, src); -} - -test "ret sub arguments" { - inline for (integer_types_to_test) |Int| { - var image = try Image.create(); - defer image.destroy(); - const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2); - const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a); - - movRmR(&image, Int, .a, .di); - subRmR(&image, Int, .a, .si); - image.appendCodeByte(ret); - - const functionPointer = image.getEntryPoint(fn (Int, Int) callconv(.C) Int); - const result = functionPointer(a, b); - try expectEqual(a - b, result); - } -} - -const OpcodeRmR = enum(u8) { - add = 0x01, - @"or" = 0x09, - @"and" = 0x21, - sub = 0x29, - xor = 0x31, - @"test" = 0x85, - mov = 0x89, -}; - -test "test binary operations" { - inline for (integer_types_to_test) |T| { - const test_cases = [_]TestIntegerBinaryOperation(T){ - .{ - .opcode = .add, - .callback = struct { - fn callback(a: T, b: T) T { - return @addWithOverflow(a, b)[0]; - } - }.callback, - }, - .{ - .opcode = .sub, - .callback = struct { - fn callback(a: T, b: T) T { - return @subWithOverflow(a, b)[0]; - } - }.callback, - }, - .{ - .opcode = .@"or", - .callback = struct { - fn callback(a: T, b: T) T { - return a | b; - } - }.callback, - }, - .{ - .opcode = .@"and", - .callback = struct { - fn callback(a: T, b: T) T { - return a & b; - } - }.callback, - }, - .{ - .opcode = .xor, - .callback = struct { - fn callback(a: T, b: T) T { - return a ^ b; - } - }.callback, - }, - }; - - for (test_cases) |test_case| { - try test_case.runTest(); - } - } -} - -fn TestIntegerBinaryOperation(comptime T: type) type { - const should_log = false; - return struct { - callback: *const fn (a: T, b: T) T, - opcode: OpcodeRmR, - - pub fn runTest(test_case: @This()) !void { - for (0..10) |_| { - var image = try Image.create(); - defer image.destroy(); - errdefer image.destroy(); - const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2); - const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a); - movRmR(&image, T, .a, .di); - dstRmSrcR(&image, T, test_case.opcode, .a, .si); - image.appendCodeByte(ret); - - const functionPointer = image.getEntryPoint(fn (T, T) callconv(.C) T); - const expected = test_case.callback(a, b); - const result = functionPointer(a, b); - if (should_log) { - log.err("{s} {}, {} ({})", .{ @tagName(test_case.opcode), a, b, T }); - } - try expectEqual(expected, result); - } - } - }; +test "behavior test 1" { + const allocator = std.testing.allocator; + try behaviorTest(allocator, fs.first); } diff --git a/src/parser.zig b/src/parser.zig new file mode 100644 index 0000000..4b56dcc --- /dev/null +++ b/src/parser.zig @@ -0,0 +1,195 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; + +const data_structures = @import("data_structures.zig"); +const ArrayList = data_structures.ArrayList; + +const lexer = @import("lexer.zig"); + +pub const Result = struct { + functions: ArrayList(Function), + strings: StringMap, + + pub fn free(result: *Result, allocator: Allocator) void { + result.functions.clearAndFree(allocator); + result.strings.clearAndFree(allocator); + } +}; + +const PeekResult = union(lexer.TokenId) { + special_character: lexer.SpecialCharacter, + identifier: []const u8, +}; + +const Function = struct { + name: u32, + return_type: u32, + arguments: ArrayList(Argument), + statements: ArrayList(Statement), + + const Argument = struct { + foo: u32 = 0, + }; +}; + +const Statement = struct { + foo: u32 = 0, +}; + +const StringMap = std.AutoHashMapUnmanaged(u32, []const u8); + +const Parser = struct { + id_index: u32 = 0, + identifier_index: u32 = 0, + special_character_index: u32 = 0, + strings: StringMap, + allocator: Allocator, + functions: ArrayList(Function), + + fn parse(parser: *Parser, lexer_result: *const lexer.Result) !Result { + while (parser.id_index < lexer_result.ids.items.len) { + try parser.parseTopLevelDeclaration(lexer_result); + } + + return Result{ + .functions = parser.functions, + .strings = parser.strings, + }; + } + + fn parseFunction(parser: *Parser, lexer_result: *const lexer.Result, name: u32) !Function { + assert(lexer_result.special_characters.items[parser.special_character_index] == .left_parenthesis); + parser.consume(lexer_result, .special_character); + + while (true) { + if (parser.expectSpecialCharacter(lexer_result, .right_parenthesis)) { + break; + } else |_| {} + + return error.not_implemented; + } + + try parser.expectSpecialCharacter(lexer_result, .arrow); + + const return_type_identifier = try parser.expectIdentifier(lexer_result); + + try parser.expectSpecialCharacter(lexer_result, .left_brace); + + while (true) { + if (parser.expectSpecialCharacter(lexer_result, .right_brace)) { + break; + } else |_| {} + + return error.not_implemented; + } + + return Function{ + .name = name, + .statements = ArrayList(Statement){}, + .arguments = ArrayList(Function.Argument){}, + .return_type = return_type_identifier, + }; + } + + inline fn consume(parser: *Parser, lexer_result: *const lexer.Result, comptime token_id: lexer.TokenId) void { + assert(lexer_result.ids.items[parser.id_index] == token_id); + parser.id_index += 1; + switch (token_id) { + .special_character => parser.special_character_index += 1, + .identifier => parser.identifier_index += 1, + } + } + + fn parseTopLevelDeclaration(parser: *Parser, lexer_result: *const lexer.Result) !void { + const top_level_identifier = try parser.expectIdentifier(lexer_result); + const next_token = parser.peek(lexer_result); + + switch (next_token) { + .special_character => |special_character| switch (special_character) { + .left_parenthesis => { + const function = try parser.parseFunction(lexer_result, top_level_identifier); + try parser.functions.append(parser.allocator, function); + }, + else => return error.not_implemented, + }, + .identifier => |identifier| { + _ = identifier; + return error.not_implemented; + }, + } + } + + inline fn peek(parser: *const Parser, lexer_result: *const lexer.Result) PeekResult { + return switch (lexer_result.ids.items[parser.id_index]) { + .special_character => .{ + .special_character = lexer_result.special_characters.items[parser.special_character_index], + }, + .identifier => .{ + .identifier = blk: { + const identifier_range = lexer_result.identifiers.items[parser.identifier_index]; + break :blk lexer_result.file[identifier_range.start .. identifier_range.start + identifier_range.end]; + }, + }, + }; + } + + fn expectSpecialCharacter(parser: *Parser, lexer_result: *const lexer.Result, expected: lexer.SpecialCharacter) !void { + const token_id = lexer_result.ids.items[parser.id_index]; + if (token_id != .special_character) { + return error.expected_special_character; + } + + defer parser.id_index += 1; + + const special_character = lexer_result.special_characters.items[parser.special_character_index]; + if (special_character != expected) { + return error.expected_different_special_character; + } + + parser.special_character_index += 1; + } + + fn acceptSpecialCharacter() void {} + + fn expectIdentifier(parser: *Parser, lexer_result: *const lexer.Result) !u32 { + const token_id = lexer_result.ids.items[parser.id_index]; + if (token_id != .identifier) { + return Error.expected_identifier; + } + + parser.id_index += 1; + + const identifier_range = lexer_result.identifiers.items[parser.identifier_index]; + parser.identifier_index += 1; + const identifier = lexer_result.file[identifier_range.start..identifier_range.end]; + const Hash = std.hash.Wyhash; + const seed = @intFromPtr(identifier.ptr); + var hasher = Hash.init(seed); + std.hash.autoHash(&hasher, identifier.ptr); + const hash = hasher.final(); + const truncated_hash: u32 = @truncate(hash); + try parser.strings.put(parser.allocator, truncated_hash, identifier); + return truncated_hash; + } + + const Error = error{ + expected_identifier, + expected_special_character, + expected_different_special_character, + not_implemented, + }; +}; + +pub fn runTest(allocator: Allocator, lexer_result: *const lexer.Result) !Result { + var parser = Parser{ + .allocator = allocator, + .strings = StringMap{}, + .functions = ArrayList(Function){}, + }; + + return parser.parse(lexer_result) catch |err| { + std.log.err("error: {}", .{err}); + return err; + }; +} diff --git a/src/test/main.b b/src/test/main.b new file mode 100644 index 0000000..41c31f9 --- /dev/null +++ b/src/test/main.b @@ -0,0 +1,3 @@ +main() -> void { + +}