From 611e611cab2dbada2a924aa6f52563d7fe781a72 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Thu, 28 Sep 2023 21:57:43 -0600 Subject: [PATCH 1/4] sema for main function --- lib/std/start.nat | 3 +- src/Compilation.zig | 118 ++- src/backend/intermediate_representation.zig | 67 +- src/data_structures.zig | 95 ++- src/frontend/lexical_analyzer.zig | 19 +- src/frontend/semantic_analyzer.zig | 819 ++++++++++++-------- src/frontend/syntactic_analyzer.zig | 252 ++++-- src/fs.zig | 2 - src/main.zig | 2 +- src/test/main.nat | 2 +- 10 files changed, 872 insertions(+), 507 deletions(-) diff --git a/lib/std/start.nat b/lib/std/start.nat index 81360d0..031890b 100644 --- a/lib/std/start.nat +++ b/lib/std/start.nat @@ -3,6 +3,7 @@ comptime { } const _start = fn () noreturn { - _ = #syscall(231, 0); + const result = #import("main").main(); + _ = #syscall(231, result); unreachable; }; diff --git a/src/Compilation.zig b/src/Compilation.zig index a844add..6a7e357 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -64,6 +64,7 @@ pub const Struct = struct { pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const Type = union(enum) { @@ -74,6 +75,7 @@ pub const Type = union(enum) { @"struct": Struct.Index, pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const Integer = struct { @@ -87,12 +89,14 @@ pub const Integer = struct { /// A scope contains a bunch of declarations pub const Scope = struct { - parent: Scope.Index, - type: Type.Index = Type.Index.invalid, declarations: AutoHashMap(u32, Declaration.Index) = .{}, + parent: Scope.Index, + file: File.Index, + type: Type.Index = Type.Index.invalid, pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const ScopeType = enum(u1) { @@ -113,6 +117,7 @@ pub const Declaration = struct { pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const Function = struct { @@ -133,6 +138,7 @@ pub const Function = struct { pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const Block = struct { @@ -140,6 +146,7 @@ pub const Block = struct { reaches_end: bool, pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const Field = struct { @@ -147,6 +154,7 @@ pub const Field = struct { pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const Loop = struct { @@ -156,6 +164,7 @@ pub const Loop = struct { pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; }; const Runtime = struct { @@ -172,6 +181,7 @@ pub const Assignment = struct { pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const Syscall = struct { @@ -185,11 +195,35 @@ pub const Syscall = struct { pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const Call = struct { + value: Value.Index, + arguments: ArgumentList.Index, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const ArgumentList = struct { + array: ArrayList(Value.Index), + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const Return = struct { + value: Value.Index, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const Value = union(enum) { unresolved: Unresolved, declaration: Declaration.Index, + declaration_reference: Declaration.Index, void, bool: bool, undefined, @@ -202,9 +236,13 @@ pub const Value = union(enum) { type: Type.Index, integer: u64, syscall: Syscall.Index, + call: Call.Index, + argument_list: ArgumentList, + @"return": Return.Index, pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; pub fn isComptime(value: Value) bool { return switch (value) { @@ -238,23 +276,26 @@ pub const Module = struct { loops: BlockList(Loop) = .{}, assignments: BlockList(Assignment) = .{}, syscalls: BlockList(Syscall) = .{}, + calls: BlockList(Call) = .{}, + argument_list: BlockList(ArgumentList) = .{}, + returns: BlockList(Return) = .{}, pub const Descriptor = struct { main_package_path: []const u8, }; const ImportFileResult = struct { - file: *File, + ptr: *File, + index: File.Index, is_new: bool, }; const ImportPackageResult = struct { - file: *File, - is_new: bool, + file: ImportFileResult, is_package: bool, }; - pub fn importFile(module: *Module, allocator: Allocator, current_file: *File, import_name: []const u8) !ImportPackageResult { + pub fn importFile(module: *Module, allocator: Allocator, current_file_index: File.Index, import_name: []const u8) !ImportPackageResult { print("import: '{s}'\n", .{import_name}); if (equal(u8, import_name, "std")) { return module.importPackage(allocator, module.main_package.dependencies.get("std").?); @@ -268,6 +309,7 @@ pub const Module = struct { return module.importPackage(allocator, module.main_package); } + const current_file = module.files.get(current_file_index); if (current_file.package.dependencies.get(import_name)) |package| { return module.importPackage(allocator, package); } @@ -279,55 +321,73 @@ pub const Module = struct { const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_name }); const file_relative_path = std.fs.path.basename(full_path); const package = current_file.package; - const import = try module.getFile(allocator, full_path, file_relative_path, package); + const import_file = try module.getFile(allocator, full_path, file_relative_path, package); - try import.file.addFileReference(allocator, current_file); + try import_file.ptr.addFileReference(allocator, current_file); const result = ImportPackageResult{ - .file = import.file, - .is_new = import.is_new, + .file = import_file, .is_package = false, }; return result; } + fn lookupDeclaration(module: *Module, hashed: u32) !noreturn { + _ = hashed; + _ = module; + while (true) {} + } + fn getFile(module: *Module, allocator: Allocator, full_path: []const u8, relative_path: []const u8, package: *Package) !ImportFileResult { const path_lookup = try module.import_table.getOrPut(allocator, full_path); - const file: *File = switch (path_lookup.found_existing) { - true => path_lookup.value_ptr.*, + const file, const index = switch (path_lookup.found_existing) { + true => blk: { + const result = path_lookup.value_ptr.*; + const index = module.files.indexOf(result); + break :blk .{ + result, + index, + }; + }, false => blk: { - const new_file_index = try module.files.append(allocator, File{ + const file_allocation = try module.files.append(allocator, File{ .relative_path = relative_path, .package = package, }); - const file = module.files.get(new_file_index); - path_lookup.value_ptr.* = file; - break :blk file; + std.debug.print("Adding file #{}: {s}\n", .{ file_allocation.index.uniqueInteger(), full_path }); + path_lookup.value_ptr.* = file_allocation.ptr; + // break :blk file; + break :blk .{ + file_allocation.ptr, + file_allocation.index, + }; }, }; return .{ - .file = file, + .ptr = file, + .index = index, .is_new = !path_lookup.found_existing, }; } pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult { const full_path = try std.fs.path.resolve(allocator, &.{ package.directory.path, package.source_path }); - const import = try module.getFile(allocator, full_path, package.source_path, package); - try import.file.addPackageReference(allocator, package); + const import_file = try module.getFile(allocator, full_path, package.source_path, package); + try import_file.ptr.addPackageReference(allocator, package); return .{ - .file = import.file, - .is_new = import.is_new, + .file = import_file, .is_package = true, }; } pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { _ = module; - const source_file = try file.package.directory.handle.openFile(file.relative_path, .{}); + const source_file = file.package.directory.handle.openFile(file.relative_path, .{}) catch |err| { + std.debug.panic("Can't find file {s} in directory {s} for error {s}", .{ file.relative_path, file.package.directory.path, @errorName(err) }); + }; const file_size = try source_file.getEndPos(); var file_buffer = try allocator.alloc(u8, file_size); @@ -426,7 +486,7 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import); } - const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0]); + const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0], .{ .block = 0, .index = 0 }); var ir = try intermediate_representation.initialize(compilation, module, packages[0], main_declaration); @@ -465,6 +525,9 @@ pub const File = struct { relative_path: []const u8, package: *Package, + pub const List = BlockList(@This()); + pub const Index = List.Index; + const Status = enum { not_loaded, loaded_into_memory, @@ -484,15 +547,6 @@ pub const File = struct { try file.file_references.append(allocator, affected); } - pub fn fromRelativePath(allocator: Allocator, file_relative_path: []const u8) *File { - const file_content = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize)); - _ = file_content; - const file = try allocator.create(File); - file.* = File{}; - - return file; - } - fn lex(file: *File, allocator: Allocator) !void { assert(file.status == .loaded_into_memory); file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code); diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index f96baca..e817d4a 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -19,6 +19,7 @@ pub const Result = struct { values: BlockList(Value) = .{}, syscalls: BlockList(Syscall) = .{}, loads: BlockList(Load) = .{}, + phis: BlockList(Phi) = .{}, }; pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_file: Compilation.Type.Index) !Result { @@ -73,7 +74,10 @@ pub const Instruction = union(enum) { }; const Phi = struct { - foo: u32 = 0, + value: Value.Index, + jump: Jump.Index, + block: BasicBlock.Index, + next: Phi.Index, pub const List = BlockList(@This()); pub const Index = List.Index; }; @@ -133,9 +137,10 @@ pub const Builder = struct { module: *Module, current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid, current_function_index: Function.Index = Function.Index.invalid, + return_phi_node: Instruction.Index = Instruction.Index.invalid, fn function(builder: *Builder, sema_function: Compilation.Function) !void { - builder.current_function_index = try builder.ir.functions.append(builder.allocator, .{}); + builder.current_function_index = (try builder.ir.functions.append(builder.allocator, .{})).index; // TODO: arguments builder.current_basic_block = try builder.newBlock(); @@ -143,15 +148,23 @@ pub const Builder = struct { const is_noreturn = return_type.* == .noreturn; if (!is_noreturn) { const exit_block = try builder.newBlock(); - const phi = try builder.appendToBlock(exit_block, .{ - .phi = Phi.Index.invalid, + const phi = try builder.ir.phis.addOne(builder.allocator); + const phi_instruction = try builder.appendToBlock(exit_block, .{ + .phi = phi.index, }); + phi.ptr.* = .{ + .value = Value.Index.invalid, + .jump = Jump.Index.invalid, + .block = exit_block, + .next = Phi.Index.invalid, + }; const ret = try builder.appendToBlock(exit_block, .{ .ret = .{ - .value = phi, + .value = phi_instruction, }, }); _ = ret; + builder.return_phi_node = phi_instruction; } const sema_block = sema_function.getBodyBlock(builder.module); try builder.block(sema_block, .{ .emit_exit_block = !is_noreturn }); @@ -262,14 +275,35 @@ pub const Builder = struct { } _ = try builder.append(.{ - .syscall = try builder.ir.syscalls.append(builder.allocator, .{ + .syscall = (try builder.ir.syscalls.append(builder.allocator, .{ .arguments = arguments, - }), + })).index, }); }, .@"unreachable" => _ = try builder.append(.{ .@"unreachable" = {}, }), + .@"return" => |sema_ret_index| { + const sema_ret = builder.module.returns.get(sema_ret_index); + const return_value = try builder.emitValue(sema_ret.value); + const phi_instruction = builder.ir.instructions.get(builder.return_phi_node); + const phi = builder.ir.phis.get(phi_instruction.phi); + const exit_jump = try builder.jump(.{ .source = builder.current_basic_block, .destination = phi.block }); + phi_instruction.phi = (try builder.ir.phis.append(builder.allocator, .{ + .value = return_value, + .jump = exit_jump, + .next = phi_instruction.phi, + .block = phi.block, + })).index; + + _ = try builder.append(.{ + .jump = exit_jump, + }); + }, + .declaration => |sema_declaration_index| { + _ = sema_declaration_index; + unreachable; + }, else => |t| @panic(@tagName(t)), } } @@ -295,12 +329,12 @@ pub const Builder = struct { const sema_value = builder.module.values.get(sema_value_index).*; return switch (sema_value) { // TODO - .integer => |integer| try builder.ir.values.append(builder.allocator, .{ + .integer => |integer| (try builder.ir.values.append(builder.allocator, .{ .integer = .{ .value = integer, .sign = false, }, - }), + })).index, else => |t| @panic(@tagName(t)), }; } @@ -308,7 +342,8 @@ pub const Builder = struct { fn jump(builder: *Builder, jump_descriptor: Jump) !Jump.Index { const destination_block = builder.ir.blocks.get(jump_descriptor.destination); assert(!destination_block.sealed); - return try builder.ir.jumps.append(builder.allocator, jump_descriptor); + const jump_allocation = try builder.ir.jumps.append(builder.allocator, jump_descriptor); + return jump_allocation.index; } fn append(builder: *Builder, instruction: Instruction) !Instruction.Index { @@ -317,20 +352,20 @@ pub const Builder = struct { } fn appendToBlock(builder: *Builder, block_index: BasicBlock.Index, instruction: Instruction) !Instruction.Index { - const instruction_index = try builder.ir.instructions.append(builder.allocator, instruction); - try builder.ir.blocks.get(block_index).instructions.append(builder.allocator, instruction_index); + const instruction_allocation = try builder.ir.instructions.append(builder.allocator, instruction); + try builder.ir.blocks.get(block_index).instructions.append(builder.allocator, instruction_allocation.index); - return instruction_index; + return instruction_allocation.index; } fn newBlock(builder: *Builder) !BasicBlock.Index { - const new_block_index = try builder.ir.blocks.append(builder.allocator, .{}); + const new_block_allocation = try builder.ir.blocks.append(builder.allocator, .{}); const current_function = builder.ir.functions.get(builder.current_function_index); const function_block_index = current_function.blocks.items.len; - try current_function.blocks.append(builder.allocator, new_block_index); + try current_function.blocks.append(builder.allocator, new_block_allocation.index); print("Adding block: {}\n", .{function_block_index}); - return new_block_index; + return new_block_allocation.index; } }; diff --git a/src/data_structures.zig b/src/data_structures.zig index cc47ff3..7afff5d 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -36,8 +36,8 @@ pub fn BlockList(comptime T: type) type { const List = @This(); pub const Index = packed struct(u32) { - block: u24, index: u6, + block: u24, _reserved: bool = false, valid: bool = true, @@ -50,6 +50,11 @@ pub fn BlockList(comptime T: type) type { pub fn eq(index: Index, other: Index) bool { return @as(u32, @bitCast(index)) == @as(u32, @bitCast(other)); } + + pub fn uniqueInteger(index: Index) u32 { + assert(index.valid); + return @as(u30, @truncate(@as(u32, @bitCast(index)))); + } }; pub const Iterator = struct { @@ -81,6 +86,11 @@ pub fn BlockList(comptime T: type) type { } }; + pub const Allocation = struct { + ptr: *T, + index: Index, + }; + pub fn iterator(list: *const List) Iterator { return .{ .block_index = 0, @@ -94,33 +104,50 @@ pub fn BlockList(comptime T: type) type { return &list.blocks.items[index.block].items[index.index]; } - pub fn append(list: *List, allocator: Allocator, element: T) !Index { + pub fn append(list: *List, allocator: Allocator, element: T) !Allocation { + const result = try list.addOne(allocator); + result.ptr.* = element; + return result; + } + + pub fn addOne(list: *List, allocator: Allocator) !Allocation { try list.ensureCapacity(allocator, list.len + 1); const max_allocation = list.blocks.items.len * item_count; - if (list.len < max_allocation) { - // Follow the guess - if (list.blocks.items[list.first_block].allocateIndex()) |index| { - list.blocks.items[list.first_block].items[index] = element; - list.len += 1; - return .{ - .index = index, - .block = @intCast(list.first_block), + const result = switch (list.len < max_allocation) { + true => blk: { + const block = &list.blocks.items[list.first_block]; + if (block.allocateIndex()) |index| { + const ptr = &block.items[index]; + break :blk Allocation{ + .ptr = ptr, + .index = .{ + .index = index, + .block = @intCast(list.first_block), + }, + }; + } else |_| { + @panic("TODO"); + } + }, + false => blk: { + const block_index = list.blocks.items.len; + const new_block = list.blocks.addOneAssumeCapacity(); + new_block.* = .{}; + const index = new_block.allocateIndex() catch unreachable; + const ptr = &new_block.items[index]; + break :blk Allocation{ + .ptr = ptr, + .index = .{ + .index = index, + .block = @intCast(block_index), + }, }; - } else |_| { - @panic("TODO"); - } - } else { - const block_index = list.blocks.items.len; - const new_block = list.blocks.addOneAssumeCapacity(); - new_block.* = .{}; - const index = new_block.allocateIndex() catch unreachable; - new_block.items[index] = element; - list.len += 1; - return .{ - .index = index, - .block = @intCast(block_index), - }; - } + }, + }; + + list.len += 1; + + return result; } pub fn ensureCapacity(list: *List, allocator: Allocator, new_capacity: usize) !void { @@ -131,6 +158,24 @@ pub fn BlockList(comptime T: type) type { } } + pub fn indexOf(list: *List, elem: *T) Index { + const address = @intFromPtr(elem); + std.debug.print("Items: {}. Block count: {}\n", .{ list.len, list.blocks.items.len }); + for (list.blocks.items, 0..) |*block, block_index| { + const base = @intFromPtr(&block.items[0]); + const top = base + @sizeOf(T) * item_count; + std.debug.print("Bitset: {}. address: 0x{x}. Base: 0x{x}. Top: 0x{x}\n", .{ block.bitset, address, base, top }); + if (address >= base and address < top) { + return .{ + .block = @intCast(block_index), + .index = @intCast(@divExact(address - base, @sizeOf(T))), + }; + } + } + + @panic("not found"); + } + test "Bitset index allocation" { const expect = std.testing.expect; var block = Block{}; diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig index c5745fc..fc63727 100644 --- a/src/frontend/lexical_analyzer.zig +++ b/src/frontend/lexical_analyzer.zig @@ -34,6 +34,9 @@ pub const Token = packed struct(u64) { fixed_keyword_false = 0x0d, fixed_keyword_fn = 0x0e, fixed_keyword_unreachable = 0x0f, + fixed_keyword_return = 0x10, + keyword_unsigned_integer = 0x1f, + keyword_signed_integer = 0x20, bang = '!', // 0x21 hash = '#', // 0x23 dollar_sign = '$', // 0x24 @@ -82,6 +85,7 @@ pub const FixedKeyword = enum { false, @"fn", @"unreachable", + @"return", }; pub const Result = struct { @@ -109,8 +113,9 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { break; } - const identifier = text[start_index..][0 .. index - start_index]; - std.debug.print("Identifier: {s}\n", .{identifier}); + // const identifier = text[start_index..][0 .. index - start_index]; + // _ = identifier; + // std.debug.print("Identifier: {s}\n", .{identifier}); if (start_character == 'u' or start_character == 's') { var index_integer = start_index + 1; @@ -119,7 +124,13 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { } if (index_integer == index) { - unreachable; + const id: Token.Id = switch (start_character) { + 'u' => .keyword_unsigned_integer, + 's' => .keyword_signed_integer, + else => unreachable, + }; + + break :blk id; } } @@ -127,7 +138,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)), } else .identifier; }, - '(', ')', '{', '}', '-', '=', ';', '#', '@', ',' => |operator| blk: { + '(', ')', '{', '}', '-', '=', ';', '#', '@', ',', '.' => |operator| blk: { index += 1; break :blk @enumFromInt(operator); }, diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index a01deac..438cd95 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -7,6 +7,7 @@ const File = Compilation.File; const Module = Compilation.Module; const Package = Compilation.Package; +const ArgumentList = Compilation.ArgumentList; const Assignment = Compilation.Assignment; const Block = Compilation.Block; const Declaration = Compilation.Declaration; @@ -14,6 +15,7 @@ const Field = Compilation.Field; const Function = Compilation.Function; const Loop = Compilation.Loop; const Scope = Compilation.Scope; +const ScopeType = Compilation.ScopeType; const Struct = Compilation.Struct; const Type = Compilation.Type; const Value = Compilation.Value; @@ -33,35 +35,68 @@ const HashMap = data_structures.AutoHashMap; const print = std.debug.print; const Analyzer = struct { - source_code: []const u8, - nodes: []const Node, - tokens: []const Token, - file: *File, allocator: Allocator, module: *Module, + current_file: File.Index, - fn lazyGlobalDeclaration(analyzer: *Analyzer, node_index: Node.Index) void { - print("Global: {}", .{analyzer.nodes[node_index.unwrap()]}); + fn getSourceFile(analyzer: *Analyzer, scope_index: Scope.Index) []const u8 { + const scope = analyzer.module.scopes.get(scope_index); + const file = analyzer.module.files.get(scope.file); + return file.source_code; } - fn comptimeBlock(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index { - const comptime_node = analyzer.nodes[node_index.unwrap()]; + fn getNode(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) Node { + const scope = analyzer.module.scopes.get(scope_index); + const file = analyzer.module.files.get(scope.file); + const result = file.syntactic_analyzer_result.nodes.items[node_index.unwrap()]; + return result; + } - const comptime_block = try analyzer.block(scope, .{ .none = {} }, comptime_node.left); - return try analyzer.module.values.append(analyzer.allocator, .{ + fn getToken(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) Token { + const scope = analyzer.module.scopes.get(scope_index); + const file = analyzer.module.files.get(scope.file); + const result = file.lexical_analyzer_result.tokens.items[token_index]; + + return result; + } + + fn getNodeList(analyzer: *Analyzer, scope_index: Scope.Index, list_index: u32) ArrayList(Node.Index) { + const scope = analyzer.module.scopes.get(scope_index); + const file = analyzer.module.files.get(scope.file); + return file.syntactic_analyzer_result.node_lists.items[list_index]; + } + + fn comptimeBlock(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Value.Index { + const comptime_node = analyzer.getNode(scope_index, node_index); + + const comptime_block = try analyzer.block(scope_index, .{ .none = {} }, comptime_node.left); + const value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ .block = comptime_block, }); + return value_allocation.index; } - fn assign(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Assignment.Index { - _ = node_index; - _ = scope; - _ = analyzer; + fn unresolved(analyzer: *Analyzer, node_index: Node.Index) !Value.Allocation { + const value_allocation = try analyzer.module.values.addOne(analyzer.allocator); + value_allocation.ptr.* = .{ + .unresolved = .{ + .node_index = node_index, + }, + }; + + return value_allocation; } - fn block(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index { + fn unresolvedAllocate(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) !Value.Allocation { + const new = try analyzer.unresolved(node_index); + try analyzer.resolveNode(new.ptr, scope_index, expect_type, node_index); + return new; + } + + fn block(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index { + _ = expect_type; var reaches_end = true; - const block_node = analyzer.nodes[node_index.unwrap()]; + const block_node = analyzer.getNode(scope_index, node_index); var statement_nodes = ArrayList(Node.Index){}; switch (block_node.id) { .block_one, .comptime_block_one => { @@ -72,12 +107,13 @@ const Analyzer = struct { try statement_nodes.append(analyzer.allocator, block_node.left); try statement_nodes.append(analyzer.allocator, block_node.right); }, + .block, .comptime_block => statement_nodes = analyzer.getNodeList(scope_index, block_node.left.unwrap()), else => |t| @panic(@tagName(t)), } const is_comptime = switch (block_node.id) { - .comptime_block_zero, .comptime_block_one, .comptime_block_two => true, - .block_zero, .block_one, .block_two => false, + .comptime_block, .comptime_block_zero, .comptime_block_one, .comptime_block_two => true, + .block, .block_zero, .block_one, .block_two => false, else => |t| @panic(@tagName(t)), }; print("Is comptime: {}\n", .{is_comptime}); @@ -89,7 +125,7 @@ const Analyzer = struct { unreachable; } - const statement_node = analyzer.nodes[statement_node_index.unwrap()]; + const statement_node = analyzer.getNode(scope_index, statement_node_index); const statement_value = switch (statement_node.id) { inline .assign, .simple_while => |statement_id| blk: { const specific_value_index = switch (statement_id) { @@ -99,17 +135,23 @@ const Analyzer = struct { switch (statement_node.left.valid) { // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` false => { - const right = try analyzer.expression(scope, ExpectType.none, statement_node.right); - try statements.append(analyzer.allocator, right); + const right_value_allocation = try analyzer.module.values.addOne(analyzer.allocator); + right_value_allocation.ptr.* = .{ + .unresolved = .{ + .node_index = statement_node.right, + }, + }; + try analyzer.resolveNode(right_value_allocation.ptr, scope_index, ExpectType.none, statement_node.right); + switch (right_value_allocation.ptr.*) { + else => |t| std.debug.print("\n\n\n\n\nASSIGN RIGHT: {s}\n\n\n\n", .{@tagName(t)}), + } + try statements.append(analyzer.allocator, right_value_allocation.index); continue; }, true => { - const left_node = analyzer.nodes[statement_node.left.unwrap()]; - print("left node index: {}. Left node: {}\n", .{ statement_node.left, left_node }); // const id = analyzer.tokenIdentifier(.token); // print("id: {s}\n", .{id}); - const left = try analyzer.expression(scope, ExpectType.none, statement_node.left); - _ = left; + // const left = try analyzer.expression(scope_index, ExpectType.none, statement_node.left); // if (analyzer.module.values.get(left).isComptime() and analyzer.module.values.get(right).isComptime()) { // unreachable; @@ -125,20 +167,18 @@ const Analyzer = struct { } }, .simple_while => statement: { - const loop_index = try analyzer.module.loops.append(analyzer.allocator, .{ + const loop_allocation = try analyzer.module.loops.append(analyzer.allocator, .{ .condition = Value.Index.invalid, .body = Value.Index.invalid, .breaks = false, }); - const loop_structure = analyzer.module.loops.get(loop_index); - const while_condition = try analyzer.expression(scope, ExpectType.boolean, statement_node.left); - const while_body = try analyzer.expression(scope, expect_type, statement_node.right); - loop_structure.condition = while_condition; - loop_structure.body = while_body; + loop_allocation.ptr.condition = (try analyzer.unresolvedAllocate(scope_index, ExpectType.boolean, statement_node.left)).index; + loop_allocation.ptr.body = (try analyzer.unresolvedAllocate(scope_index, ExpectType.none, statement_node.right)).index; - reaches_end = loop_structure.breaks or while_condition.valid; + // TODO: bool true + reaches_end = loop_allocation.ptr.breaks or unreachable; - break :statement loop_index; + break :statement loop_allocation.index; }, else => unreachable, }; @@ -147,62 +187,87 @@ const Analyzer = struct { .simple_while => "loop", else => unreachable, }, specific_value_index); - const value_index = try analyzer.module.values.append(analyzer.allocator, value); - break :blk value_index; + const value_allocation = try analyzer.module.values.append(analyzer.allocator, value); + break :blk value_allocation.index; }, .@"unreachable" => blk: { reaches_end = false; break :blk Values.@"unreachable".getIndex(); }, + .simple_variable_declaration => (try analyzer.module.values.append(analyzer.allocator, .{ + .declaration = try analyzer.symbolDeclaration(scope_index, statement_node_index, .local), + })).index, + .@"return" => blk: { + reaches_end = false; + const return_expression: Value.Index = switch (statement_node_index.valid) { + // TODO: expect type + true => ret: { + const return_value_allocation = try analyzer.module.values.addOne(analyzer.allocator); + return_value_allocation.ptr.* = .{ + .unresolved = .{ + .node_index = statement_node.left, + }, + }; + try analyzer.resolveNode(return_value_allocation.ptr, scope_index, ExpectType.none, statement_node.left); + break :ret return_value_allocation.index; + }, + false => @panic("TODO: ret void"), + }; + + const return_value_allocation = try analyzer.module.returns.append(analyzer.allocator, .{ + .value = return_expression, + }); + + const return_expression_value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ + .@"return" = return_value_allocation.index, + }); + + break :blk return_expression_value_allocation.index; + }, else => |t| @panic(@tagName(t)), }; + try statements.append(analyzer.allocator, statement_value); } - return try analyzer.module.blocks.append(analyzer.allocator, .{ + const block_allocation = try analyzer.module.blocks.append(analyzer.allocator, .{ .statements = statements, .reaches_end = reaches_end, }); + + return block_allocation.index; } - fn whileExpression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node: Node) !Loop.Index { - _ = node; - _ = expect_type; - _ = scope; - _ = analyzer; - } - - fn resolve(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, value: *Value) !void { - const node_index = switch (value.*) { - .unresolved => |unresolved| unresolved.node_index, - else => |t| @panic(@tagName(t)), - }; - value.* = try analyzer.resolveNode(scope, expect_type, node_index); - } - - fn doIdentifier(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node: Node) !Value.Index { - assert(node.id == .identifier); - const identifier_hash = try analyzer.identifierFromToken(node.token); + fn doIdentifier(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_token: Token.Index, node_scope_index: Scope.Index) !Value.Index { + const identifier_hash = try analyzer.identifierFromToken(node_scope_index, node_token); + const scope = analyzer.module.scopes.get(scope_index); // TODO: search in upper scopes too const identifier_scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_hash); if (identifier_scope_lookup.found_existing) { const declaration_index = identifier_scope_lookup.value_ptr.*; const declaration = analyzer.module.declarations.get(declaration_index); const init_value = analyzer.module.values.get(declaration.init_value); - try analyzer.resolve(scope, expect_type, init_value); - if (init_value.* != .runtime and declaration.mutability == .@"const") { + print("Declaration found: {}\n", .{init_value}); + switch (init_value.*) { + .unresolved => |ur| try analyzer.resolveNode(init_value, scope_index, expect_type, ur.node_index), + else => {}, + } + if (init_value.isComptime() and declaration.mutability == .@"const") { return declaration.init_value; } else { - unreachable; + const ref_allocation = try analyzer.module.values.append(analyzer.allocator, .{ + .declaration_reference = declaration_index, + }); + return ref_allocation.index; } } else { - @panic("TODO: not found"); + std.debug.panic("Identifier not found in scope #{} of file #{} referenced by scope #{} of file #{}: {s}", .{ scope_index.uniqueInteger(), scope.file.uniqueInteger(), node_scope_index.uniqueInteger(), analyzer.module.scopes.get(node_scope_index).file.uniqueInteger(), tokenBytes(analyzer.getToken(scope_index, node_token), analyzer.getSourceFile(scope_index)) }); } } - fn getArguments(analyzer: *Analyzer, node_index: Node.Index) !ArrayList(Node.Index) { + fn getArguments(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !ArrayList(Node.Index) { var arguments = ArrayList(Node.Index){}; - const node = analyzer.nodes[node_index.unwrap()]; + const node = analyzer.getNode(scope_index, node_index); switch (node.id) { .compiler_intrinsic_two => { try arguments.append(analyzer.allocator, node.left); @@ -214,107 +279,19 @@ const Analyzer = struct { return arguments; } - fn resolveNode(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) anyerror!Value { - const node = analyzer.nodes[node_index.unwrap()]; - return switch (node.id) { - .identifier => unreachable, - .compiler_intrinsic_one, .compiler_intrinsic_two => blk: { - const intrinsic_name = analyzer.tokenIdentifier(node.token + 1); - const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable; - print("Intrinsic: {s}\n", .{@tagName(intrinsic)}); - switch (intrinsic) { - .import => { - assert(node.id == .compiler_intrinsic_one); - const import_argument = analyzer.nodes[node.left.unwrap()]; - switch (import_argument.id) { - .string_literal => { - const import_name = analyzer.tokenStringLiteral(import_argument.token); - const imported_file = try analyzer.module.importFile(analyzer.allocator, analyzer.file, import_name); + fn resolveNode(analyzer: *Analyzer, value: *Value, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!void { + const node = analyzer.getNode(scope_index, node_index); + print("Resolving node #{}: {}\n", .{ node_index.uniqueInteger(), node }); - if (imported_file.is_new) { - // TODO: fix error - try analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, imported_file.file); - } else { - unreachable; - } + assert(value.* == .unresolved); - break :blk .{ - .type = try analyzeFile(analyzer.allocator, analyzer.module, imported_file.file), - }; - }, - else => unreachable, - } - }, - .syscall => { - var argument_nodes = try analyzer.getArguments(node_index); - print("Argument count: {}\n", .{argument_nodes.items.len}); - if (argument_nodes.items.len > 0 and argument_nodes.items.len <= 6 + 1) { - const number = try analyzer.expression(scope, ExpectType.none, argument_nodes.items[0]); - assert(number.valid); - var arguments = std.mem.zeroes([6]Value.Index); - for (argument_nodes.items[1..], 0..) |argument_node_index, argument_index| { - const argument = try analyzer.expression(scope, ExpectType.none, argument_node_index); - print("Index: {}. Argument: {}\n", .{ argument_index, argument }); - arguments[argument_index] = argument; - } - - // TODO: typecheck for usize - for (arguments[0..argument_nodes.items.len]) |argument| { - _ = argument; - } - - break :blk .{ - .syscall = try analyzer.module.syscalls.append(analyzer.allocator, .{ - .number = number, - .arguments = arguments, - .argument_count = @intCast(argument_nodes.items.len - 1), - }), - }; - } else { - unreachable; - } - }, - } - unreachable; + value.* = switch (node.id) { + .identifier => blk: { + const value_index = try analyzer.doIdentifier(scope_index, expect_type, node.token, scope_index); + const value_ref = analyzer.module.values.get(value_index); + break :blk value_ref.*; }, - .function_definition => blk: { - const function_prototype_index = try analyzer.functionPrototype(node.left); - - const function_body = try analyzer.block(scope, .{ - .type_index = analyzer.functionPrototypeReturnType(function_prototype_index), - }, node.right); - - const function_index = try analyzer.module.functions.append(analyzer.allocator, .{ - .prototype = function_prototype_index, - .body = function_body, - }); - break :blk .{ - .function = function_index, - }; - }, - .keyword_true => unreachable, - .simple_while => unreachable, - .block_zero, .block_one => blk: { - const block_index = try analyzer.block(scope, expect_type, node_index); - break :blk .{ - .block = block_index, - }; - }, - .number_literal => switch (std.zig.parseNumberLiteral(analyzer.tokenBytes(analyzer.tokens[node.token]))) { - .int => |integer| .{ - .integer = integer, - }, - else => |t| @panic(@tagName(t)), - }, - else => |t| @panic(@tagName(t)), - }; - } - - fn expression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) !Value.Index { - const node = analyzer.nodes[node_index.unwrap()]; - return switch (node.id) { - .identifier => analyzer.doIdentifier(scope, expect_type, node), - .keyword_true => blk: { + .keyword_true => { switch (expect_type) { .none => {}, .type_index => |expected_type| { @@ -324,9 +301,140 @@ const Analyzer = struct { }, } - break :blk Values.getIndex(.bool_true); + // TODO + unreachable; + + // break :blk Values.getIndex(.bool_true); }, - else => try analyzer.module.values.append(analyzer.allocator, try analyzer.resolveNode(scope, expect_type, node_index)), + .compiler_intrinsic_one, .compiler_intrinsic_two => blk: { + const intrinsic_name = analyzer.tokenIdentifier(scope_index, node.token + 1); + const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable; + print("Intrinsic: {s}\n", .{@tagName(intrinsic)}); + switch (intrinsic) { + .import => { + assert(node.id == .compiler_intrinsic_one); + const import_argument = analyzer.getNode(scope_index, node.left); + switch (import_argument.id) { + .string_literal => { + const import_name = analyzer.tokenStringLiteral(scope_index, import_argument.token); + const import_file = try analyzer.module.importFile(analyzer.allocator, analyzer.current_file, import_name); + + if (import_file.file.is_new) { + // TODO: fix error + try analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, import_file.file.ptr); + } else { + unreachable; + } + + break :blk .{ + .type = try analyzeFile(value, analyzer.allocator, analyzer.module, import_file.file.ptr, import_file.file.index), + }; + }, + else => unreachable, + } + }, + .syscall => { + var argument_nodes = try analyzer.getArguments(scope_index, node_index); + print("Argument count: {}\n", .{argument_nodes.items.len}); + if (argument_nodes.items.len > 0 and argument_nodes.items.len <= 6 + 1) { + const number_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, argument_nodes.items[0]); + const number = number_allocation.index; + assert(number.valid); + var arguments = std.mem.zeroes([6]Value.Index); + for (argument_nodes.items[1..], 0..) |argument_node_index, argument_index| { + const argument_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, argument_node_index); + arguments[argument_index] = argument_allocation.index; + } + + // TODO: typecheck for usize + for (arguments[0..argument_nodes.items.len]) |argument| { + _ = argument; + } + + break :blk .{ + .syscall = (try analyzer.module.syscalls.append(analyzer.allocator, .{ + .number = number, + .arguments = arguments, + .argument_count = @intCast(argument_nodes.items.len - 1), + })).index, + }; + } else { + unreachable; + } + }, + } + unreachable; + }, + .function_definition => blk: { + const function_prototype_index = try analyzer.functionPrototype(scope_index, node.left); + + const function_body = try analyzer.block(scope_index, .{ + .type_index = analyzer.functionPrototypeReturnType(function_prototype_index), + }, node.right); + + const function_allocation = try analyzer.module.functions.append(analyzer.allocator, .{ + .prototype = function_prototype_index, + .body = function_body, + }); + break :blk .{ + .function = function_allocation.index, + }; + }, + .simple_while => unreachable, + .block_zero, .block_one => blk: { + const block_index = try analyzer.block(scope_index, expect_type, node_index); + break :blk .{ + .block = block_index, + }; + }, + .number_literal => switch (std.zig.parseNumberLiteral(analyzer.numberBytes(scope_index, node.token))) { + .int => |integer| .{ + .integer = integer, + }, + else => |t| @panic(@tagName(t)), + }, + .call_one => blk: { + const this_value_node_index = node.left; + const this_value_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, this_value_node_index); + + const call_allocation = try analyzer.module.calls.append(analyzer.allocator, .{ + .value = this_value_allocation.index, + .arguments = ArgumentList.Index.invalid, + }); + break :blk .{ + .call = call_allocation.index, + }; + }, + .field_access => blk: { + const left_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, node.left); + const identifier = analyzer.tokenIdentifier(scope_index, node.right.value); + _ = identifier; + switch (left_allocation.ptr.*) { + .type => |type_index| { + const left_type = analyzer.module.types.get(type_index); + switch (left_type.*) { + .@"struct" => |struct_index| { + const struct_type = analyzer.module.structs.get(struct_index); + const right_index = try analyzer.doIdentifier(struct_type.scope, ExpectType.none, node.right.value, scope_index); + const right_value = analyzer.module.values.get(right_index); + switch (right_value.*) { + .function => break :blk right_value.*, + else => unreachable, + } + print("Right: {}\n", .{right_value}); + // struct_scope.declarations.get(identifier); + + unreachable; + }, + else => |t| @panic(@tagName(t)), + } + unreachable; + }, + else => |t| @panic(@tagName(t)), + } + unreachable; + }, + else => |t| @panic(@tagName(t)), }; } @@ -335,37 +443,55 @@ const Analyzer = struct { return function_prototype.return_type; } - fn functionPrototype(analyzer: *Analyzer, node_index: Node.Index) !Function.Prototype.Index { - const node = analyzer.nodes[node_index.unwrap()]; + fn functionPrototype(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Function.Prototype.Index { + const node = analyzer.getNode(scope_index, node_index); switch (node.id) { .simple_function_prototype => { const arguments: ?[]const Field.Index = blk: { - const argument_node = analyzer.nodes[node.left.get() orelse break :blk null]; + if (node.left.get() == null) break :blk null; + const argument_node = analyzer.getNode(scope_index, node.left); switch (argument_node.id) { else => |t| @panic(@tagName(t)), } }; - const return_type_node = analyzer.nodes[node.right.unwrap()]; + const return_type_node = analyzer.getNode(scope_index, node.right); const return_type: Type.Index = switch (return_type_node.id) { .identifier => { unreachable; }, .keyword_noreturn => .{ .block = 0, .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.noreturn) }, + inline .signed_integer_type, .unsigned_integer_type => |int_type_signedness| blk: { + const bit_count: u16 = @intCast(return_type_node.left.value); + print("Bit count: {}\n", .{bit_count}); + break :blk switch (bit_count) { + inline 8, 16, 32, 64 => |hardware_bit_count| Type.Index{ + .block = 0, + .index = @ctz(hardware_bit_count) - @ctz(@as(u8, 8)) + switch (int_type_signedness) { + .signed_integer_type => HardwareSignedIntegerType, + .unsigned_integer_type => HardwareUnsignedIntegerType, + else => unreachable, + }.offset, + }, + else => unreachable, + }; + }, else => |t| @panic(@tagName(t)), }; - return try analyzer.module.function_prototypes.append(analyzer.allocator, .{ + const function_prototype_allocation = try analyzer.module.function_prototypes.append(analyzer.allocator, .{ .arguments = arguments, .return_type = return_type, }); + + return function_prototype_allocation.index; }, else => |t| @panic(@tagName(t)), } } - fn analyzeDeclaration(analyzer: *Analyzer, scope: *Scope, declaration: *Declaration) !Value.Index { + fn analyzeDeclaration(analyzer: *Analyzer, scope_index: Scope.Index, declaration: *Declaration) !Value.Index { + _ = scope_index; _ = declaration; - _ = scope; _ = analyzer; // switch (declaration.*) { // .unresolved => |node_index| { @@ -394,125 +520,157 @@ const Analyzer = struct { @panic("TODO: analyzeDeclaration"); } - fn structType(analyzer: *Analyzer, parent_scope: Scope.Index, container_declaration: syntactic_analyzer.ContainerDeclaration, index: Node.Index) !Type.Index { - _ = index; - const new_scope = try analyzer.allocateScope(.{ .parent = parent_scope }); - const scope = new_scope.ptr; - - const is_file = !parent_scope.valid; - assert(is_file); - - const struct_index = try analyzer.module.structs.append(analyzer.allocator, .{ - .scope = new_scope.index, - }); - const struct_type = analyzer.module.structs.get(struct_index); - const type_index = try analyzer.module.types.append(analyzer.allocator, .{ - .@"struct" = struct_index, - }); - scope.type = type_index; - - _ = struct_type; - assert(container_declaration.members.len > 0); - - const count = blk: { - var result: struct { - fields: u32 = 0, - declarations: u32 = 0, - } = .{}; - for (container_declaration.members) |member_index| { - const member = analyzer.nodes[member_index.unwrap()]; - const member_type = getContainerMemberType(member.id); - - switch (member_type) { - .declaration => result.declarations += 1, - .field => result.fields += 1, - } - } - break :blk result; + fn structType(analyzer: *Analyzer, value: *Value, parent_scope_index: Scope.Index, index: Node.Index, file_index: File.Index) !Type.Index { + var node_buffer: [2]Node.Index = undefined; + // We have the file because this might be the first file + const file = analyzer.module.files.get(file_index); + const node = file.syntactic_analyzer_result.nodes.items[index.unwrap()]; + const nodes = switch (node.id) { + .main_one => blk: { + node_buffer[0] = node.left; + break :blk node_buffer[0..1]; + }, + .main_two => blk: { + node_buffer[0] = node.left; + node_buffer[1] = node.right; + break :blk &node_buffer; + }, + else => |t| @panic(@tagName(t)), }; - var declaration_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.declarations); - var field_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.fields); + if (nodes.len > 0) { + const new_scope = try analyzer.allocateScope(.{ + .parent = parent_scope_index, + .file = file_index, + }); + const scope = new_scope.ptr; + const scope_index = new_scope.index; - for (container_declaration.members) |member_index| { - const member = analyzer.nodes[member_index.unwrap()]; - const member_type = getContainerMemberType(member.id); - const array_list = switch (member_type) { - .declaration => &declaration_nodes, - .field => &field_nodes, + const is_file = !parent_scope_index.valid; + assert(is_file); + + const struct_allocation = try analyzer.module.structs.append(analyzer.allocator, .{ + .scope = new_scope.index, + }); + const type_allocation = try analyzer.module.types.append(analyzer.allocator, .{ + .@"struct" = struct_allocation.index, + }); + scope.type = type_allocation.index; + value.* = .{ + .type = type_allocation.index, }; - array_list.appendAssumeCapacity(member_index); + + const count = blk: { + var result: struct { + fields: u32 = 0, + declarations: u32 = 0, + } = .{}; + for (nodes) |member_index| { + const member = analyzer.getNode(scope_index, member_index); + const member_type = getContainerMemberType(member.id); + + switch (member_type) { + .declaration => result.declarations += 1, + .field => result.fields += 1, + } + } + break :blk result; + }; + + var declaration_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.declarations); + var field_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.fields); + + for (nodes) |member_index| { + const member = analyzer.getNode(scope_index, member_index); + const member_type = getContainerMemberType(member.id); + const array_list = switch (member_type) { + .declaration => &declaration_nodes, + .field => &field_nodes, + }; + array_list.appendAssumeCapacity(member_index); + } + + for (declaration_nodes.items) |declaration_node_index| { + const declaration_node = analyzer.getNode(scope_index, declaration_node_index); + switch (declaration_node.id) { + .@"comptime" => {}, + .simple_variable_declaration => _ = try analyzer.symbolDeclaration(scope_index, declaration_node_index, .global), + else => unreachable, + } + } + + // TODO: consider iterating over scope declarations instead? + for (declaration_nodes.items) |declaration_node_index| { + const declaration_node = analyzer.getNode(scope_index, declaration_node_index); + switch (declaration_node.id) { + .@"comptime" => _ = try analyzer.comptimeBlock(scope_index, declaration_node_index), + .simple_variable_declaration => {}, + else => |t| @panic(@tagName(t)), + } + } + + for (field_nodes.items) |field_index| { + const field_node = analyzer.getNode(scope_index, field_index); + _ = field_node; + + @panic("TODO: fields"); + } + + return type_allocation.index; + } else { + return Type.Index.invalid; + } + } + + fn symbolDeclaration(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index, scope_type: ScopeType) !Declaration.Index { + const declaration_node = analyzer.getNode(scope_index, node_index); + assert(declaration_node.id == .simple_variable_declaration); + assert(!declaration_node.left.valid); + const mutability: Compilation.Mutability = switch (analyzer.getToken(scope_index, declaration_node.token).id) { + .fixed_keyword_const => .@"const", + .fixed_keyword_var => .@"var", + else => |t| @panic(@tagName(t)), + }; + const expected_identifier_token_index = declaration_node.token + 1; + const expected_identifier_token = analyzer.getToken(scope_index, expected_identifier_token_index); + if (expected_identifier_token.id != .identifier) { + print("Error: found: {}", .{expected_identifier_token.id}); + @panic("Expected identifier"); + } + // TODO: Check if it is a keyword + + const identifier_index = try analyzer.identifierFromToken(scope_index, expected_identifier_token_index); + + const declaration_name = analyzer.tokenIdentifier(scope_index, expected_identifier_token_index); + // Check if the symbol name is already occupied in the same scope + const scope = analyzer.module.scopes.get(scope_index); + const scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_index); + if (scope_lookup.found_existing) { + std.debug.panic("Existing name in lookup: {s}", .{declaration_name}); } - for (declaration_nodes.items) |declaration_node_index| { - const declaration_node = analyzer.nodes[declaration_node_index.unwrap()]; - switch (declaration_node.id) { - .@"comptime" => {}, - .simple_variable_declaration => { - const mutability: Compilation.Mutability = switch (analyzer.tokens[declaration_node.token].id) { - .fixed_keyword_const => .@"const", - .fixed_keyword_var => .@"var", - else => |t| @panic(@tagName(t)), - }; - const expected_identifier_token_index = declaration_node.token + 1; - const expected_identifier_token = analyzer.tokens[expected_identifier_token_index]; - if (expected_identifier_token.id != .identifier) { - print("Error: found: {}", .{expected_identifier_token.id}); - @panic("Expected identifier"); - } - // TODO: Check if it is a keyword + // Check if the symbol name is already occupied in parent scopes + var upper_scope_index = scope.parent; - const identifier_index = try analyzer.identifierFromToken(expected_identifier_token_index); + while (upper_scope_index.valid) { + @panic("TODO: upper scope"); + } + assert(declaration_node.right.valid); - const declaration_name = analyzer.tokenIdentifier(expected_identifier_token_index); - // Check if the symbol name is already occupied in the same scope - const scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_index); - if (scope_lookup.found_existing) { - std.debug.panic("Existing name in lookup: {s}", .{declaration_name}); - } - - // Check if the symbol name is already occupied in parent scopes - var upper_scope_index = scope.parent; - - while (upper_scope_index.valid) { - @panic("TODO: upper scope"); - } - - const container_declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{ - .name = declaration_name, - .scope_type = .global, - .mutability = mutability, - .init_value = try analyzer.module.values.append(analyzer.allocator, .{ - .unresolved = .{ - .node_index = declaration_node.right, - }, - }), - }); - - scope_lookup.value_ptr.* = container_declaration_index; + const declaration_allocation = try analyzer.module.declarations.append(analyzer.allocator, .{ + .name = declaration_name, + .scope_type = scope_type, + .mutability = mutability, + .init_value = (try analyzer.module.values.append(analyzer.allocator, .{ + .unresolved = .{ + .node_index = declaration_node.right, }, - else => unreachable, - } - } + })).index, + }); - // TODO: consider iterating over scope declarations instead? - for (declaration_nodes.items) |declaration_node_index| { - const declaration_node = analyzer.nodes[declaration_node_index.unwrap()]; - switch (declaration_node.id) { - .@"comptime" => _ = try analyzer.comptimeBlock(scope, declaration_node_index), - .simple_variable_declaration => {}, - else => |t| @panic(@tagName(t)), - } - } + scope_lookup.value_ptr.* = declaration_allocation.index; - for (field_nodes.items) |field_index| { - const field_node = analyzer.nodes[field_index.unwrap()]; - _ = field_node; - - @panic("TODO: fields"); - } - - return type_index; + return declaration_allocation.index; } const MemberType = enum { @@ -528,8 +686,8 @@ const Analyzer = struct { }; } - fn identifierFromToken(analyzer: *Analyzer, token_index: Token.Index) !u32 { - const identifier = analyzer.tokenIdentifier(token_index); + fn identifierFromToken(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) !u32 { + const identifier = analyzer.tokenIdentifier(scope_index, token_index); const key: u32 = @truncate(std.hash.Wyhash.hash(0, identifier)); const lookup_result = try analyzer.module.string_table.getOrPut(analyzer.allocator, key); @@ -541,40 +699,40 @@ const Analyzer = struct { } } - fn tokenIdentifier(analyzer: *Analyzer, token_index: Token.Index) []const u8 { - const token = analyzer.tokens[token_index]; + fn tokenIdentifier(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) []const u8 { + const token = analyzer.getToken(scope_index, token_index); assert(token.id == .identifier); - const identifier = analyzer.tokenBytes(token); + const source_file = analyzer.getSourceFile(scope_index); + const identifier = tokenBytes(token, source_file); return identifier; } - fn tokenBytes(analyzer: *Analyzer, token: Token) []const u8 { - return analyzer.source_code[token.start..][0..token.len]; + fn tokenBytes(token: Token, source_code: []const u8) []const u8 { + return source_code[token.start..][0..token.len]; } - fn tokenStringLiteral(analyzer: *Analyzer, token_index: Token.Index) []const u8 { - const token = analyzer.tokens[token_index]; + fn numberBytes(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) []const u8 { + const token = analyzer.getToken(scope_index, token_index); + assert(token.id == .number_literal); + const source_file = analyzer.getSourceFile(scope_index); + const bytes = tokenBytes(token, source_file); + + return bytes; + } + + fn tokenStringLiteral(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) []const u8 { + const token = analyzer.getToken(scope_index, token_index); assert(token.id == .string_literal); + const source_file = analyzer.getSourceFile(scope_index); // Eat double quotes - const string_literal = analyzer.tokenBytes(token)[1..][0 .. token.len - 2]; + const string_literal = tokenBytes(token, source_file)[1..][0 .. token.len - 2]; return string_literal; } - const ScopeAllocation = struct { - ptr: *Scope, - index: Scope.Index, - }; - - fn allocateScope(analyzer: *Analyzer, scope_value: Scope) !ScopeAllocation { - const scope_index = try analyzer.module.scopes.append(analyzer.allocator, scope_value); - const scope = analyzer.module.scopes.get(scope_index); - - return .{ - .ptr = scope, - .index = scope_index, - }; + fn allocateScope(analyzer: *Analyzer, scope_value: Scope) !Scope.Allocation { + return analyzer.module.scopes.append(analyzer.allocator, scope_value); } }; @@ -647,7 +805,8 @@ const HardwareSignedIntegerType = enum { const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len; }; -pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) !Type.Index { +pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, file_index: File.Index) !Type.Index { + _ = file_index; inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| { _ = try module.types.append(compilation.base_allocator, @unionInit(Type, enum_field.name, {})); } @@ -692,58 +851,40 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) .@"unreachable" = {}, }); - return analyzeExistingPackage(compilation, module, package); + const value_allocation = try module.values.append(compilation.base_allocator, .{ + .unresolved = .{ + .node_index = .{ .value = 0 }, + }, + }); + + return analyzeExistingPackage(value_allocation.ptr, compilation, module, package); } -pub fn analyzeExistingPackage(compilation: *Compilation, module: *Module, package: *Package) !Type.Index { +pub fn analyzeExistingPackage(value: *Value, compilation: *Compilation, module: *Module, package: *Package) !Type.Index { const package_import = try module.importPackage(compilation.base_allocator, package); - assert(!package_import.is_new); - const package_file = package_import.file; + assert(!package_import.file.is_new); + const package_file = package_import.file.ptr; + const file_index = package_import.file.index; - return try analyzeFile(compilation.base_allocator, module, package_file); + return try analyzeFile(value, compilation.base_allocator, module, package_file, file_index); } -pub fn analyzeFile(allocator: Allocator, module: *Module, file: *File) !Type.Index { +pub fn analyzeFile(value: *Value, allocator: Allocator, module: *Module, file: *File, file_index: File.Index) !Type.Index { + assert(value.* == .unresolved); assert(file.status == .parsed); var analyzer = Analyzer{ - .source_code = file.source_code, - .nodes = file.syntactic_analyzer_result.nodes.items, - .tokens = file.lexical_analyzer_result.tokens.items, - .file = file, + .current_file = file_index, .allocator = allocator, .module = module, }; - const result = try analyzer.structType(Scope.Index.invalid, try mainNodeToContainerDeclaration(allocator, file), .{ .value = 0 }); + var buffer = [2]Node.Index{ + Node.Index.invalid, + Node.Index.invalid, + }; + _ = buffer; + + const result = try analyzer.structType(value, Scope.Index.invalid, .{ .value = 0 }, file_index); return result; } - -fn mainNodeToContainerDeclaration(allocator: Allocator, file: *File) !ContainerDeclaration { - const main_node = getNode(file, 0); - var list_buffer: [2]Node.Index = undefined; - const left_node = getNode(file, main_node.left.value); - const node_list: []const Node.Index = blk: { - if (left_node.id != .node_list) { - const len = @as(u2, @intFromBool(main_node.left.valid)) + @as(u2, @intFromBool(main_node.right.valid)) - @as(u2, @intFromBool(main_node.left.valid and main_node.right.valid and main_node.left.value == main_node.right.value)); - assert(len > 0); - list_buffer[0] = main_node.left; - list_buffer[1] = main_node.right; - break :blk list_buffer[0..len]; - } else { - @panic("TODO: get list"); - } - }; - - const owned_node_list = try allocator.alloc(Node.Index, node_list.len); - @memcpy(owned_node_list, node_list); - - // Deal properly with this allocation - return .{ - .members = owned_node_list, - }; -} - -fn getNode(file: *const File, index: u32) *Node { - return &file.syntactic_analyzer_result.nodes.items[index]; -} diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig index a6b00a7..5efc621 100644 --- a/src/frontend/syntactic_analyzer.zig +++ b/src/frontend/syntactic_analyzer.zig @@ -14,6 +14,7 @@ const Token = lexical_analyzer.Token; pub const Result = struct { nodes: ArrayList(Node), + node_lists: ArrayList(Node.List), time: u64, }; @@ -47,6 +48,11 @@ pub const Node = packed struct(u128) { assert(index.valid); return index.value; } + + pub fn uniqueInteger(index: Index) u32 { + assert(index.valid); + return index.value; + } }; pub const Range = struct { @@ -81,6 +87,15 @@ pub const Node = packed struct(u128) { comptime_block_two = 23, block_two = 24, @"unreachable" = 25, + field_access = 26, + call_one = 27, + comptime_block = 28, + block = 29, + unsigned_integer_type = 30, + signed_integer_type = 31, + main_one = 32, + main_two = 33, + main_zero = 34, }; }; @@ -109,10 +124,37 @@ const Analyzer = struct { } } - fn getIdentifier(analyzer: *const Analyzer, token: Token) []const u8 { - assert(token.id == .identifier); - const identifier = analyzer.file[token.start..][0..token.len]; - return identifier; + fn bytes(analyzer: *const Analyzer, token_index: Token.Index) []const u8 { + const token = analyzer.tokens[token_index]; + return analyzer.file[token.start..][0..token.len]; + } + + fn symbolDeclaration(analyzer: *Analyzer) !Node.Index { + const first = analyzer.token_i; + assert(analyzer.tokens[first].id == .fixed_keyword_var or analyzer.tokens[first].id == .fixed_keyword_const); + analyzer.token_i += 1; + _ = try analyzer.expectToken(.identifier); + + // TODO: type + _ = try analyzer.expectToken(.equal); + + const init_node = try analyzer.expression(); + + _ = try analyzer.expectToken(.semicolon); + + // TODO: + const type_node = Node.Index.invalid; + const declaration = Node{ + .id = .simple_variable_declaration, + .token = first, + .left = type_node, + .right = init_node, + }; + + const declaration_init_node = analyzer.nodes.items[init_node.unwrap()]; + std.debug.print("Declaration init node: {}\n", .{declaration_init_node}); + + return analyzer.addNode(declaration); } fn containerMembers(analyzer: *Analyzer) !Members { @@ -121,58 +163,26 @@ const Analyzer = struct { while (analyzer.token_i < analyzer.tokens.len) { const first = analyzer.token_i; - const member_node: Node = switch (analyzer.tokens[first].id) { + const member_node_index: Node.Index = switch (analyzer.tokens[first].id) { .fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) { .left_brace => blk: { analyzer.token_i += 1; const comptime_block = try analyzer.block(.{ .is_comptime = true }); - break :blk .{ + break :blk try analyzer.addNode(.{ .id = .@"comptime", .token = first, .left = comptime_block, .right = Node.Index.invalid, - }; + }); }, - else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}), + else => |foo| @panic(@tagName(foo)), }, - .fixed_keyword_const, .fixed_keyword_var => blk: { - analyzer.token_i += 1; - _ = try analyzer.expectToken(.identifier); - - // TODO: type - _ = try analyzer.expectToken(.equal); - - // TODO: do this in a function - const init_node = try analyzer.expression(); - // const init_node = switch (analyzer.tokens[analyzer.token_i].id) { - // .identifier => unreachable, - // .hash => try analyzer.compilerIntrinsic(), - // .left_parenthesis => try analyzer.function(), - // else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), - // }; - - _ = try analyzer.expectToken(.semicolon); - - // TODO: - const type_node = Node.Index.invalid; - const top_level_decl = .{ - .id = .simple_variable_declaration, - .token = first, - .left = type_node, - .right = init_node, - }; - - break :blk top_level_decl; - }, - .identifier => { - unreachable; - }, - else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), + .fixed_keyword_const, .fixed_keyword_var => try analyzer.symbolDeclaration(), + else => |t| @panic(@tagName(t)), }; - const member_index = try analyzer.addNode(member_node); - try analyzer.temporal_node_heap.append(analyzer.allocator, member_index); + try analyzer.temporal_node_heap.append(analyzer.allocator, member_node_index); } const members_array = analyzer.temporal_node_heap.items[node_heap_top..]; @@ -263,10 +273,12 @@ const Analyzer = struct { }, else => try analyzer.assignExpressionStatement(), }, - .fixed_keyword_unreachable => try analyzer.assignExpressionStatement(), + .fixed_keyword_unreachable, .fixed_keyword_return => try analyzer.assignExpressionStatement(), .fixed_keyword_while => try analyzer.whileStatement(options), - else => unreachable, + .fixed_keyword_const, .fixed_keyword_var => try analyzer.symbolDeclaration(), + else => |t| @panic(@tagName(t)), }; + try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index); } @@ -301,7 +313,15 @@ const Analyzer = struct { .left = statement_array[0], .right = statement_array[1], }, - else => |len| std.debug.panic("len: {}", .{len}), + else => .{ + .id = switch (options.is_comptime) { + true => .comptime_block, + false => .block, + }, + .token = left_brace, + .left = try analyzer.nodeList(statement_array), + .right = Node.Index.invalid, + }, }; return analyzer.addNode(node); } @@ -329,7 +349,7 @@ const Analyzer = struct { const expression_id: Node.Id = switch (analyzer.tokens[analyzer.token_i].id) { .semicolon => return expr, .equal => .assign, - else => unreachable, + else => |t| @panic(@tagName(t)), }; const node = Node{ @@ -398,8 +418,8 @@ const Analyzer = struct { while (analyzer.token_i < analyzer.tokens.len) { const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) { - .equal, .semicolon, .right_parenthesis, .right_brace, .comma => -1, - else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }), + .equal, .semicolon, .right_parenthesis, .right_brace, .comma, .period => -1, + else => |t| @panic(@tagName(t)), }; if (precedence < minimum_precedence) { @@ -446,6 +466,16 @@ const Analyzer = struct { }, .string_literal, .number_literal, .fixed_keyword_true, .fixed_keyword_false, .hash, .fixed_keyword_unreachable => try analyzer.curlySuffixExpression(), .fixed_keyword_fn => analyzer.function(), + .fixed_keyword_return => try analyzer.addNode(.{ + .id = .@"return", + .token = blk: { + const token = analyzer.token_i; + analyzer.token_i += 1; + break :blk token; + }, + .left = try analyzer.expression(), + .right = Node.Index.invalid, + }), // todo:? // .left_brace => try analyzer.block(), else => |id| { @@ -492,14 +522,8 @@ const Analyzer = struct { fn typeExpression(analyzer: *Analyzer) !Node.Index { return switch (analyzer.tokens[analyzer.token_i].id) { - .identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false, .hash => try analyzer.errorUnionExpression(), - else => |id| blk: { - log.warn("By default, calling errorUnionExpression with {s}", .{@tagName(id)}); - - const result = try analyzer.errorUnionExpression(); - - break :blk result; - }, + .identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false, .hash, .string_literal, .number_literal, .fixed_keyword_unreachable, .keyword_unsigned_integer, .keyword_signed_integer => try analyzer.errorUnionExpression(), + else => |id| @panic(@tagName(id)), }; } @@ -516,14 +540,17 @@ const Analyzer = struct { var result = try analyzer.primaryTypeExpression(); while (true) { - if (analyzer.suffixOperator()) |_| { - unreachable; + const suffix_operator = try analyzer.suffixOperator(result); + if (suffix_operator.valid) { + result = suffix_operator; } else { if (analyzer.tokens[analyzer.token_i].id == .left_parenthesis) { + const left_parenthesis = analyzer.token_i; analyzer.token_i += 1; var expression_list = ArrayList(Node.Index){}; while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { + std.debug.print("Loop\n", .{}); const parameter = try analyzer.expression(); try expression_list.append(analyzer.allocator, parameter); analyzer.token_i += @intFromBool(switch (analyzer.tokens[analyzer.token_i].id) { @@ -534,7 +561,16 @@ const Analyzer = struct { } _ = try analyzer.expectToken(.right_parenthesis); - @panic("TODO"); + // const is_comma = analyzer.tokens[analyzer.token_i].id == .comma; + return analyzer.addNode(switch (expression_list.items.len) { + 0 => .{ + .id = .call_one, + .token = left_parenthesis, + .left = result, + .right = Node.Index.invalid, + }, + else => |len| std.debug.panic("len: {}", .{len}), + }); } else { return result; } @@ -569,8 +605,8 @@ const Analyzer = struct { .identifier => switch (analyzer.tokens[token_i + 1].id) { .colon => unreachable, else => blk: { - const identifier = analyzer.getIdentifier(token); - std.debug.print("identifier: {s}\n", .{identifier}); + const identifier = analyzer.bytes(token_i); + // std.debug.print("identifier: {s}\n", .{identifier}); analyzer.token_i += 1; if (equal(u8, identifier, "_")) { break :blk Node.Index.invalid; @@ -594,20 +630,55 @@ const Analyzer = struct { .right = Node.Index.invalid, }), .hash => analyzer.compilerIntrinsic(), + .keyword_unsigned_integer, .keyword_signed_integer => |signedness| try analyzer.addNode(.{ + .id = switch (signedness) { + .keyword_unsigned_integer => .unsigned_integer_type, + .keyword_signed_integer => .signed_integer_type, + else => unreachable, + }, + .token = blk: { + analyzer.token_i += 1; + break :blk token_i; + }, + .left = @bitCast(@as(u32, std.fmt.parseInt(u16, analyzer.bytes(token_i)[1..], 10) catch unreachable)), + .right = Node.Index.invalid, + }), else => |foo| { switch (foo) { - .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.getIdentifier(analyzer.tokens[token_i]) }), - else => std.debug.panic("{s}", .{@tagName(foo)}), + .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.bytes(token_i) }), + else => @panic(@tagName(foo)), } }, }; } // TODO: - fn suffixOperator(analyzer: *Analyzer) ?bool { - _ = analyzer; - - return null; + fn suffixOperator(analyzer: *Analyzer, left: Node.Index) !Node.Index { + const token = analyzer.tokens[analyzer.token_i]; + return switch (token.id) { + .left_bracket => unreachable, + .period => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .identifier => analyzer.addNode(.{ + .id = .field_access, + .token = blk: { + const main_token = analyzer.token_i; + analyzer.token_i += 1; + break :blk main_token; + }, + .left = left, + .right = blk: { + //TODO ??? + const right_token = analyzer.token_i; + analyzer.token_i += 1; + const result: Node.Index = @bitCast(right_token); + std.debug.print("WARNING: rhs has node index {} but it's token #{}\n", .{ result, right_token }); + break :blk result; + }, + }), + else => |t| @panic(@tagName(t)), + }, + else => Node.Index.invalid, + }; } fn addNode(analyzer: *Analyzer, node: Node) !Node.Index { @@ -618,27 +689,23 @@ const Analyzer = struct { .value = @intCast(index), }; } + + fn nodeList(analyzer: *Analyzer, input: []const Node.Index) !Node.Index { + const index = analyzer.node_lists.items.len; + var new_node_list = try ArrayList(Node.Index).initCapacity(analyzer.allocator, input.len); + try new_node_list.appendSlice(analyzer.allocator, input); + try analyzer.node_lists.append(analyzer.allocator, new_node_list); + + return .{ + .value = @intCast(index), + }; + } }; const Members = struct { len: usize, left: Node.Index, right: Node.Index, - - pub fn toRange(members: Members) Node.Range { - return switch (members.len) { - 0 => unreachable, - 1 => .{ - .start = members.left.value, - .end = members.left.value, - }, - 2 => .{ - .start = members.left.value, - .end = members.right.value, - }, - else => unreachable, - }; - } }; pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !Result { @@ -657,10 +724,22 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R assert(node_index.value == 0); assert(node_index.valid); + const members = try analyzer.containerMembers(); - const member_range = members.toRange(); - analyzer.nodes.items[0].left = .{ .value = @intCast(member_range.start) }; - analyzer.nodes.items[0].right = .{ .value = @intCast(member_range.end) }; + + switch (members.len) { + 0 => unreachable, + 1 => { + analyzer.nodes.items[0].id = .main_one; + analyzer.nodes.items[0].left = members.left; + }, + 2 => { + analyzer.nodes.items[0].id = .main_two; + analyzer.nodes.items[0].left = members.left; + analyzer.nodes.items[0].right = members.right; + }, + else => unreachable, + } const end = std.time.Instant.now() catch unreachable; @@ -668,6 +747,7 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R return .{ .nodes = analyzer.nodes, + .node_lists = analyzer.node_lists, .time = end.since(start), }; } diff --git a/src/fs.zig b/src/fs.zig index c8c5963..a8ec0ec 100644 --- a/src/fs.zig +++ b/src/fs.zig @@ -1,8 +1,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -pub const first = "src/test/main.b"; - pub fn readFile(allocator: Allocator, file_relative_path: []const u8) ![]const u8 { const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize)); return file; diff --git a/src/main.zig b/src/main.zig index 93052c3..6ddc0b3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -5,7 +5,7 @@ const assert = std.debug.assert; const Compilation = @import("Compilation.zig"); pub const seed = std.math.maxInt(u64); -const default_src_file = "src/test/main.b"; +const default_src_file = "src/test/main.nat"; pub fn main() !void { try singleCompilation(default_src_file); diff --git a/src/test/main.nat b/src/test/main.nat index 157bd8a..45bfaac 100644 --- a/src/test/main.nat +++ b/src/test/main.nat @@ -1,3 +1,3 @@ -const main = fn() i32 { +const main = fn() s32 { return 0; }; From bca2f024cd2232a089be5b741ac44e80cba07979 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Sat, 30 Sep 2023 12:51:58 -0600 Subject: [PATCH 2/4] ir for main function --- src/Compilation.zig | 32 +++- src/backend/emit.zig | 7 +- src/backend/intermediate_representation.zig | 161 ++++++++++++++++++-- src/backend/x86_64.zig | 69 +++------ src/frontend/semantic_analyzer.zig | 5 + 5 files changed, 209 insertions(+), 65 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 6a7e357..0c56802 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -76,6 +76,20 @@ pub const Type = union(enum) { pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; + + pub fn getSize(type_info: Type) u64 { + return switch (type_info) { + .integer => |integer| integer.getSize(), + else => |t| @panic(@tagName(t)), + }; + } + + pub fn getAlignment(type_info: Type) u64 { + return switch (type_info) { + .integer => |integer| @min(16, integer.getSize()), + else => |t| @panic(@tagName(t)), + }; + } }; pub const Integer = struct { @@ -85,6 +99,10 @@ pub const Integer = struct { unsigned = 0, signed = 1, }; + + pub fn getSize(integer: Integer) u64 { + return integer.bit_count / @bitSizeOf(u8) + @intFromBool(integer.bit_count % @bitSizeOf(u8) != 0); + } }; /// A scope contains a bunch of declarations @@ -201,6 +219,7 @@ pub const Syscall = struct { pub const Call = struct { value: Value.Index, arguments: ArgumentList.Index, + type: Type.Index, pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; @@ -251,11 +270,11 @@ pub const Value = union(enum) { }; } - pub fn getType(value: *Value) !void { - switch (value.*) { + pub fn getType(value: *Value, module: *Module) Type.Index { + return switch (value.*) { + .call => |call_index| module.calls.get(call_index).type, else => |t| @panic(@tagName(t)), - } - unreachable; + }; } }; @@ -490,10 +509,7 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! var ir = try intermediate_representation.initialize(compilation, module, packages[0], main_declaration); - switch (@import("builtin").cpu.arch) { - .x86_64 => |arch| try emit.get(arch).initialize(compilation.base_allocator, &ir), - else => {}, - } + try emit.get(.x86_64).initialize(compilation.base_allocator, &ir); } fn generateAST() !void {} diff --git a/src/backend/emit.zig b/src/backend/emit.zig index ddc073c..5299d57 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -46,13 +46,18 @@ pub const Result = struct { break :blk @as([*]align(0x1000) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size]; }, .linux, .macos => |os_tag| blk: { + const jit = switch (os_tag) { + .macos => 0x800, + .linux => 0, + else => unreachable, + }; const execute_flag: switch (os_tag) { .linux => u32, .macos => c_int, else => unreachable, } = if (flags.executable) std.os.PROT.EXEC else 0; const protection_flags: u32 = @intCast(std.os.PROT.READ | std.os.PROT.WRITE | execute_flag); - const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE; + const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE | jit; break :blk std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0); }, diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index e817d4a..643609f 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -10,16 +10,20 @@ const Package = Compilation.Package; const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const BlockList = data_structures.BlockList; +const AutoHashMap = data_structures.AutoHashMap; pub const Result = struct { - functions: BlockList(Function) = .{}, blocks: BlockList(BasicBlock) = .{}, + calls: BlockList(Call) = .{}, + functions: BlockList(Function) = .{}, instructions: BlockList(Instruction) = .{}, jumps: BlockList(Jump) = .{}, - values: BlockList(Value) = .{}, - syscalls: BlockList(Syscall) = .{}, loads: BlockList(Load) = .{}, phis: BlockList(Phi) = .{}, + stores: BlockList(Store) = .{}, + syscalls: BlockList(Syscall) = .{}, + values: BlockList(Value) = .{}, + stack_references: BlockList(StackReference) = .{}, }; pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_file: Compilation.Type.Index) !Result { @@ -62,10 +66,12 @@ pub const BasicBlock = struct { }; pub const Instruction = union(enum) { + call: Call.Index, jump: Jump.Index, load: Load.Index, phi: Phi.Index, ret: Ret, + store: Store.Index, syscall: Syscall.Index, @"unreachable", @@ -106,9 +112,34 @@ const Load = struct { pub const Index = List.Index; }; +const Store = struct { + source: Value.Index, + destination: StackReference.Index, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +const StackReference = struct { + size: u64, + alignment: u64, + offset: u64, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +const Call = struct { + function: Function.Index, + + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + pub const Value = union(enum) { integer: Integer, load: Load.Index, + call: Call.Index, + stack_reference: StackReference.Index, pub const List = BlockList(@This()); pub const Index = List.Index; @@ -116,6 +147,8 @@ pub const Value = union(enum) { return switch (value) { .integer => false, .load => true, + .call => true, + .stack_reference => true, }; } }; @@ -138,11 +171,15 @@ pub const Builder = struct { current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid, current_function_index: Function.Index = Function.Index.invalid, return_phi_node: Instruction.Index = Instruction.Index.invalid, + current_stack_offset: usize = 0, + stack_map: AutoHashMap(Compilation.Declaration.Index, StackReference.Index) = .{}, fn function(builder: *Builder, sema_function: Compilation.Function) !void { builder.current_function_index = (try builder.ir.functions.append(builder.allocator, .{})).index; // TODO: arguments builder.current_basic_block = try builder.newBlock(); + builder.current_stack_offset = 0; + builder.stack_map = .{}; const return_type = builder.module.types.get(builder.module.function_prototypes.get(sema_function.prototype).return_type); const is_noreturn = return_type.* == .noreturn; @@ -301,28 +338,77 @@ pub const Builder = struct { }); }, .declaration => |sema_declaration_index| { - _ = sema_declaration_index; - unreachable; + const sema_declaration = builder.module.declarations.get(sema_declaration_index); + assert(sema_declaration.scope_type == .local); + const sema_init_value = builder.module.values.get(sema_declaration.init_value); + const declaration_type = builder.module.types.get(sema_init_value.getType(builder.module)); + const size = declaration_type.getSize(); + const alignment = declaration_type.getAlignment(); + const stack_offset = switch (size > 0) { + true => builder.allocateStack(size, alignment), + false => 0, + }; + var value_index = try builder.emitValue(sema_declaration.init_value); + const value = builder.ir.values.get(value_index); + print("Value: {}\n", .{value.*}); + value_index = switch (value.isInMemory()) { + false => try builder.load(value_index), + true => value_index, + }; + + if (stack_offset > 0) { + _ = try builder.store(.{ + .source = value_index, + .destination = try builder.stackReference(stack_offset, declaration_type.*, sema_declaration_index), + }); + } }, else => |t| @panic(@tagName(t)), } } } + fn stackReference(builder: *Builder, stack_offset: u64, t: Compilation.Type, value: Compilation.Declaration.Index) !StackReference.Index { + const stack_reference_allocation = try builder.ir.stack_references.append(builder.allocator, .{ + .offset = stack_offset, + .size = t.getSize(), + .alignment = t.getAlignment(), + }); + + const index = stack_reference_allocation.index; + + try builder.stack_map.put(builder.allocator, value, index); + + return index; + } + + fn store(builder: *Builder, descriptor: Store) !void { + const store_allocation = try builder.ir.stores.append(builder.allocator, descriptor); + _ = try builder.append(.{ + .store = store_allocation.index, + }); + } + + fn allocateStack(builder: *Builder, size: u64, alignment: u64) u64 { + builder.current_stack_offset = std.mem.alignForward(u64, builder.current_stack_offset, alignment); + builder.current_stack_offset += size; + return builder.current_stack_offset; + } + fn load(builder: *Builder, value_index: Value.Index) !Value.Index { print("Doing load!\n", .{}); - const load_index = try builder.ir.loads.append(builder.allocator, .{ + const load_allocation = try builder.ir.loads.append(builder.allocator, .{ .value = value_index, }); const instruction_index = try builder.append(.{ - .load = load_index, + .load = load_allocation.index, }); _ = instruction_index; const result = try builder.ir.values.append(builder.allocator, .{ - .load = load_index, + .load = load_allocation.index, }); - return result; + return result.index; } fn emitValue(builder: *Builder, sema_value_index: Compilation.Value.Index) !Value.Index { @@ -335,14 +421,65 @@ pub const Builder = struct { .sign = false, }, })).index, + .call => |sema_call_index| { + const sema_call = builder.module.calls.get(sema_call_index); + const argument_list_index = sema_call.arguments; + if (argument_list_index.valid) { + unreachable; + } + + const call_index = try builder.call(.{ + .function = switch (builder.module.values.get(sema_call.value).*) { + .function => |function_index| .{ + .index = function_index.index, + .block = function_index.block, + }, + else => |t| @panic(@tagName(t)), + }, + }); + + _ = try builder.append(.{ + .call = call_index, + }); + + const value_allocation = try builder.ir.values.append(builder.allocator, .{ + .call = call_index, + }); + + return value_allocation.index; + }, + .declaration_reference => |sema_declaration_index| { + const sema_declaration = builder.module.declarations.get(sema_declaration_index); + const sema_init_value = builder.module.values.get(sema_declaration.init_value); + const init_type = sema_init_value.getType(builder.module); + _ = init_type; + switch (sema_declaration.scope_type) { + .local => { + const stack_reference = builder.stack_map.get(sema_declaration_index).?; + const value = try builder.ir.values.append(builder.allocator, .{ + .stack_reference = stack_reference, + }); + return value.index; + }, + .global => unreachable, + } + // switch (sema_declaration.*) { + // else => |t| @panic(@tagName(t)), + // } + }, else => |t| @panic(@tagName(t)), }; } - fn jump(builder: *Builder, jump_descriptor: Jump) !Jump.Index { - const destination_block = builder.ir.blocks.get(jump_descriptor.destination); + fn call(builder: *Builder, descriptor: Call) !Call.Index { + const call_allocation = try builder.ir.calls.append(builder.allocator, descriptor); + return call_allocation.index; + } + + fn jump(builder: *Builder, descriptor: Jump) !Jump.Index { + const destination_block = builder.ir.blocks.get(descriptor.destination); assert(!destination_block.sealed); - const jump_allocation = try builder.ir.jumps.append(builder.allocator, jump_descriptor); + const jump_allocation = try builder.ir.jumps.append(builder.allocator, descriptor); return jump_allocation.index; } diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index 9f473ec..1963e19 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -54,23 +54,6 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * }); // TODO } else unreachable; - // if (integer.value == 0) { - // try function.instructions.append(instruction_selector.allocator, .{ - // .xor_reg32_reg32 = .{ - // .destination = syscall_register, - // .source = syscall_register, - // }, - // }); - // } else if (integer.value < std.math.maxInt(u32)) { - // try function.instructions.append(instruction_selector.allocator, .{ - // .mov_reg_imm32 = .{ - // .destination = syscall_register, - // .source = @intCast(integer.value), - // }, - // }); - // } else { - // unreachable; - // } }, else => |t| @panic(@tagName(t)), } @@ -83,14 +66,14 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * .phi => unreachable, .ret => unreachable, .jump => |jump_index| { - _ = jump_index; - // const jump = intermediate.jumps.get(jump_index); - // const relocation = LocalRelative{ - // .instruction = .jmp_rel_8, - // .source = @intCast(function.block_map.get(jump.source) orelse unreachable), - // .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable), - // .offset_in_block = function.block_byte_count, - // }; + const jump = intermediate.jumps.get(jump_index); + const relocation = Displacement{ + .size = .one, + .source = @intCast(function.block_map.get(jump.source) orelse unreachable), + .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable), + .offset_in_block = function.block_byte_count, + }; + _ = relocation; // const index = function.instructions.items.len; // try function.relocations.append(instruction_selector.allocator, @intCast(index)); // try function.instructions.append(instruction_selector.allocator, .{ @@ -98,6 +81,8 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * // }); unreachable; }, + .call => unreachable, + .store => unreachable, } } @@ -115,6 +100,13 @@ const RegisterMemoryRegister = struct { direct: bool, }; +const Displacement = struct { + size: Size, + source: u16, + destination: u16, + offset_in_block: u16, +}; + const RmResult = struct { rex: Rex, mod_rm: ModRm, @@ -215,24 +207,7 @@ pub fn emitInstruction(result: *emit.Result, instruction: Instruction, intermedi result.appendCodeByte(opcode_byte); emitImmediate(result, intermediate, register_immediate.immediate, register_immediate.immediate_size); }, - // .jmp_rel_8 => unreachable, //result.appendOnlyOpcodeSkipInstructionBytes(instruction), - // inline .mov_reg_imm32 => |content, tag| { - // _ = tag; - // _ = content; - // // const descriptor = instruction_descriptors.get(tag); - // // result.writeOpcode(descriptor.opcode); - // // result.appendCodeByte(descriptor.getOpcode()[0] | @intFromEnum(content.destination)); - // // result.appendCode(std.mem.asBytes(&content.source)); - // unreachable; - // }, - // inline .xor_reg32_reg32 => |content, tag| { - // _ = tag; - // _ = content; - // // const descriptor = instruction_descriptors.get(tag); - // // result.appendCodeByte(descriptor.getOpcode()[0]); - // // result.appendCodeByte(0xc0 | @as(u8, @intFromEnum(content.source)) << 4 | @intFromEnum(content.destination)); - // unreachable; - // }, + .jmp_rel => unreachable, inline .syscall, .ud2 => |_, tag| { const opcode = tag.getOpcode(&.{}); result.appendCode(opcode); @@ -244,6 +219,7 @@ pub fn emitInstruction(result: *emit.Result, instruction: Instruction, intermedi pub const Instruction = union(Id) { xor_rm_r: RegisterMemoryRegister, mov_r_imm: RegisterImmediate, + jmp_rel: Displacement, // jmp_rel_8: LocalRelative, // mov_reg_imm32: struct { // destination: GPRegister, @@ -259,7 +235,7 @@ pub const Instruction = union(Id) { const Id = enum { xor_rm_r, mov_r_imm, - // jmp_rel_8, + jmp_rel, // mov_reg_imm32, // xor_reg32_reg32, syscall, @@ -277,6 +253,11 @@ pub const Instruction = union(Id) { .one => &.{0x30}, .two, .four, .eight => &.{0x31}, }, + .jmp_rel => switch (operands[0].displacement.size) { + .one => unreachable, + .four => unreachable, + else => unreachable, + }, }; } }; diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index 438cd95..289862b 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -396,10 +396,15 @@ const Analyzer = struct { .call_one => blk: { const this_value_node_index = node.left; const this_value_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, this_value_node_index); + const value_type = switch (this_value_allocation.ptr.*) { + .function => |function_index| analyzer.module.function_prototypes.get(analyzer.module.functions.get(function_index).prototype).return_type, + else => |t| @panic(@tagName(t)), + }; const call_allocation = try analyzer.module.calls.append(analyzer.allocator, .{ .value = this_value_allocation.index, .arguments = ArgumentList.Index.invalid, + .type = value_type, }); break :blk .{ .call = call_allocation.index, From c7bcfa1de92c1d96ae63671d3448d9660af9b602 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Sun, 1 Oct 2023 14:01:59 -0600 Subject: [PATCH 3/4] instruction selection and register allocation --- src/Compilation.zig | 35 +- src/backend/emit.zig | 78 ++-- src/backend/intermediate_representation.zig | 381 +++++++++++++----- src/backend/x86_64.zig | 403 +++++++++++++++++++- src/data_structures.zig | 1 + src/frontend/semantic_analyzer.zig | 46 ++- 6 files changed, 777 insertions(+), 167 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 0c56802..54f89fa 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -71,12 +71,25 @@ pub const Type = union(enum) { void, noreturn, bool, - integer: Integer, + integer: Type.Integer, @"struct": Struct.Index, pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; + pub const Integer = struct { + bit_count: u16, + signedness: Signedness, + pub const Signedness = enum(u1) { + unsigned = 0, + signed = 1, + }; + + pub fn getSize(integer: Type.Integer) u64 { + return integer.bit_count / @bitSizeOf(u8) + @intFromBool(integer.bit_count % @bitSizeOf(u8) != 0); + } + }; + pub fn getSize(type_info: Type) u64 { return switch (type_info) { .integer => |integer| integer.getSize(), @@ -92,19 +105,6 @@ pub const Type = union(enum) { } }; -pub const Integer = struct { - bit_count: u16, - signedness: Signedness, - pub const Signedness = enum(u1) { - unsigned = 0, - signed = 1, - }; - - pub fn getSize(integer: Integer) u64 { - return integer.bit_count / @bitSizeOf(u8) + @intFromBool(integer.bit_count % @bitSizeOf(u8) != 0); - } -}; - /// A scope contains a bunch of declarations pub const Scope = struct { declarations: AutoHashMap(u32, Declaration.Index) = .{}, @@ -253,7 +253,7 @@ pub const Value = union(enum) { runtime: Runtime, assign: Assignment.Index, type: Type.Index, - integer: u64, + integer: Integer, syscall: Syscall.Index, call: Call.Index, argument_list: ArgumentList, @@ -278,6 +278,11 @@ pub const Value = union(enum) { } }; +pub const Integer = struct { + value: u64, + type: Type.Integer, +}; + pub const Module = struct { main_package: *Package, import_table: StringArrayHashMap(*File) = .{}, diff --git a/src/backend/emit.zig b/src/backend/emit.zig index 5299d57..97c4c2e 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -107,16 +107,14 @@ pub fn InstructionSelector(comptime Instruction: type) type { pub const Function = struct { instructions: ArrayList(Instruction) = .{}, - block_byte_counts: ArrayList(u16), - block_offsets: ArrayList(u32), relocations: ArrayList(u32) = .{}, block_map: AutoHashMap(ir.BasicBlock.Index, u32) = .{}, - byte_count: u32 = 0, - block_byte_count: u16 = 0, - pub fn selectInstruction(function: *Function, allocator: Allocator, instruction: Instruction) !void { + pub fn addInstruction(function: *Function, allocator: Allocator, instruction: Instruction) !u32 { + const index = function.instructions.items.len; try function.instructions.append(allocator, instruction); - function.block_byte_count += Instruction.descriptors.get(instruction).size; + + return @intCast(index); } }; @@ -130,45 +128,43 @@ pub fn get(comptime arch: std.Target.Cpu.Arch) type { else => @compileError("Architecture not supported"), }; const Instruction = backend.Instruction; + _ = Instruction; return struct { pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void { var result = try Result.create(); - var function_iterator = intermediate.functions.iterator(); - const IS = InstructionSelector(Instruction); - var instruction_selector = IS{ - .functions = try ArrayList(IS.Function).initCapacity(allocator, intermediate.functions.len), - .allocator = allocator, - }; - - while (function_iterator.next()) |ir_function| { - const function = instruction_selector.functions.addOneAssumeCapacity(); - function.* = .{ - .block_byte_counts = try ArrayList(u16).initCapacity(allocator, ir_function.blocks.items.len), - .block_offsets = try ArrayList(u32).initCapacity(allocator, ir_function.blocks.items.len), - }; - try function.block_map.ensureTotalCapacity(allocator, @intCast(ir_function.blocks.items.len)); - for (ir_function.blocks.items, 0..) |block_index, index| { - function.block_map.putAssumeCapacity(block_index, @intCast(index)); - } - - for (ir_function.blocks.items) |block_index| { - const block = intermediate.blocks.get(block_index); - function.block_offsets.appendAssumeCapacity(function.byte_count); - function.block_byte_count = 0; - for (block.instructions.items) |instruction_index| { - const instruction = intermediate.instructions.get(instruction_index).*; - try backend.selectInstruction(&instruction_selector, function, intermediate, instruction); - } - - function.block_byte_counts.appendAssumeCapacity(function.block_byte_count); - function.byte_count += function.block_byte_count; - } - } - - for (instruction_selector.functions.items) |function| { - for (function.instructions.items) |instruction| backend.emitInstruction(&result, instruction, intermediate); - } + var mir = try backend.MIR.generate(allocator, intermediate); + try mir.allocateRegisters(allocator, intermediate); + // var function_iterator = intermediate.functions.iterator(); + // const IS = InstructionSelector(Instruction); + // var instruction_selector = IS{ + // .functions = try ArrayList(IS.Function).initCapacity(allocator, intermediate.functions.len), + // .allocator = allocator, + // }; + // + // while (function_iterator.next()) |ir_function| { + // const function = instruction_selector.functions.addOneAssumeCapacity(); + // function.* = .{}; + // try function.block_map.ensureTotalCapacity(allocator, @intCast(ir_function.blocks.items.len)); + // for (ir_function.blocks.items, 0..) |block_index, index| { + // function.block_map.putAssumeCapacity(block_index, @intCast(index)); + // } + // + // for (ir_function.blocks.items) |block_index| { + // const block = intermediate.blocks.get(block_index); + // for (block.instructions.items) |instruction_index| { + // const instruction = intermediate.instructions.get(instruction_index).*; + // try backend.selectInstruction(&instruction_selector, function, intermediate, instruction); + // } + // + // // function.block_byte_counts.appendAssumeCapacity(function.block_byte_count); + // // function.byte_count += function.block_byte_count; + // } + // } + // + // for (instruction_selector.functions.items) |function| { + // for (function.instructions.items) |instruction| backend.emitInstruction(&result, instruction, intermediate); + // } // for (instruction_selector.functions.items) |function| { // var fix_size: bool = false; diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index 643609f..0ff9b3e 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -10,6 +10,7 @@ const Package = Compilation.Package; const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const BlockList = data_structures.BlockList; +const AutoArrayHashMap = data_structures.AutoArrayHashMap; const AutoHashMap = data_structures.AutoHashMap; pub const Result = struct { @@ -38,9 +39,13 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, }; while (function_iterator.next()) |sema_function| { - print("\nFunction: {}\n", .{sema_function}); + const function_index = try builder.buildFunction(sema_function); + try builder.optimizeFunction(function_index); + } - try builder.function(sema_function); + var ir_function_iterator = builder.ir.functions.iterator(); + while (ir_function_iterator.nextPointer()) |function| { + print("\n{}\n", .{function}); } return builder.ir; @@ -70,9 +75,10 @@ pub const Instruction = union(enum) { jump: Jump.Index, load: Load.Index, phi: Phi.Index, - ret: Ret, + ret: Value.Index, store: Store.Index, - syscall: Syscall.Index, + syscall: Value.Index, + copy: Value.Index, @"unreachable", pub const List = BlockList(@This()); @@ -88,10 +94,6 @@ const Phi = struct { pub const Index = List.Index; }; -const Ret = struct { - value: Instruction.Index, -}; - pub const Jump = struct { source: BasicBlock.Index, destination: BasicBlock.Index, @@ -114,12 +116,12 @@ const Load = struct { const Store = struct { source: Value.Index, - destination: StackReference.Index, + destination: Value.Index, pub const List = BlockList(@This()); pub const Index = List.Index; }; -const StackReference = struct { +pub const StackReference = struct { size: u64, alignment: u64, offset: u64, @@ -127,7 +129,7 @@ const StackReference = struct { pub const Index = List.Index; }; -const Call = struct { +pub const Call = struct { function: Function.Index, pub const List = BlockList(@This()); @@ -136,10 +138,13 @@ const Call = struct { }; pub const Value = union(enum) { - integer: Integer, + integer: Compilation.Integer, load: Load.Index, call: Call.Index, stack_reference: StackReference.Index, + phi: Phi.Index, + instruction: Instruction.Index, + syscall: Syscall.Index, pub const List = BlockList(@This()); pub const Index = List.Index; @@ -149,88 +154,280 @@ pub const Value = union(enum) { .load => true, .call => true, .stack_reference => true, + .phi => unreachable, + .instruction => unreachable, + .syscall => unreachable, }; } }; -const Integer = struct { - value: u64, - sign: bool, -}; - -const Function = struct { +pub const Function = struct { blocks: ArrayList(BasicBlock.Index) = .{}, + stack_map: AutoHashMap(Compilation.Declaration.Index, Value.Index) = .{}, + current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid, + return_phi_node: Instruction.Index = Instruction.Index.invalid, + return_phi_block: BasicBlock.Index = BasicBlock.Index.invalid, + ir: *Result, + current_stack_offset: usize = 0, pub const List = BlockList(@This()); pub const Index = List.Index; + + pub fn format(function: *const Function, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try writer.writeAll("Function:\n"); + for (function.blocks.items, 0..) |block_index, function_block_index| { + try writer.print("#{}:\n", .{function_block_index}); + const block = function.ir.blocks.get(block_index); + for (block.instructions.items, 0..) |instruction_index, block_instruction_index| { + try writer.print("%{}: ", .{block_instruction_index}); + const instruction = function.ir.instructions.get(instruction_index).*; + try writer.print("{s}", .{@tagName(instruction)}); + try writer.writeByte('\n'); + } + try writer.writeByte('\n'); + } + _ = options; + _ = fmt; + } }; pub const Builder = struct { allocator: Allocator, ir: Result = .{}, module: *Module, - current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid, current_function_index: Function.Index = Function.Index.invalid, - return_phi_node: Instruction.Index = Instruction.Index.invalid, - current_stack_offset: usize = 0, - stack_map: AutoHashMap(Compilation.Declaration.Index, StackReference.Index) = .{}, - fn function(builder: *Builder, sema_function: Compilation.Function) !void { - builder.current_function_index = (try builder.ir.functions.append(builder.allocator, .{})).index; + fn currentFunction(builder: *Builder) *Function { + return builder.ir.functions.get(builder.current_function_index); + } + + fn buildFunction(builder: *Builder, sema_function: Compilation.Function) !Function.Index { + const function_allocation = try builder.ir.functions.append(builder.allocator, .{ + .ir = &builder.ir, + }); + builder.current_function_index = function_allocation.index; + const function = function_allocation.ptr; // TODO: arguments - builder.current_basic_block = try builder.newBlock(); - builder.current_stack_offset = 0; - builder.stack_map = .{}; + function.current_basic_block = try builder.newBlock(); const return_type = builder.module.types.get(builder.module.function_prototypes.get(sema_function.prototype).return_type); const is_noreturn = return_type.* == .noreturn; if (!is_noreturn) { const exit_block = try builder.newBlock(); - const phi = try builder.ir.phis.addOne(builder.allocator); const phi_instruction = try builder.appendToBlock(exit_block, .{ - .phi = phi.index, + .phi = Phi.Index.invalid, }); - phi.ptr.* = .{ - .value = Value.Index.invalid, - .jump = Jump.Index.invalid, - .block = exit_block, - .next = Phi.Index.invalid, - }; + // phi.ptr.* = .{ + // .value = Value.Index.invalid, + // .jump = Jump.Index.invalid, + // .block = exit_block, + // .next = Phi.Index.invalid, + // }; const ret = try builder.appendToBlock(exit_block, .{ - .ret = .{ - .value = phi_instruction, - }, + .ret = (try builder.ir.values.append(builder.allocator, .{ + .instruction = phi_instruction, + })).index, }); _ = ret; - builder.return_phi_node = phi_instruction; + function.return_phi_node = phi_instruction; + function.return_phi_block = exit_block; } const sema_block = sema_function.getBodyBlock(builder.module); try builder.block(sema_block, .{ .emit_exit_block = !is_noreturn }); - try builder.dumpFunction(std.io.getStdErr().writer(), builder.current_function_index); + builder.currentFunction().current_stack_offset = std.mem.alignForward(usize, builder.currentFunction().current_stack_offset, 0x10); + + return builder.current_function_index; } - fn dumpFunction(builder: *Builder, writer: anytype, index: Function.Index) !void { - const f = builder.ir.functions.get(index); - try writer.writeAll("Hello world!\n"); - print("Function blocks: {}\n", .{f.blocks.items.len}); - var function_instruction_index: usize = 0; - for (f.blocks.items, 0..) |block_index, function_block_index| { - print("#{}:\n", .{function_block_index}); - const function_block = builder.ir.blocks.get(block_index); - for (function_block.instructions.items) |instruction_index| { - const instruction = builder.ir.instructions.get(instruction_index); - print("%{}: {}\n", .{ function_instruction_index, instruction }); - function_instruction_index += 1; - } + const BlockSearcher = struct { + to_visit: ArrayList(BasicBlock.Index) = .{}, + visited: AutoArrayHashMap(BasicBlock.Index, void) = .{}, + }; - print("\n", .{}); + fn findReachableBlocks(builder: *Builder, first: BasicBlock.Index) ![]const BasicBlock.Index { + var searcher = BlockSearcher{}; + try searcher.to_visit.append(builder.allocator, first); + try searcher.visited.put(builder.allocator, first, {}); + + while (searcher.to_visit.items.len > 0) { + const block_index = searcher.to_visit.swapRemove(0); + const block_to_visit = builder.ir.blocks.get(block_index); + const last_instruction_index = block_to_visit.instructions.items[block_to_visit.instructions.items.len - 1]; + const last_instruction = builder.ir.instructions.get(last_instruction_index); + switch (last_instruction.*) { + .jump => |jump_index| { + const ir_jump = builder.ir.jumps.get(jump_index); + assert(ir_jump.source.eq(block_index)); + const new_block = ir_jump.destination; + if (searcher.visited.get(new_block) == null) { + try searcher.to_visit.append(builder.allocator, new_block); + try searcher.visited.put(builder.allocator, new_block, {}); + } + }, + .@"unreachable", .ret => {}, + else => |t| @panic(@tagName(t)), + } + } + + return searcher.visited.keys(); + } + + fn optimizeFunction(builder: *Builder, function_index: Function.Index) !void { + const function = builder.ir.functions.get(function_index); + const reachable_blocks = try builder.findReachableBlocks(function.blocks.items[0]); + var did_something = true; + + while (did_something) { + did_something = false; + for (reachable_blocks) |basic_block_index| { + const basic_block = builder.ir.blocks.get(basic_block_index); + for (basic_block.instructions.items) |instruction_index| { + did_something = did_something or try builder.removeUnreachablePhis(reachable_blocks, instruction_index); + did_something = did_something or try builder.removeTrivialPhis(instruction_index); + did_something = did_something or try builder.removeCopies(instruction_index); + } + } } } + fn removeUnreachablePhis(builder: *Builder, reachable_blocks: []const BasicBlock.Index, instruction_index: Instruction.Index) !bool { + const instruction = builder.ir.instructions.get(instruction_index); + return switch (instruction.*) { + .phi => blk: { + var did_something = false; + var head = &instruction.phi; + next: while (head.valid) { + const phi = builder.ir.phis.get(head.*); + const phi_jump = builder.ir.jumps.get(phi.jump); + assert(phi_jump.source.valid); + + for (reachable_blocks) |block_index| { + if (phi_jump.source.eq(block_index)) { + head = &phi.next; + continue :next; + } + } + + head.* = phi.next; + did_something = true; + } + + break :blk did_something; + }, + else => false, + }; + } + + fn removeTrivialPhis(builder: *Builder, instruction_index: Instruction.Index) !bool { + const instruction = builder.ir.instructions.get(instruction_index); + return switch (instruction.*) { + .phi => |phi_index| blk: { + const trivial_phi: ?Value.Index = trivial_blk: { + var only_value = Value.Index.invalid; + var it = phi_index; + + while (it.valid) { + const phi = builder.ir.phis.get(it); + const phi_value = builder.ir.values.get(phi.value); + if (phi_value.* == .phi) unreachable; + // TODO: undefined + if (only_value.valid) { + if (!only_value.eq(phi.value)) { + break :trivial_blk null; + } + } else { + only_value = phi.value; + } + + it = phi.next; + } + + break :trivial_blk only_value; + }; + + if (trivial_phi) |trivial_value| { + if (trivial_value.valid) { + // Option to delete + const delete = false; + if (delete) { + unreachable; + } else { + instruction.* = .{ + .copy = trivial_value, + }; + } + } else { + unreachable; + } + } + + break :blk instruction.* != .phi; + }, + else => false, + }; + } + + fn removeCopies(builder: *Builder, instruction_index: Instruction.Index) !bool { + const instruction = builder.ir.instructions.get(instruction_index); + return switch (instruction.*) { + .copy => false, + else => { + var did_something = false; + + const operands: []const *Value.Index = switch (instruction.*) { + .jump, .@"unreachable" => &.{}, + .ret => &.{&instruction.ret}, + // TODO: arguments + .call => blk: { + var list = ArrayList(*Value.Index){}; + break :blk list.items; + }, + .store => |store_index| blk: { + const store_instr = builder.ir.stores.get(store_index); + break :blk &.{ &store_instr.source, &store_instr.destination }; + }, + .syscall => |syscall_value_index| blk: { + const syscall_value = builder.ir.values.get(syscall_value_index); + const syscall = builder.ir.syscalls.get(syscall_value.syscall); + var list = ArrayList(*Value.Index){}; + try list.ensureTotalCapacity(builder.allocator, syscall.arguments.items.len); + for (syscall.arguments.items) |*arg| { + list.appendAssumeCapacity(arg); + } + + break :blk list.items; + }, + else => |t| @panic(@tagName(t)), + }; + + for (operands) |operand_value_index| { + const operand_value = builder.ir.values.get(operand_value_index.*); + switch (operand_value.*) { + .instruction => |operand_instruction_index| { + const operand_instruction = builder.ir.instructions.get(operand_instruction_index); + switch (operand_instruction.*) { + .copy => |copy_value| { + operand_value_index.* = copy_value; + did_something = true; + }, + else => |t| @panic(@tagName(t)), + } + }, + .integer, .stack_reference, .call => {}, + else => |t| @panic(@tagName(t)), + } + } + + return did_something; + }, + }; + } + fn blockInsideBasicBlock(builder: *Builder, sema_block: *Compilation.Block, block_index: BasicBlock.Index) !BasicBlock.Index { - builder.current_basic_block = block_index; + const current_function = builder.currentFunction(); + current_function.current_basic_block = block_index; try builder.block(sema_block, .{}); - return builder.current_basic_block; + return current_function.current_basic_block; } const BlockOptions = packed struct { @@ -253,7 +450,7 @@ pub const Builder = struct { else => |t| @panic(@tagName(t)), }; - const original_block = builder.current_basic_block; + const original_block = builder.currentFunction().current_basic_block; const jump_to_loop = try builder.append(.{ .jump = undefined, }); @@ -271,7 +468,7 @@ pub const Builder = struct { }); const sema_body_block = builder.module.blocks.get(sema_loop_body.block); - builder.current_basic_block = try builder.blockInsideBasicBlock(sema_body_block, loop_body_block); + builder.currentFunction().current_basic_block = try builder.blockInsideBasicBlock(sema_body_block, loop_body_block); if (loop_prologue_block.valid) { builder.ir.blocks.get(loop_prologue_block).seal(); } @@ -279,20 +476,20 @@ pub const Builder = struct { if (sema_body_block.reaches_end) { _ = try builder.append(.{ .jump = try builder.jump(.{ - .source = builder.current_basic_block, + .source = builder.currentFunction().current_basic_block, .destination = loop_head_block, }), }); } - builder.ir.blocks.get(builder.current_basic_block).filled = true; + builder.ir.blocks.get(builder.currentFunction().current_basic_block).filled = true; builder.ir.blocks.get(loop_body_block).seal(); if (!loop_head_block.eq(loop_body_block)) { unreachable; } if (loop_prologue_block.valid) { - builder.current_basic_block = loop_prologue_block; + builder.currentFunction().current_basic_block = loop_prologue_block; } }, .syscall => |syscall_index| { @@ -307,13 +504,16 @@ pub const Builder = struct { for (sema_syscall.getArguments()) |sema_syscall_argument| { assert(sema_syscall_argument.valid); - const argument_value_index = try builder.emitValue(sema_syscall_argument); + var argument_value_index = try builder.emitValue(sema_syscall_argument); arguments.appendAssumeCapacity(argument_value_index); } + // TODO: undo this mess _ = try builder.append(.{ - .syscall = (try builder.ir.syscalls.append(builder.allocator, .{ - .arguments = arguments, + .syscall = (try builder.ir.values.append(builder.allocator, .{ + .syscall = (try builder.ir.syscalls.append(builder.allocator, .{ + .arguments = arguments, + })).index, })).index, }); }, @@ -323,9 +523,19 @@ pub const Builder = struct { .@"return" => |sema_ret_index| { const sema_ret = builder.module.returns.get(sema_ret_index); const return_value = try builder.emitValue(sema_ret.value); - const phi_instruction = builder.ir.instructions.get(builder.return_phi_node); - const phi = builder.ir.phis.get(phi_instruction.phi); - const exit_jump = try builder.jump(.{ .source = builder.current_basic_block, .destination = phi.block }); + const phi_instruction = builder.ir.instructions.get(builder.currentFunction().return_phi_node); + const phi = switch (phi_instruction.phi.valid) { + true => unreachable, + false => (try builder.ir.phis.append(builder.allocator, std.mem.zeroes(Phi))).ptr, + }; //builder.ir.phis.get(phi_instruction.phi); + const exit_jump = try builder.jump(.{ + .source = builder.currentFunction().current_basic_block, + .destination = switch (phi_instruction.phi.valid) { + true => phi.block, + false => builder.currentFunction().return_phi_block, + }, + }); + print("Previous phi: {}\n", .{phi_instruction.phi}); phi_instruction.phi = (try builder.ir.phis.append(builder.allocator, .{ .value = return_value, .jump = exit_jump, @@ -368,18 +578,20 @@ pub const Builder = struct { } } - fn stackReference(builder: *Builder, stack_offset: u64, t: Compilation.Type, value: Compilation.Declaration.Index) !StackReference.Index { + fn stackReference(builder: *Builder, stack_offset: u64, t: Compilation.Type, sema_declaration: Compilation.Declaration.Index) !Value.Index { const stack_reference_allocation = try builder.ir.stack_references.append(builder.allocator, .{ .offset = stack_offset, .size = t.getSize(), .alignment = t.getAlignment(), }); - const index = stack_reference_allocation.index; + const value_allocation = try builder.ir.values.append(builder.allocator, .{ + .stack_reference = stack_reference_allocation.index, + }); - try builder.stack_map.put(builder.allocator, value, index); + try builder.currentFunction().stack_map.put(builder.allocator, sema_declaration, value_allocation.index); - return index; + return value_allocation.index; } fn store(builder: *Builder, descriptor: Store) !void { @@ -390,9 +602,9 @@ pub const Builder = struct { } fn allocateStack(builder: *Builder, size: u64, alignment: u64) u64 { - builder.current_stack_offset = std.mem.alignForward(u64, builder.current_stack_offset, alignment); - builder.current_stack_offset += size; - return builder.current_stack_offset; + builder.currentFunction().current_stack_offset = std.mem.alignForward(u64, builder.currentFunction().current_stack_offset, alignment); + builder.currentFunction().current_stack_offset += size; + return builder.currentFunction().current_stack_offset; } fn load(builder: *Builder, value_index: Value.Index) !Value.Index { @@ -416,10 +628,7 @@ pub const Builder = struct { return switch (sema_value) { // TODO .integer => |integer| (try builder.ir.values.append(builder.allocator, .{ - .integer = .{ - .value = integer, - .sign = false, - }, + .integer = integer, })).index, .call => |sema_call_index| { const sema_call = builder.module.calls.get(sema_call_index); @@ -455,11 +664,8 @@ pub const Builder = struct { _ = init_type; switch (sema_declaration.scope_type) { .local => { - const stack_reference = builder.stack_map.get(sema_declaration_index).?; - const value = try builder.ir.values.append(builder.allocator, .{ - .stack_reference = stack_reference, - }); - return value.index; + const stack_reference = builder.currentFunction().stack_map.get(sema_declaration_index).?; + return stack_reference; }, .global => unreachable, } @@ -484,11 +690,16 @@ pub const Builder = struct { } fn append(builder: *Builder, instruction: Instruction) !Instruction.Index { - assert(builder.current_basic_block.valid); - return builder.appendToBlock(builder.current_basic_block, instruction); + assert(builder.current_function_index.valid); + const current_function = builder.currentFunction(); + assert(current_function.current_basic_block.valid); + return builder.appendToBlock(current_function.current_basic_block, instruction); } fn appendToBlock(builder: *Builder, block_index: BasicBlock.Index, instruction: Instruction) !Instruction.Index { + if (instruction == .phi) { + print("Adding phi: {}\n", .{instruction}); + } const instruction_allocation = try builder.ir.instructions.append(builder.allocator, instruction); try builder.ir.blocks.get(block_index).instructions.append(builder.allocator, instruction_allocation.index); diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index 1963e19..7e316ab 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -1,10 +1,18 @@ const std = @import("std"); +const Allocator = std.mem.Allocator; const assert = std.debug.assert; const print = std.debug.print; const emit = @import("emit.zig"); const ir = @import("./intermediate_representation.zig"); +const Compilation = @import("../Compilation.zig"); + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const AutoArrayHashMap = data_structures.AutoArrayHashMap; + const InstructionSelector = emit.InstructionSelector(Instruction); +const x86_64 = @This(); const Size = enum(u2) { one = 0, @@ -13,9 +21,372 @@ const Size = enum(u2) { eight = 3, }; +pub const MIR = struct { + functions: ArrayList(Function) = .{}, + const GPRegister = struct { + value: ?x86_64.GPRegister = null, + can_omit_if_present: bool = true, + }; + const Stack = struct { + offset: u64, + }; + const Function = struct { + instructions: ArrayList(MIR.Instruction) = .{}, + blocks: AutoArrayHashMap(ir.BasicBlock.Index, u32) = .{}, + }; + const Instruction = struct { + operands: [4]Operand, + ir: ir.Instruction.Index, + id: Id, + operand_count: u8 = 0, + + pub fn getOperands(instruction: *MIR.Instruction) []Operand { + return instruction.operands[0..instruction.operand_count]; + } + + const Id = enum(u16) { + call, + jmp, + mov, + push, + ret, + sub, + syscall, + ud2, + }; + + fn new(id: Id, reference: ir.Instruction.Index, operands: []const Operand) MIR.Instruction { + var out_operands: [4]Operand = undefined; + @memset(std.mem.asBytes(&out_operands), 0); + @memcpy(out_operands[0..operands.len], operands); + + return .{ + .operands = out_operands, + .ir = reference, + .id = id, + .operand_count = @intCast(operands.len), + }; + } + + const Operand = union(enum) { + gp_register: MIR.GPRegister, + fp_register, + memory, + relative: union(enum) { + block: ir.BasicBlock.Index, + function: ir.Function.Index, + }, + immediate: Compilation.Integer, + stack: Stack, + }; + }; + + const RegisterUse = union(enum) { + general, + ret, + param: x86_64.GPRegister, + syscall_param: x86_64.GPRegister, + }; + + fn movRegImm(function: *Function, allocator: Allocator, integer: Compilation.Integer, instruction_index: ir.Instruction.Index, use: RegisterUse) !void { + if (integer.type.bit_count <= @bitSizeOf(u64)) { + switch (integer.type.signedness) { + .signed, .unsigned => { + if (integer.value <= std.math.maxInt(u32)) { + try function.instructions.append(allocator, MIR.Instruction.new(.mov, instruction_index, &.{ + .{ + .gp_register = .{ + .value = switch (use) { + .general => null, + .ret => .a, + .param => unreachable, + .syscall_param => |register| register, + }, + }, + }, + .{ .immediate = integer }, + })); + } else { + unreachable; + } + }, + } + } else { + unreachable; + } + } + + fn movRegStack(function: *Function, allocator: Allocator, use: RegisterUse, stack_reference: ir.StackReference, instruction_index: ir.Instruction.Index) !void { + if (stack_reference.size <= @sizeOf(u64)) { + switch (stack_reference.size) { + @sizeOf(u8) => unreachable, + @sizeOf(u16) => unreachable, + @sizeOf(u32) => { + try function.instructions.append(allocator, MIR.Instruction.new(.mov, instruction_index, &.{ + .{ + .gp_register = .{ + .value = switch (use) { + .general => null, + .ret => unreachable, + .param => unreachable, + .syscall_param => |syscall_register| syscall_register, + }, + }, + }, + .{ + .stack = .{ + .offset = stack_reference.offset, + }, + }, + })); + }, + @sizeOf(u64) => unreachable, + else => unreachable, + } + } else { + unreachable; + } + } + + pub fn generate(allocator: Allocator, intermediate: *ir.Result) !MIR { + var mir = MIR{}; + try mir.functions.ensureTotalCapacity(allocator, intermediate.functions.len); + var ir_function_it = intermediate.functions.iterator(); + + while (ir_function_it.nextPointer()) |ir_function| { + const function = mir.functions.addOneAssumeCapacity(); + function.* = .{}; + try function.blocks.ensureTotalCapacity(allocator, ir_function.blocks.items.len); + for (ir_function.blocks.items) |block_index| { + function.blocks.putAssumeCapacity(block_index, @intCast(function.instructions.items.len)); + const basic_block = intermediate.blocks.get(block_index); + + if (ir_function.current_stack_offset > 0) { + // TODO: switch on ABI + try function.instructions.append(allocator, MIR.Instruction.new(.push, ir.Instruction.Index.invalid, &.{ + .{ .gp_register = .{ .value = .bp } }, + })); + + try function.instructions.append(allocator, MIR.Instruction.new(.mov, ir.Instruction.Index.invalid, &.{ + .{ .gp_register = .{ .value = .bp } }, + .{ .gp_register = .{ .value = .sp } }, + })); + + try function.instructions.append(allocator, MIR.Instruction.new(.sub, ir.Instruction.Index.invalid, &.{ + .{ .gp_register = .{ .value = .sp } }, + .{ + .immediate = Compilation.Integer{ + .value = ir_function.current_stack_offset, + .type = .{ + .bit_count = 8, + .signedness = .unsigned, + }, + }, + }, + })); + } + + for (basic_block.instructions.items) |instruction_index| { + const instruction = intermediate.instructions.get(instruction_index); + switch (instruction.*) { + .jump => |jump_index| { + const jump = intermediate.jumps.get(jump_index); + try function.instructions.append(allocator, MIR.Instruction.new(.jmp, instruction_index, &.{ + .{ .relative = .{ .block = jump.destination } }, + })); + }, + .copy => |copy_value_index| { + const copy_value = intermediate.values.get(copy_value_index); + switch (copy_value.*) { + .integer => |integer| try movRegImm(function, allocator, integer, instruction_index, .general), + else => |t| @panic(@tagName(t)), + } + }, + .ret => |ret_value_index| { + const ret_value = intermediate.values.get(ret_value_index); + switch (ret_value.*) { + .integer => |integer| try movRegImm(function, allocator, integer, instruction_index, .ret), + else => |t| @panic(@tagName(t)), + } + + if (ir_function.current_stack_offset > 0) { + unreachable; + } + + try function.instructions.append(allocator, MIR.Instruction.new(.ret, instruction_index, &.{})); + }, + .call => |call_value_index| { + // TODO: args + const call = intermediate.calls.get(call_value_index); + try function.instructions.append(allocator, MIR.Instruction.new(.call, instruction_index, &.{ + .{ .relative = .{ .function = call.function } }, + })); + }, + .store => |store_index| { + const store = intermediate.stores.get(store_index); + const source_value = intermediate.values.get(store.source); + const destination_value = intermediate.values.get(store.destination); + switch (destination_value.*) { + .stack_reference => |stack_reference_index| { + const stack_reference = intermediate.stack_references.get(stack_reference_index); + print("stack ref: {}\n", .{stack_reference}); + switch (source_value.*) { + .call => |call_index| { + try storeFunctionCallResult(allocator, function, intermediate, instruction_index, stack_reference.*, call_index); + }, + else => |t| @panic(@tagName(t)), + } + }, + else => |t| @panic(@tagName(t)), + } + }, + .syscall => |syscall_value_index| { + const syscall_value = intermediate.values.get(syscall_value_index); + const syscall = intermediate.syscalls.get(syscall_value.syscall); + for (syscall.arguments.items, syscall_registers[0..syscall.arguments.items.len]) |argument_index, syscall_register| { + const argument = intermediate.values.get(argument_index).*; + switch (argument) { + .integer => |integer| try movRegImm(function, allocator, integer, instruction_index, .{ .syscall_param = syscall_register }), + .stack_reference => |stack_reference_index| { + const stack_reference = intermediate.stack_references.get(stack_reference_index); + try movRegStack(function, allocator, .{ .syscall_param = syscall_register }, stack_reference.*, instruction_index); + }, + else => |t| @panic(@tagName(t)), + } + } + + try function.instructions.append(allocator, MIR.Instruction.new(.syscall, instruction_index, &.{})); + }, + .@"unreachable" => try function.instructions.append(allocator, MIR.Instruction.new(.ud2, instruction_index, &.{})), + else => |t| @panic(@tagName(t)), + } + } + } + } + + return mir; + } + + const RegisterAllocator = struct { + gp_registers: RegisterSet(x86_64.GPRegister) = .{}, + + fn init(allocator: Allocator) !RegisterAllocator { + var register_allocator = RegisterAllocator{}; + try register_allocator.gp_registers.free.ensureTotalCapacity(allocator, @typeInfo(x86_64.GPRegister).Enum.fields.len); + inline for (@typeInfo(x86_64.GPRegister).Enum.fields) |enum_field| { + register_allocator.gp_registers.free.putAssumeCapacity(@field(x86_64.GPRegister, enum_field.name), {}); + } + + return register_allocator; + } + }; + + fn RegisterSet(comptime RegisterEnum: type) type { + return struct { + used: AutoArrayHashMap(RegisterEnum, ir.Value.Index) = .{}, + free: AutoArrayHashMap(RegisterEnum, void) = .{}, + + fn allocate(register_set: *@This(), allocator: Allocator, register: RegisterEnum, intermediate: *ir.Result, instruction: MIR.Instruction, value_index: ir.Value.Index) !void { + switch (intermediate.instructions.get(instruction.ir).*) { + .store => {}, + else => { + switch (register_set.free.orderedRemove(register)) { + true => try register_set.used.put(allocator, register, value_index), + false => unreachable, + } + }, + } + } + }; + } + + fn getValueFromInstruction(intermediate: *ir.Result, instruction_index: ir.Instruction.Index) ir.Value.Index { + const instruction = intermediate.instructions.get(instruction_index); + const value_index: ir.Value.Index = switch (instruction.*) { + .copy, .ret, .syscall => |value_index| value_index, + .store => |store_index| blk: { + const store = intermediate.stores.get(store_index); + break :blk store.source; + }, + else => |t| @panic(@tagName(t)), + }; + + return value_index; + } + + pub fn allocateRegisters(mir: *MIR, allocator: Allocator, intermediate: *ir.Result) !void { + for (mir.functions.items) |*function| { + var register_allocator = try RegisterAllocator.init(allocator); + for (function.instructions.items) |*instruction| { + for (instruction.getOperands()) |*operand| { + switch (operand.*) { + .relative, .immediate, .stack => {}, + .gp_register => |gp_register| switch (instruction.ir.valid) { + true => operand.gp_register.value = blk: { + const value_index = getValueFromInstruction(intermediate, instruction.ir); + + if (gp_register.value) |expected_register| { + if (register_allocator.gp_registers.used.get(expected_register)) |allocated_value| { + const allocated = intermediate.values.get(allocated_value); + const value = intermediate.values.get(value_index); + print("\nAllocated: {}.\nValue: {}\n", .{ allocated.*, value.* }); + switch (value_index.eq(allocated_value)) { + true => {}, + false => unreachable, + } + } else { + if (register_allocator.gp_registers.free.get(expected_register)) |_| { + try register_allocator.gp_registers.allocate(allocator, expected_register, intermediate, instruction.*, value_index); + } else { + unreachable; + } + } + + break :blk expected_register; + } else { + for (register_allocator.gp_registers.free.keys()) |register| { + try register_allocator.gp_registers.allocate(allocator, register, intermediate, instruction.*, value_index); + break :blk register; + } else { + unreachable; + } + } + }, + false => {}, + }, + else => |t| @panic(@tagName(t)), + } + } + } + } + } + + fn storeFunctionCallResult(allocator: Allocator, function: *MIR.Function, intermediate: *ir.Result, instruction: ir.Instruction.Index, stack_reference: ir.StackReference, call_index: ir.Call.Index) !void { + _ = call_index; + _ = intermediate; + if (stack_reference.size <= @sizeOf(u64)) { + switch (stack_reference.size) { + @sizeOf(u8) => unreachable, + @sizeOf(u16) => unreachable, + @sizeOf(u32) => try function.instructions.append(allocator, MIR.Instruction.new(.mov, instruction, &.{ + .{ .stack = .{ .offset = stack_reference.offset } }, .{ .gp_register = .{ .value = .a } }, + })), + @sizeOf(u64) => unreachable, + else => unreachable, + } + } else { + unreachable; + } + } +}; + pub fn selectInstruction(instruction_selector: *InstructionSelector, function: *InstructionSelector.Function, intermediate: *ir.Result, instruction: ir.Instruction) !void { switch (instruction) { - .@"unreachable" => try function.instructions.append(instruction_selector.allocator, .{ .ud2 = {} }), + .copy => |copy_value| { + _ = copy_value; + unreachable; + }, + .@"unreachable" => _ = try function.addInstruction(instruction_selector.allocator, .{ .ud2 = {} }), .load => |load_index| { const load = intermediate.loads.get(load_index).*; const load_value = intermediate.values.get(load.value).*; @@ -35,7 +406,7 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * switch (argument) { .integer => |integer| { if (integer.value == 0) { - try function.instructions.append(instruction_selector.allocator, .{ + _ = try function.addInstruction(instruction_selector.allocator, .{ .xor_rm_r = .{ .destination = @enumFromInt(@intFromEnum(syscall_register)), .source = @enumFromInt(@intFromEnum(syscall_register)), @@ -44,7 +415,7 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * }, }); } else if (integer.value <= std.math.maxInt(u32)) { - try function.instructions.append(instruction_selector.allocator, .{ + _ = try function.addInstruction(instruction_selector.allocator, .{ .mov_r_imm = .{ .register_size = .four, .register = @enumFromInt(@intFromEnum(syscall_register)), @@ -59,7 +430,7 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * } } - try function.instructions.append(instruction_selector.allocator, .{ + _ = try function.addInstruction(instruction_selector.allocator, .{ .syscall = {}, }); }, @@ -67,19 +438,15 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * .ret => unreachable, .jump => |jump_index| { const jump = intermediate.jumps.get(jump_index); - const relocation = Displacement{ - .size = .one, - .source = @intCast(function.block_map.get(jump.source) orelse unreachable), - .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable), - .offset_in_block = function.block_byte_count, - }; - _ = relocation; - // const index = function.instructions.items.len; - // try function.relocations.append(instruction_selector.allocator, @intCast(index)); - // try function.instructions.append(instruction_selector.allocator, .{ - // .jmp_rel_8 = relocation, - // }); - unreachable; + const instruction_index = try function.addInstruction(instruction_selector.allocator, .{ + .jmp_rel = Displacement{ + .size = .one, + .source = @intCast(function.block_map.get(jump.source) orelse unreachable), + .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable), + .instruction_index = @intCast(function.instructions.items.len), + }, + }); + try function.relocations.append(instruction_selector.allocator, instruction_index); }, .call => unreachable, .store => unreachable, @@ -101,10 +468,10 @@ const RegisterMemoryRegister = struct { }; const Displacement = struct { + instruction_index: u16, size: Size, source: u16, destination: u16, - offset_in_block: u16, }; const RmResult = struct { diff --git a/src/data_structures.zig b/src/data_structures.zig index 7afff5d..39b0df9 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -2,6 +2,7 @@ const std = @import("std"); const assert = std.debug.assert; pub const Allocator = std.mem.Allocator; +pub const AutoArrayHashMap = std.AutoArrayHashMapUnmanaged; pub const ArrayList = std.ArrayListUnmanaged; pub const AutoHashMap = std.AutoHashMapUnmanaged; pub const HashMap = std.HashMapUnmanaged; diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index 289862b..1ad2c3d 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -94,7 +94,6 @@ const Analyzer = struct { } fn block(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index { - _ = expect_type; var reaches_end = true; const block_node = analyzer.getNode(scope_index, node_index); var statement_nodes = ArrayList(Node.Index){}; @@ -142,9 +141,9 @@ const Analyzer = struct { }, }; try analyzer.resolveNode(right_value_allocation.ptr, scope_index, ExpectType.none, statement_node.right); - switch (right_value_allocation.ptr.*) { - else => |t| std.debug.print("\n\n\n\n\nASSIGN RIGHT: {s}\n\n\n\n", .{@tagName(t)}), - } + // switch (right_value_allocation.ptr.*) { + // else => |t| std.debug.print("\n\n\n\n\nASSIGN RIGHT: {s}\n\n\n\n", .{@tagName(t)}), + // } try statements.append(analyzer.allocator, right_value_allocation.index); continue; }, @@ -208,7 +207,7 @@ const Analyzer = struct { .node_index = statement_node.left, }, }; - try analyzer.resolveNode(return_value_allocation.ptr, scope_index, ExpectType.none, statement_node.left); + try analyzer.resolveNode(return_value_allocation.ptr, scope_index, expect_type, statement_node.left); break :ret return_value_allocation.index; }, false => @panic("TODO: ret void"), @@ -299,6 +298,7 @@ const Analyzer = struct { @panic("TODO: compile error"); } }, + else => unreachable, } // TODO @@ -337,7 +337,11 @@ const Analyzer = struct { var argument_nodes = try analyzer.getArguments(scope_index, node_index); print("Argument count: {}\n", .{argument_nodes.items.len}); if (argument_nodes.items.len > 0 and argument_nodes.items.len <= 6 + 1) { - const number_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, argument_nodes.items[0]); + const number_allocation = try analyzer.unresolvedAllocate(scope_index, .{ + .flexible_integer = .{ + .byte_count = 8, + }, + }, argument_nodes.items[0]); const number = number_allocation.index; assert(number.valid); var arguments = std.mem.zeroes([6]Value.Index); @@ -388,8 +392,28 @@ const Analyzer = struct { }; }, .number_literal => switch (std.zig.parseNumberLiteral(analyzer.numberBytes(scope_index, node.token))) { - .int => |integer| .{ - .integer = integer, + .int => |integer| blk: { + assert(expect_type != .none); + const int_type = switch (expect_type) { + .flexible_integer => |flexible_integer_type| Compilation.Type.Integer{ + .bit_count = flexible_integer_type.byte_count << 3, + .signedness = .unsigned, + }, + .type_index => |type_index| a: { + const type_info = analyzer.module.types.get(type_index); + break :a switch (type_info.*) { + .integer => |int| int, + else => |t| @panic(@tagName(t)), + }; + }, + else => |t| @panic(@tagName(t)), + }; + break :blk .{ + .integer = .{ + .value = integer, + .type = int_type, + }, + }; }, else => |t| @panic(@tagName(t)), }, @@ -744,6 +768,7 @@ const Analyzer = struct { const ExpectType = union(enum) { none, type_index: Type.Index, + flexible_integer: FlexibleInteger, pub const none = ExpectType{ .none = {}, @@ -751,6 +776,11 @@ const ExpectType = union(enum) { pub const boolean = ExpectType{ .type_index = type_boolean, }; + + const FlexibleInteger = struct { + byte_count: u8, + sign: ?bool = null, + }; }; const type_boolean = Type.Index{ From d391898b95626b3ea269737f7d9860adaa124b7f Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Sun, 1 Oct 2023 21:04:59 -0600 Subject: [PATCH 4/4] instruction encoding --- src/Compilation.zig | 1 + src/backend/emit.zig | 80 +- src/backend/intermediate_representation.zig | 26 +- src/backend/x86_64.zig | 1016 +++++++++++-------- src/frontend/semantic_analyzer.zig | 49 +- 5 files changed, 629 insertions(+), 543 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 54f89fa..d2268d4 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -303,6 +303,7 @@ pub const Module = struct { calls: BlockList(Call) = .{}, argument_list: BlockList(ArgumentList) = .{}, returns: BlockList(Return) = .{}, + entry_point: ?u32 = null, pub const Descriptor = struct { main_package_path: []const u8, diff --git a/src/backend/emit.zig b/src/backend/emit.zig index 97c4c2e..c35c0f3 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -27,7 +27,7 @@ pub const Result = struct { }, entry_point: u32 = 0, - fn create() !Result { + pub fn create() !Result { return Result{ .sections = .{ .text = .{ .content = try mmap(page_size, .{ .executable = true }) }, @@ -82,14 +82,6 @@ pub const Result = struct { image.sections.text.index += 1; } - // fn appendOnlyOpcodeSkipInstructionBytes(image: *Result, instruction: Instruction) void { - // const instruction_descriptor = instruction_descriptors.get(instruction); - // assert(instruction_descriptor.opcode_byte_count == instruction_descriptor.operand_offset); - // image.appendCode(instruction_descriptor.getOpcode()); - // - // image.sections.text.index += instruction_descriptor.size - instruction_descriptor.opcode_byte_count; - // } - fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType { comptime { assert(@typeInfo(FunctionType) == .Fn); @@ -127,79 +119,13 @@ pub fn get(comptime arch: std.Target.Cpu.Arch) type { .x86_64 => @import("x86_64.zig"), else => @compileError("Architecture not supported"), }; - const Instruction = backend.Instruction; - _ = Instruction; return struct { pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void { - var result = try Result.create(); + std.debug.print("Entry point: {}\n", .{intermediate.entry_point}); var mir = try backend.MIR.generate(allocator, intermediate); try mir.allocateRegisters(allocator, intermediate); - // var function_iterator = intermediate.functions.iterator(); - // const IS = InstructionSelector(Instruction); - // var instruction_selector = IS{ - // .functions = try ArrayList(IS.Function).initCapacity(allocator, intermediate.functions.len), - // .allocator = allocator, - // }; - // - // while (function_iterator.next()) |ir_function| { - // const function = instruction_selector.functions.addOneAssumeCapacity(); - // function.* = .{}; - // try function.block_map.ensureTotalCapacity(allocator, @intCast(ir_function.blocks.items.len)); - // for (ir_function.blocks.items, 0..) |block_index, index| { - // function.block_map.putAssumeCapacity(block_index, @intCast(index)); - // } - // - // for (ir_function.blocks.items) |block_index| { - // const block = intermediate.blocks.get(block_index); - // for (block.instructions.items) |instruction_index| { - // const instruction = intermediate.instructions.get(instruction_index).*; - // try backend.selectInstruction(&instruction_selector, function, intermediate, instruction); - // } - // - // // function.block_byte_counts.appendAssumeCapacity(function.block_byte_count); - // // function.byte_count += function.block_byte_count; - // } - // } - // - // for (instruction_selector.functions.items) |function| { - // for (function.instructions.items) |instruction| backend.emitInstruction(&result, instruction, intermediate); - // } - - // for (instruction_selector.functions.items) |function| { - // var fix_size: bool = false; - // _ = fix_size; - // for (function.relocations.items) |instruction_index| { - // const instruction = function.instructions.items[instruction_index]; - // const relative = instruction.jmp_rel_8; - // const source_block = relative.source; - // const destination_block = relative.destination; - // const source_offset = function.block_offsets.items[source_block]; - // const destination_offset = function.block_offsets.items[destination_block]; - // std.debug.print("Source offset: {}. Destination: {}\n", .{ source_offset, destination_offset }); - // const instruction_descriptor = instruction_descriptors.get(relative.instruction); - // const instruction_offset = source_offset + relative.block_offset; - // const really_source_offset = instruction_offset + instruction_descriptor.size; - // const displacement = @as(i64, destination_offset) - @as(i64, really_source_offset); - // - // const operands = instruction_descriptor.getOperands(); - // switch (operands.len) { - // 1 => switch (operands[0].size) { - // @sizeOf(u8) => { - // if (displacement >= std.math.minInt(i8) and displacement <= std.math.maxInt(i8)) { - // const writer_index = instruction_offset + instruction_descriptor.operand_offset; - // std.debug.print("Instruction offset: {}. Operand offset: {}. Writer index: {}. displacement: {}\n", .{ instruction_offset, instruction_descriptor.operand_offset, writer_index, displacement }); - // result.sections.text.content[writer_index] = @bitCast(@as(i8, @intCast(displacement))); - // } else { - // unreachable; - // } - // }, - // else => unreachable, - // }, - // else => unreachable, - // } - // } - // } + const result = try mir.encode(intermediate); const text_section = result.sections.text.content[0..result.sections.text.index]; for (text_section) |byte| { diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index 0ff9b3e..b396aa7 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -25,6 +25,7 @@ pub const Result = struct { syscalls: BlockList(Syscall) = .{}, values: BlockList(Value) = .{}, stack_references: BlockList(StackReference) = .{}, + entry_point: u32 = 0, }; pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_file: Compilation.Type.Index) !Result { @@ -38,6 +39,8 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, .module = module, }; + builder.ir.entry_point = module.entry_point orelse unreachable; + while (function_iterator.next()) |sema_function| { const function_index = try builder.buildFunction(sema_function); try builder.optimizeFunction(function_index); @@ -284,10 +287,29 @@ pub const Builder = struct { for (basic_block.instructions.items) |instruction_index| { did_something = did_something or try builder.removeUnreachablePhis(reachable_blocks, instruction_index); did_something = did_something or try builder.removeTrivialPhis(instruction_index); - did_something = did_something or try builder.removeCopies(instruction_index); + const copy = try builder.removeCopyReferences(instruction_index); + did_something = did_something or copy; } } } + + var instructions_to_delete = ArrayList(u32){}; + for (reachable_blocks) |basic_block_index| { + instructions_to_delete.clearRetainingCapacity(); + const basic_block = builder.ir.blocks.get(basic_block_index); + for (basic_block.instructions.items, 0..) |instruction_index, index| { + const instruction = builder.ir.instructions.get(instruction_index); + switch (instruction.*) { + .copy => try instructions_to_delete.append(builder.allocator, @intCast(index)), + else => {}, + } + } + + var deleted_instruction_count: usize = 0; + for (instructions_to_delete.items) |instruction_to_delete| { + _ = basic_block.instructions.orderedRemove(instruction_to_delete - deleted_instruction_count); + } + } } fn removeUnreachablePhis(builder: *Builder, reachable_blocks: []const BasicBlock.Index, instruction_index: Instruction.Index) !bool { @@ -367,7 +389,7 @@ pub const Builder = struct { }; } - fn removeCopies(builder: *Builder, instruction_index: Instruction.Index) !bool { + fn removeCopyReferences(builder: *Builder, instruction_index: Instruction.Index) !bool { const instruction = builder.ir.instructions.get(instruction_index); return switch (instruction.*) { .copy => false, diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index 7e316ab..f56900e 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -11,7 +11,6 @@ const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const AutoArrayHashMap = data_structures.AutoArrayHashMap; -const InstructionSelector = emit.InstructionSelector(Instruction); const x86_64 = @This(); const Size = enum(u2) { @@ -19,12 +18,57 @@ const Size = enum(u2) { two = 1, four = 2, eight = 3, + + fn fromByteCount(byte_count: u8) Size { + return @enumFromInt(@as(u2, @intCast(std.math.log2(byte_count)))); + } + + fn fromBitCount(bit_count: u16) Size { + assert(bit_count % @bitSizeOf(u8) == 0); + const byte_count: u8 = @intCast(bit_count >> 3); + return fromByteCount(byte_count); + } + + fn toInteger(comptime size: Size) type { + return switch (size) { + .one => u8, + .two => u16, + .four => u32, + .eight => u64, + }; + } +}; + +fn Relocation(comptime Target: type) type { + return struct { + target: Target, + instruction_byte_offset: u32, + instruction_length: u8, + source_address_writer_offset: u8, + size: Size, + }; +} + +const LocalRelocation = Relocation(ir.BasicBlock.Index); +const GlobalRelocation = Relocation(u32); + +fn RelocationIndex(comptime relocation_type: RelocationType) type { + return switch (relocation_type) { + .local => ir.BasicBlock.Index, + .global => u32, + }; +} +const RelocationType = enum { + local, + global, }; pub const MIR = struct { functions: ArrayList(Function) = .{}, + allocator: Allocator, const GPRegister = struct { value: ?x86_64.GPRegister = null, + size: Size, can_omit_if_present: bool = true, }; const Stack = struct { @@ -33,6 +77,7 @@ pub const MIR = struct { const Function = struct { instructions: ArrayList(MIR.Instruction) = .{}, blocks: AutoArrayHashMap(ir.BasicBlock.Index, u32) = .{}, + instruction_byte_offset: u32 = 0, }; const Instruction = struct { operands: [4]Operand, @@ -53,32 +98,22 @@ pub const MIR = struct { sub, syscall, ud2, + xor, }; - - fn new(id: Id, reference: ir.Instruction.Index, operands: []const Operand) MIR.Instruction { - var out_operands: [4]Operand = undefined; - @memset(std.mem.asBytes(&out_operands), 0); - @memcpy(out_operands[0..operands.len], operands); - - return .{ - .operands = out_operands, - .ir = reference, - .id = id, - .operand_count = @intCast(operands.len), - }; - } - - const Operand = union(enum) { - gp_register: MIR.GPRegister, - fp_register, - memory, - relative: union(enum) { + }; + const Operand = union(enum) { + gp_register: MIR.GPRegister, + fp_register, + memory, + displacement: struct { + source: ir.BasicBlock.Index, + destination: union(enum) { block: ir.BasicBlock.Index, function: ir.Function.Index, }, - immediate: Compilation.Integer, - stack: Stack, - }; + }, + immediate: Compilation.Integer, + stack: Stack, }; const RegisterUse = union(enum) { @@ -88,41 +123,78 @@ pub const MIR = struct { syscall_param: x86_64.GPRegister, }; - fn movRegImm(function: *Function, allocator: Allocator, integer: Compilation.Integer, instruction_index: ir.Instruction.Index, use: RegisterUse) !void { + fn addInstruction(mir: *MIR, function: *Function, instruction_id: Instruction.Id, ir_instruction: ir.Instruction.Index, operands: []const Operand) !void { + var out_operands: [4]Operand = undefined; + @memset(std.mem.asBytes(&out_operands), 0); + @memcpy(out_operands[0..operands.len], operands); + + const instruction = MIR.Instruction{ + .operands = out_operands, + .ir = ir_instruction, + .id = instruction_id, + .operand_count = @intCast(operands.len), + }; + print("Adding instruction {s}\n", .{@tagName(instruction_id)}); + try function.instructions.append(mir.allocator, instruction); + } + + fn emitMovRegImm(mir: *MIR, function: *Function, integer: Compilation.Integer, instruction_index: ir.Instruction.Index, use: RegisterUse, register_size: Size) !void { if (integer.type.bit_count <= @bitSizeOf(u64)) { - switch (integer.type.signedness) { - .signed, .unsigned => { - if (integer.value <= std.math.maxInt(u32)) { - try function.instructions.append(allocator, MIR.Instruction.new(.mov, instruction_index, &.{ - .{ - .gp_register = .{ - .value = switch (use) { - .general => null, - .ret => .a, - .param => unreachable, - .syscall_param => |register| register, - }, - }, + if (integer.value == 0) { + const operand = .{ + .gp_register = .{ + .value = switch (use) { + .general => null, + .ret => .a, + .param => unreachable, + .syscall_param => |register| register, + }, + .size = register_size, + }, + }; + + try mir.addInstruction(function, .xor, instruction_index, &.{ + operand, + operand, + }); + } else if (integer.value <= std.math.maxInt(u32)) { + try mir.addInstruction(function, .mov, instruction_index, &.{ + .{ + .gp_register = .{ + .value = switch (use) { + .general => null, + .ret => .a, + .param => unreachable, + .syscall_param => |register| register, }, - .{ .immediate = integer }, - })); - } else { - unreachable; - } - }, + .size = .four, + }, + }, + .{ + .immediate = .{ + .value = integer.value, + .type = .{ + .signedness = integer.type.signedness, + .bit_count = 32, + }, + }, + }, + }); + } else { + unreachable; } } else { unreachable; } } - fn movRegStack(function: *Function, allocator: Allocator, use: RegisterUse, stack_reference: ir.StackReference, instruction_index: ir.Instruction.Index) !void { + fn emitMovRegStack(mir: *MIR, function: *Function, use: RegisterUse, stack_reference: ir.StackReference, instruction_index: ir.Instruction.Index) !void { if (stack_reference.size <= @sizeOf(u64)) { switch (stack_reference.size) { @sizeOf(u8) => unreachable, @sizeOf(u16) => unreachable, @sizeOf(u32) => { - try function.instructions.append(allocator, MIR.Instruction.new(.mov, instruction_index, &.{ + try mir.addInstruction(function, .mov, instruction_index, &.{ .{ .gp_register = .{ .value = switch (use) { @@ -131,6 +203,7 @@ pub const MIR = struct { .param => unreachable, .syscall_param => |syscall_register| syscall_register, }, + .size = Size.fromByteCount(@intCast(stack_reference.size)), }, }, .{ @@ -138,7 +211,7 @@ pub const MIR = struct { .offset = stack_reference.offset, }, }, - })); + }); }, @sizeOf(u64) => unreachable, else => unreachable, @@ -149,7 +222,9 @@ pub const MIR = struct { } pub fn generate(allocator: Allocator, intermediate: *ir.Result) !MIR { - var mir = MIR{}; + var mir = MIR{ + .allocator = allocator, + }; try mir.functions.ensureTotalCapacity(allocator, intermediate.functions.len); var ir_function_it = intermediate.functions.iterator(); @@ -163,17 +238,15 @@ pub const MIR = struct { if (ir_function.current_stack_offset > 0) { // TODO: switch on ABI - try function.instructions.append(allocator, MIR.Instruction.new(.push, ir.Instruction.Index.invalid, &.{ - .{ .gp_register = .{ .value = .bp } }, - })); + try mir.addInstruction(function, .push, ir.Instruction.Index.invalid, &.{.{ .gp_register = .{ .value = .bp, .size = .eight } }}); - try function.instructions.append(allocator, MIR.Instruction.new(.mov, ir.Instruction.Index.invalid, &.{ - .{ .gp_register = .{ .value = .bp } }, - .{ .gp_register = .{ .value = .sp } }, - })); + try mir.addInstruction(function, .mov, ir.Instruction.Index.invalid, &.{ + .{ .gp_register = .{ .value = .bp, .size = .eight } }, + .{ .gp_register = .{ .value = .sp, .size = .eight } }, + }); - try function.instructions.append(allocator, MIR.Instruction.new(.sub, ir.Instruction.Index.invalid, &.{ - .{ .gp_register = .{ .value = .sp } }, + try mir.addInstruction(function, .sub, ir.Instruction.Index.invalid, &.{ + .{ .gp_register = .{ .value = .sp, .size = .eight } }, .{ .immediate = Compilation.Integer{ .value = ir_function.current_stack_offset, @@ -183,7 +256,7 @@ pub const MIR = struct { }, }, }, - })); + }); } for (basic_block.instructions.items) |instruction_index| { @@ -191,21 +264,24 @@ pub const MIR = struct { switch (instruction.*) { .jump => |jump_index| { const jump = intermediate.jumps.get(jump_index); - try function.instructions.append(allocator, MIR.Instruction.new(.jmp, instruction_index, &.{ - .{ .relative = .{ .block = jump.destination } }, - })); + try mir.addInstruction(function, .jmp, instruction_index, &.{ + .{ .displacement = .{ + .source = jump.source, + .destination = .{ .block = jump.destination }, + } }, + }); }, .copy => |copy_value_index| { const copy_value = intermediate.values.get(copy_value_index); switch (copy_value.*) { - .integer => |integer| try movRegImm(function, allocator, integer, instruction_index, .general), + .integer => |integer| try mir.emitMovRegImm(function, integer, instruction_index, .general, Size.fromBitCount(integer.type.bit_count)), else => |t| @panic(@tagName(t)), } }, .ret => |ret_value_index| { const ret_value = intermediate.values.get(ret_value_index); switch (ret_value.*) { - .integer => |integer| try movRegImm(function, allocator, integer, instruction_index, .ret), + .integer => |integer| try mir.emitMovRegImm(function, integer, instruction_index, .ret, Size.fromBitCount(integer.type.bit_count)), else => |t| @panic(@tagName(t)), } @@ -213,14 +289,21 @@ pub const MIR = struct { unreachable; } - try function.instructions.append(allocator, MIR.Instruction.new(.ret, instruction_index, &.{})); + try mir.addInstruction(function, .ret, instruction_index, &.{}); }, .call => |call_value_index| { // TODO: args const call = intermediate.calls.get(call_value_index); - try function.instructions.append(allocator, MIR.Instruction.new(.call, instruction_index, &.{ - .{ .relative = .{ .function = call.function } }, - })); + try mir.addInstruction(function, .call, instruction_index, &.{ + .{ + .displacement = .{ + .source = block_index, + .destination = .{ + .function = call.function, + }, + }, + }, + }); }, .store => |store_index| { const store = intermediate.stores.get(store_index); @@ -231,9 +314,7 @@ pub const MIR = struct { const stack_reference = intermediate.stack_references.get(stack_reference_index); print("stack ref: {}\n", .{stack_reference}); switch (source_value.*) { - .call => |call_index| { - try storeFunctionCallResult(allocator, function, intermediate, instruction_index, stack_reference.*, call_index); - }, + .call => |call_index| try mir.emitStoreForFunctionCallResult(function, intermediate, instruction_index, stack_reference.*, call_index), else => |t| @panic(@tagName(t)), } }, @@ -246,18 +327,18 @@ pub const MIR = struct { for (syscall.arguments.items, syscall_registers[0..syscall.arguments.items.len]) |argument_index, syscall_register| { const argument = intermediate.values.get(argument_index).*; switch (argument) { - .integer => |integer| try movRegImm(function, allocator, integer, instruction_index, .{ .syscall_param = syscall_register }), + .integer => |integer| try mir.emitMovRegImm(function, integer, instruction_index, .{ .syscall_param = syscall_register }, Size.eight), .stack_reference => |stack_reference_index| { const stack_reference = intermediate.stack_references.get(stack_reference_index); - try movRegStack(function, allocator, .{ .syscall_param = syscall_register }, stack_reference.*, instruction_index); + try mir.emitMovRegStack(function, .{ .syscall_param = syscall_register }, stack_reference.*, instruction_index); }, else => |t| @panic(@tagName(t)), } } - try function.instructions.append(allocator, MIR.Instruction.new(.syscall, instruction_index, &.{})); + try mir.addInstruction(function, .syscall, instruction_index, &.{}); }, - .@"unreachable" => try function.instructions.append(allocator, MIR.Instruction.new(.ud2, instruction_index, &.{})), + .@"unreachable" => try mir.addInstruction(function, .ud2, instruction_index, &.{}), else => |t| @panic(@tagName(t)), } } @@ -267,6 +348,85 @@ pub const MIR = struct { return mir; } + pub fn allocateRegisters(mir: *MIR, allocator: Allocator, intermediate: *ir.Result) !void { + for (mir.functions.items) |*function| { + var register_allocator = try RegisterAllocator.init(allocator); + var instructions_to_delete = AutoArrayHashMap(u32, void){}; + for (function.instructions.items, 0..) |*instruction, instruction_index| { + print("#{} {s}\n", .{ instruction_index, @tagName(instruction.id) }); + var allocated_gp_register: ?x86_64.GPRegister = null; + for (instruction.getOperands()) |*operand| { + switch (operand.*) { + .displacement, .immediate, .stack => {}, + .gp_register => |gp_register| switch (instruction.ir.valid) { + true => operand.gp_register.value = blk: { + const value_index = getValueFromInstruction(intermediate, instruction.ir); + + if (gp_register.value) |expected_register| { + if (register_allocator.gp_registers.used.get(expected_register)) |allocated_value| { + switch (value_index.eq(allocated_value)) { + // TODO delete the instruction + true => if (allocated_gp_register == null) unreachable else { + assert(allocated_gp_register.? == expected_register); + }, + // _ = try instructions_to_delete.getOrPut(allocator, @intCast(instruction_index)), //.append(allocator, @intCast(instruction_index)), + false => unreachable, + } + } else { + if (register_allocator.gp_registers.free.get(expected_register)) |_| { + try register_allocator.gp_registers.allocate(allocator, expected_register, intermediate, instruction.*, value_index); + allocated_gp_register = expected_register; + } else { + unreachable; + } + } + + break :blk expected_register; + } else { + for (register_allocator.gp_registers.free.keys()) |register| { + try register_allocator.gp_registers.allocate(allocator, register, intermediate, instruction.*, value_index); + break :blk register; + } else { + unreachable; + } + } + }, + false => {}, + }, + else => |t| @panic(@tagName(t)), + } + } + } + + if (instructions_to_delete.keys().len > 0) { + var next_instruction_to_delete_index: usize = 0; + print("Instructions to delete: ", .{}); + for (instructions_to_delete.keys()) |instruction| { + print("#{}, ", .{instruction}); + } + print("\n", .{}); + for (function.blocks.keys(), function.blocks.values()) |*block_index, *instruction_offset| { + _ = block_index; + while (instructions_to_delete.keys()[next_instruction_to_delete_index] <= instruction_offset.*) : (next_instruction_to_delete_index += 1) { + unreachable; + } + } + + var removed_instruction_count: usize = 0; + for (instructions_to_delete.keys()) |instruction_to_delete_index| { + _ = function.instructions.orderedRemove(instruction_to_delete_index - removed_instruction_count); + removed_instruction_count += 1; + } + + print("Instructions after deletion\n", .{}); + for (function.instructions.items, 0..) |instruction, index| { + print("#{} {s}\n", .{ index, @tagName(instruction.id) }); + } + print("\n", .{}); + } + } + } + const RegisterAllocator = struct { gp_registers: RegisterSet(x86_64.GPRegister) = .{}, @@ -314,63 +474,16 @@ pub const MIR = struct { return value_index; } - pub fn allocateRegisters(mir: *MIR, allocator: Allocator, intermediate: *ir.Result) !void { - for (mir.functions.items) |*function| { - var register_allocator = try RegisterAllocator.init(allocator); - for (function.instructions.items) |*instruction| { - for (instruction.getOperands()) |*operand| { - switch (operand.*) { - .relative, .immediate, .stack => {}, - .gp_register => |gp_register| switch (instruction.ir.valid) { - true => operand.gp_register.value = blk: { - const value_index = getValueFromInstruction(intermediate, instruction.ir); - - if (gp_register.value) |expected_register| { - if (register_allocator.gp_registers.used.get(expected_register)) |allocated_value| { - const allocated = intermediate.values.get(allocated_value); - const value = intermediate.values.get(value_index); - print("\nAllocated: {}.\nValue: {}\n", .{ allocated.*, value.* }); - switch (value_index.eq(allocated_value)) { - true => {}, - false => unreachable, - } - } else { - if (register_allocator.gp_registers.free.get(expected_register)) |_| { - try register_allocator.gp_registers.allocate(allocator, expected_register, intermediate, instruction.*, value_index); - } else { - unreachable; - } - } - - break :blk expected_register; - } else { - for (register_allocator.gp_registers.free.keys()) |register| { - try register_allocator.gp_registers.allocate(allocator, register, intermediate, instruction.*, value_index); - break :blk register; - } else { - unreachable; - } - } - }, - false => {}, - }, - else => |t| @panic(@tagName(t)), - } - } - } - } - } - - fn storeFunctionCallResult(allocator: Allocator, function: *MIR.Function, intermediate: *ir.Result, instruction: ir.Instruction.Index, stack_reference: ir.StackReference, call_index: ir.Call.Index) !void { + fn emitStoreForFunctionCallResult(mir: *MIR, function: *MIR.Function, intermediate: *ir.Result, instruction: ir.Instruction.Index, stack_reference: ir.StackReference, call_index: ir.Call.Index) !void { _ = call_index; _ = intermediate; if (stack_reference.size <= @sizeOf(u64)) { switch (stack_reference.size) { @sizeOf(u8) => unreachable, @sizeOf(u16) => unreachable, - @sizeOf(u32) => try function.instructions.append(allocator, MIR.Instruction.new(.mov, instruction, &.{ - .{ .stack = .{ .offset = stack_reference.offset } }, .{ .gp_register = .{ .value = .a } }, - })), + @sizeOf(u32) => try mir.addInstruction(function, .mov, instruction, &.{ + .{ .stack = .{ .offset = stack_reference.offset } }, .{ .gp_register = .{ .value = .a, .size = Size.fromByteCount(@intCast(stack_reference.size)) } }, + }), @sizeOf(u64) => unreachable, else => unreachable, } @@ -378,80 +491,339 @@ pub const MIR = struct { unreachable; } } -}; -pub fn selectInstruction(instruction_selector: *InstructionSelector, function: *InstructionSelector.Function, intermediate: *ir.Result, instruction: ir.Instruction) !void { - switch (instruction) { - .copy => |copy_value| { - _ = copy_value; - unreachable; - }, - .@"unreachable" => _ = try function.addInstruction(instruction_selector.allocator, .{ .ud2 = {} }), - .load => |load_index| { - const load = intermediate.loads.get(load_index).*; - const load_value = intermediate.values.get(load.value).*; - switch (load_value) { - .integer => |integer| { - _ = integer; - unreachable; - }, - else => |t| @panic(@tagName(t)), - } - unreachable; - }, - .syscall => |syscall_index| { - const syscall = intermediate.syscalls.get(syscall_index); - for (syscall.arguments.items, syscall_registers[0..syscall.arguments.items.len]) |argument_index, syscall_register| { - const argument = intermediate.values.get(argument_index).*; - switch (argument) { - .integer => |integer| { - if (integer.value == 0) { - _ = try function.addInstruction(instruction_selector.allocator, .{ - .xor_rm_r = .{ - .destination = @enumFromInt(@intFromEnum(syscall_register)), - .source = @enumFromInt(@intFromEnum(syscall_register)), - .size = .four, - .direct = true, - }, - }); - } else if (integer.value <= std.math.maxInt(u32)) { - _ = try function.addInstruction(instruction_selector.allocator, .{ - .mov_r_imm = .{ - .register_size = .four, - .register = @enumFromInt(@intFromEnum(syscall_register)), - .immediate = argument_index, - .immediate_size = .four, - }, - }); - // TODO - } else unreachable; + pub fn encode(mir: *const MIR, intermediate: *const ir.Result) !emit.Result { + var local_relocations = ArrayList(LocalRelocation){}; + var global_relocations = ArrayList(GlobalRelocation){}; + var block_index: usize = 0; + + var image = try emit.Result.create(); + + for (mir.functions.items) |*function| { + local_relocations.clearRetainingCapacity(); + function.instruction_byte_offset = @intCast(image.sections.text.index); + for (function.instructions.items, 0..) |*instruction, instruction_index| { + if (block_index < function.blocks.values().len) { + if (instruction_index == function.blocks.values()[block_index]) { + function.blocks.values()[block_index] = @intCast(image.sections.text.index); + block_index += 1; + } + } + + const operands = instruction.getOperands(); + switch (operands.len) { + 0 => switch (instruction.id) { + .ret => image.appendCodeByte(0xc3), + .syscall => image.appendCode(&.{ 0x0f, 0x05 }), + .ud2 => image.appendCode(&.{ 0x0f, 0x0b }), + else => |t| @panic(@tagName(t)), }, - else => |t| @panic(@tagName(t)), + 1 => switch (instruction.id) { + .call => { + const operand = operands[0]; + assert(operand == .displacement); + switch (operand.displacement.destination) { + .function => |ir_function_index| { + const function_index = ir_function_index.uniqueInteger(); + const current_function_index = @divExact(@intFromPtr(function) - @intFromPtr(mir.functions.items.ptr), @sizeOf(MIR.Function)); + + if (current_function_index < function_index) { + try mir.encodeRel32InstructionWithRelocation(&image, RelocationType.global, .{ + .relocations = &global_relocations, + .target = function_index, + .opcode = 0xe8, + }); + } else { + try encodeRel32Instruction(&image, .{ + .target = mir.functions.items[function_index].instruction_byte_offset, + .opcode = 0xe8, + }); + } + }, + else => |t| @panic(@tagName(t)), + } + }, + .jmp => { + const operand = operands[0]; + assert(operand == .displacement); + if (operand.displacement.source.uniqueInteger() < operand.displacement.destination.block.uniqueInteger()) { + try mir.encodeRel32InstructionWithRelocation(&image, RelocationType.local, .{ + .relocations = &local_relocations, + .target = operand.displacement.destination.block, + .opcode = 0xe9, + }); + } else if (operand.displacement.source.uniqueInteger() == operand.displacement.destination.block.uniqueInteger()) { + unreachable; + } else { + unreachable; + } + }, + .push => { + const operand = operands[0]; + switch (operand) { + .gp_register => |gp_register| { + assert(gp_register.size == .eight); + if (Rex.create(.{ .rm = gp_register.value.? })) |rex_byte| { + image.appendCodeByte(@bitCast(rex_byte)); + } + const opcode = @as(u8, 0x50) | @as(u3, @truncate(@intFromEnum(gp_register.value.?))); + image.appendCodeByte(opcode); + }, + else => |t| @panic(@tagName(t)), + } + }, + else => |t| @panic(@tagName(t)), + }, + 2 => switch (operands[0]) { + .gp_register => |dst_gp_register| switch (operands[1]) { + .gp_register => |src_gp_register| { + assert(dst_gp_register.size == src_gp_register.size); + const direct = true; + const rm = dst_gp_register.value.?; + const reg = src_gp_register.value.?; + + if (Rex.create(.{ + .rm = rm, + .reg = reg, + .rm_size = dst_gp_register.size, + })) |rex_byte| { + image.appendCodeByte(@bitCast(rex_byte)); + } + + const opcode_option: [2]u8 = switch (instruction.id) { + .mov => .{ 0x88, 0x89 }, + .xor => .{ 0x30, 0x31 }, + else => |t| @panic(@tagName(t)), + }; + + image.appendCodeByte(switch (dst_gp_register.size) { + .one => opcode_option[0], + else => opcode_option[1], + }); + + const modrm = ModRm{ + .rm = @truncate(@intFromEnum(rm)), + .reg = @truncate(@intFromEnum(reg)), + .mod = @as(u2, @intFromBool(direct)) << 1 | @intFromBool(direct), + }; + image.appendCodeByte(@bitCast(modrm)); + }, + .immediate => |src_immediate| { + assert(src_immediate.type.bit_count % @bitSizeOf(u8) == 0); + print("DST GP register: {}. SRC immediate: {}\n", .{ dst_gp_register, src_immediate }); + switch (instruction.id) { + .mov => switch (@intFromEnum(dst_gp_register.value.?) > std.math.maxInt(u3)) { + true => unreachable, // Use RM encoding + false => { + const opcode: u8 = switch (dst_gp_register.size) { + .one => 0xb0, + else => 0xb8, + }; + const opcode_byte = opcode | @intFromEnum(dst_gp_register.value.?); + image.appendCodeByte(opcode_byte); + const immediate_byte_count = @as(usize, 1) << @intFromEnum(dst_gp_register.size); + print("Immediate byte count: {}\n", .{immediate_byte_count}); + for (std.mem.asBytes(&src_immediate.value)[0..immediate_byte_count]) |immediate_byte| { + image.appendCodeByte(immediate_byte); + } + }, + }, + else => { + const immediate8_different_than_register = src_immediate.type.bit_count == 8 and dst_gp_register.size != .one; + switch (dst_gp_register.value.? == .a and !immediate8_different_than_register) { + true => unreachable, + false => { + const reg: x86_64.GPRegister = @enumFromInt(@as(u3, switch (instruction.id) { + .sub => 5, + else => |t| @panic(@tagName(t)), + })); + if (Rex.create(.{ .reg = reg, .rm = dst_gp_register.value.?, .rm_size = dst_gp_register.size })) |rex_byte| { + image.appendCodeByte(@bitCast(rex_byte)); + } + const opcode: u8 = switch (immediate8_different_than_register) { + true => switch (instruction.id) { + .sub => 0x83, + else => |t| @panic(@tagName(t)), + }, + false => unreachable, + }; + image.appendCodeByte(opcode); + + const rm = dst_gp_register.value.?; + const direct = true; + const modrm = ModRm{ + .rm = @truncate(@intFromEnum(rm)), + .reg = @truncate(@intFromEnum(reg)), + .mod = @as(u2, @intFromBool(direct)) << 1 | @intFromBool(direct), + }; + image.appendCodeByte(@bitCast(modrm)); + + switch (Size.fromBitCount(src_immediate.type.bit_count)) { + inline else => |size| image.appendCode(std.mem.asBytes(&@as(size.toInteger(), @intCast(src_immediate.value)))), + } + }, + } + }, + } + }, + .stack => |src_stack| { + const stack_offset = -@as(i64, @intCast(src_stack.offset)); + for (std.mem.asBytes(&stack_offset)) |stack_byte| { + print("0x{x} ", .{stack_byte}); + } + print("\n", .{}); + const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; + + const reg = dst_gp_register.value.?; + if (Rex.create(.{ .reg = reg, .rm_size = dst_gp_register.size })) |rex_byte| { + image.appendCodeByte(@bitCast(rex_byte)); + } + const opcode_option: [2]u8 = switch (instruction.id) { + .mov => .{ 0x8a, 0x8b }, + else => |t| @panic(@tagName(t)), + }; + + image.appendCodeByte(switch (dst_gp_register.size) { + .one => opcode_option[0], + else => opcode_option[1], + }); + + const rm = x86_64.GPRegister.bp; + const modrm = ModRm{ + .rm = @truncate(@intFromEnum(rm)), + .reg = @truncate(@intFromEnum(reg)), + .mod = 0b01, + }; + image.appendCodeByte(@bitCast(modrm)); + + image.appendCode(std.mem.asBytes(&stack_offset)[0..displacement_bytes]); + }, + else => |t| @panic(@tagName(t)), + }, + .stack => |dst_stack| switch (operands[1]) { + .gp_register => |src_gp_register| switch (instruction.id) { + .mov => { + const stack_offset = -@as(i64, @intCast(dst_stack.offset)); + for (std.mem.asBytes(&stack_offset)) |stack_byte| { + print("0x{x} ", .{stack_byte}); + } + print("\n", .{}); + const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; + + const reg = src_gp_register.value.?; + if (Rex.create(.{ .reg = reg, .rm_size = src_gp_register.size })) |rex_byte| { + image.appendCodeByte(@bitCast(rex_byte)); + } + const opcode_option: [2]u8 = switch (instruction.id) { + .mov => .{ 0x88, 0x89 }, + else => |t| @panic(@tagName(t)), + }; + + image.appendCodeByte(switch (src_gp_register.size) { + .one => opcode_option[0], + else => opcode_option[1], + }); + + const rm = x86_64.GPRegister.bp; + const modrm = ModRm{ + .rm = @truncate(@intFromEnum(rm)), + .reg = @truncate(@intFromEnum(reg)), + .mod = 0b01, + }; + image.appendCodeByte(@bitCast(modrm)); + + image.appendCode(std.mem.asBytes(&stack_offset)[0..displacement_bytes]); + }, + else => |t| @panic(@tagName(t)), + }, + else => |t| @panic(@tagName(t)), + }, + else => |t| @panic(@tagName(t)), + }, + 3 => switch (instruction.id) { + else => |t| @panic(@tagName(t)), + }, + 4 => switch (instruction.id) { + else => |t| @panic(@tagName(t)), + }, + else => unreachable, } } - _ = try function.addInstruction(instruction_selector.allocator, .{ - .syscall = {}, - }); - }, - .phi => unreachable, - .ret => unreachable, - .jump => |jump_index| { - const jump = intermediate.jumps.get(jump_index); - const instruction_index = try function.addInstruction(instruction_selector.allocator, .{ - .jmp_rel = Displacement{ - .size = .one, - .source = @intCast(function.block_map.get(jump.source) orelse unreachable), - .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable), - .instruction_index = @intCast(function.instructions.items.len), - }, - }); - try function.relocations.append(instruction_selector.allocator, instruction_index); - }, - .call => unreachable, - .store => unreachable, + for (local_relocations.items) |relocation| { + const source_offset: i64 = relocation.instruction_byte_offset + relocation.instruction_length; + const destination_offset: i64 = function.blocks.get(relocation.target).?; + print("Source: {}. Destination: {}\n", .{ source_offset, destination_offset }); + const displacement_offset = destination_offset - source_offset; + const address_to_address = @intFromPtr(&image.sections.text.content[relocation.instruction_byte_offset + relocation.source_address_writer_offset]); + switch (relocation.size) { + inline .one, .four => |relocation_size| { + const RelocationInteger = switch (relocation_size) { + .one => i8, + .four => i32, + else => @compileError("Unreachable"), + }; + const ptr: *align(1) RelocationInteger = @ptrFromInt(address_to_address); + ptr.* = @intCast(displacement_offset); + }, + else => unreachable, + } + } + + print("Function code:\n", .{}); + for (image.sections.text.content[function.instruction_byte_offset..][0..image.sections.text.index]) |code_byte| { + print("0x{x:0>2} ", .{code_byte}); + } + print("\n", .{}); + } + + for (global_relocations.items) |global_relocation| { + _ = global_relocation; + unreachable; + } + + image.entry_point = mir.functions.items[intermediate.entry_point].instruction_byte_offset; + + return image; } -} + + fn encodeRel32Instruction(image: *emit.Result, arguments: struct { + target: u32, + opcode: u8, + }) !void { + const instruction_byte_offset: u32 = @intCast(image.sections.text.index); + const instruction_length = 5; + + const source_offset: i64 = instruction_byte_offset + instruction_length; + const destination_offset: i64 = arguments.target; + const offset: i32 = @intCast(destination_offset - source_offset); + + image.appendCodeByte(arguments.opcode); + image.appendCode(std.mem.asBytes(&offset)); + } + + fn encodeRel32InstructionWithRelocation(mir: *const MIR, image: *emit.Result, comptime relocation_type: RelocationType, arguments: struct { + relocations: *ArrayList(Relocation(RelocationIndex(relocation_type))), + target: RelocationIndex(relocation_type), + opcode: u8, + }) !void { + const instruction_byte_offset = image.sections.text.index; + const source_address_writer_offset = 1; + const instruction_length = 5; + const size = .four; + + image.appendCodeByte(arguments.opcode); + image.appendCode(&(.{0} ** 4)); + + try arguments.relocations.append(mir.allocator, .{ + .instruction_byte_offset = @intCast(instruction_byte_offset), + .source_address_writer_offset = source_address_writer_offset, + .instruction_length = instruction_length, + .target = arguments.target, + .size = size, + }); + } +}; const RegisterImmediate = struct { immediate: ir.Value.Index, @@ -486,247 +858,38 @@ const RmAndRexArguments = packed struct { bit64: bool, sib: bool, }; - -// fn computeRmAndRex(args: RmAndRexArguments) RmResult { -// _ = register_memory_register; -// const rex_byte = Rex{ -// .b = @intFromEnum(args.rm) > std.math.maxInt(u3), -// .x = args.sib, -// .r = @intFromEnum(args.reg) > std.math.maxInt(u3), -// .w = args.bit64, -// }; -// var rex_byte = std.mem.zeroes(Rex); -// if (@intFromEnum(rm) > std.math.maxInt(u3)) -// } -fn emitImmediate(result: *emit.Result, intermediate: *ir.Result, value_index: ir.Value.Index, size: Size) void { - const value = intermediate.values.get(value_index); - const integer = value.integer.value; - const integer_bytes = switch (size) { - .one => std.mem.asBytes(&@as(u8, @intCast(integer))), - .two => std.mem.asBytes(&@as(u16, @intCast(integer))), - .four => std.mem.asBytes(&@as(u32, @intCast(integer))), - .eight => std.mem.asBytes(&@as(u64, @intCast(integer))), - }; - result.appendCode(integer_bytes); -} - const ModRm = packed struct(u8) { rm: u3, reg: u3, mod: u2, }; -pub fn emitInstruction(result: *emit.Result, instruction: Instruction, intermediate: *ir.Result) void { - switch (instruction) { - inline .xor_rm_r => |register_memory_register, tag| { - const rm = register_memory_register.destination; - const reg = register_memory_register.source; - const rex_byte = Rex{ - .b = @intFromEnum(rm) > std.math.maxInt(u3), - .x = false, //args.sib, - .r = @intFromEnum(reg) > std.math.maxInt(u3), - .w = register_memory_register.size == .eight, - }; - - if (@as(u4, @truncate(@as(u8, @bitCast(rex_byte)))) != 0) { - result.appendCodeByte(@bitCast(rex_byte)); - } - - const modrm = ModRm{ - .rm = @truncate(@intFromEnum(rm)), - .reg = @truncate(@intFromEnum(reg)), - .mod = @as(u2, @intFromBool(register_memory_register.direct)) << 1 | @intFromBool(register_memory_register.direct), - }; - // _ = modrm; - const opcode = tag.getOpcode(&.{ - .{ - .register_memory = .{ - .value = register_memory_register.destination, - .size = register_memory_register.size, - .direct = register_memory_register.direct, - }, - }, - .{ - .register = .{ - .value = register_memory_register.source, - .size = register_memory_register.size, - }, - }, - }); - - result.appendCode(opcode); - result.appendCodeByte(@bitCast(modrm)); - }, - inline .mov_r_imm => |register_immediate, tag| { - const opcode = tag.getOpcode(&.{ - .{ - .register = .{ - .value = register_immediate.register, - .size = register_immediate.register_size, - }, - }, - .{ - .immediate = register_immediate.immediate_size, - }, - }); - assert(opcode.len == 1); - const opcode_byte = opcode[0] | @intFromEnum(register_immediate.register); - result.appendCodeByte(opcode_byte); - emitImmediate(result, intermediate, register_immediate.immediate, register_immediate.immediate_size); - }, - .jmp_rel => unreachable, - inline .syscall, .ud2 => |_, tag| { - const opcode = tag.getOpcode(&.{}); - result.appendCode(opcode); - }, - // else => unreachable, - } -} - -pub const Instruction = union(Id) { - xor_rm_r: RegisterMemoryRegister, - mov_r_imm: RegisterImmediate, - jmp_rel: Displacement, - // jmp_rel_8: LocalRelative, - // mov_reg_imm32: struct { - // destination: GPRegister, - // source: u32, - // }, - // xor_reg32_reg32: struct { - // destination: GPRegister, - // source: GPRegister, - // }, - syscall, - ud2, - - const Id = enum { - xor_rm_r, - mov_r_imm, - jmp_rel, - // mov_reg_imm32, - // xor_reg32_reg32, - syscall, - ud2, - - fn getOpcode(comptime instruction: Instruction.Id, operands: []const Operand) []const u8 { - return switch (instruction) { - .mov_r_imm => switch (operands[0].register.size) { - .one => &.{0xb0}, - .two, .four, .eight => &.{0xb8}, - }, - .syscall => &.{ 0x0f, 0x05 }, - .ud2 => &.{ 0x0f, 0x0b }, - .xor_rm_r => switch (operands[0].register_memory.size) { - .one => &.{0x30}, - .two, .four, .eight => &.{0x31}, - }, - .jmp_rel => switch (operands[0].displacement.size) { - .one => unreachable, - .four => unreachable, - else => unreachable, - }, - }; - } - }; - - const Operand = union(enum) { - displacement, - register: struct { - value: GPRegister, - size: Size, - }, - // TODO - register_memory: struct { - value: GPRegister, - size: Size, - direct: bool, - }, - immediate: Size, - - const Id = enum { - displacement, - register, - register_memory, - immediate, - }; - }; - - pub const descriptors = blk: { - var result = std.EnumArray(Instruction.Id, Instruction.Descriptor).initUndefined(); - result.getPtr(.jmp_rel_8).* = Instruction.Descriptor.new(&.{0xeb}, &[_]Instruction.Operand{rel8}); - result.getPtr(.mov_reg_imm32).* = Instruction.Descriptor.new(&.{0xb8}, &[_]Instruction.Operand{ reg32, imm32 }); - result.getPtr(.xor_reg_reg).* = Instruction.Descriptor.new(&.{0x31}, &[_]Instruction.Operand{ reg32, reg32 }); - result.getPtr(.syscall).* = Instruction.Descriptor.new(&.{ 0x0f, 0x05 }, &.{}); - result.getPtr(.ud2).* = Instruction.Descriptor.new(&.{ 0x0f, 0x0b }, &.{}); - break :blk result; - }; - - const Descriptor = struct { - operands: [4]Operand, - operand_count: u3, - operand_offset: u5, - size: u8, - opcode: [3]u8, - opcode_byte_count: u8, - - fn getOperands(descriptor: Descriptor) []const Operand { - return descriptor.operands[0..descriptor.operand_count]; - } - - fn new(opcode_bytes: []const u8, operands: []const Operand) Descriptor { - // TODO: prefixes - var result = Descriptor{ - .operands = undefined, - .operand_count = @intCast(operands.len), - .operand_offset = opcode_bytes.len, - .size = opcode_bytes.len, - .opcode = .{ 0, 0 }, - .opcode_byte_count = opcode_bytes.len, - }; - - if (opcode_bytes.len == 1) { - result.opcode[1] = opcode_bytes[0]; - } else for (opcode_bytes, result.opcode[0..opcode_bytes.len]) |opcode_byte, *out_opcode| { - out_opcode.* = opcode_byte; - } - - for (operands, result.operands[0..operands.len]) |operand, *out_operand| { - out_operand.* = operand; - result.size += operand.size; - } - - return result; - } - }; -}; -const LocalRelative = struct { - instruction: Instruction.Id, - source: u16, - destination: u16, - offset_in_block: u16, -}; - -const rel8 = Instruction.Operand{ - .type = .relative, - .size = @sizeOf(u8), -}; - -const reg32 = Instruction.Operand{ - .type = .register, - .size = @sizeOf(u32), -}; - -const imm32 = Instruction.Operand{ - .type = .immediate, - .size = @sizeOf(u32), -}; - const Rex = packed struct(u8) { b: bool, x: bool, r: bool, w: bool, fixed: u4 = 0b0100, + + fn create(args: struct { + rm: ?GPRegister = null, + reg: ?GPRegister = null, + sib: bool = false, + rm_size: ?Size = null, + }) ?Rex { + const rex_byte = Rex{ + .b = if (args.rm) |rm| @intFromEnum(rm) > std.math.maxInt(u3) else false, + .x = args.sib, + .r = if (args.reg) |reg| @intFromEnum(reg) > std.math.maxInt(u3) else false, + .w = if (args.rm_size) |rm_size| rm_size == .eight else false, + }; + + if (@as(u4, @truncate(@as(u8, @bitCast(rex_byte)))) != 0) { + return rex_byte; + } else { + return null; + } + } }; const GPRegister = enum(u4) { @@ -748,15 +911,4 @@ const GPRegister = enum(u4) { r15 = 15, }; -// pub const BasicGPRegister = enum(u3) { -// a = 0, -// c = 1, -// d = 2, -// b = 3, -// sp = 4, -// bp = 5, -// si = 6, -// di = 7, -// }; - const syscall_registers = [7]GPRegister{ .a, .di, .si, .d, .r10, .r8, .r9 }; diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index 1ad2c3d..ce75292 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -518,37 +518,6 @@ const Analyzer = struct { } } - fn analyzeDeclaration(analyzer: *Analyzer, scope_index: Scope.Index, declaration: *Declaration) !Value.Index { - _ = scope_index; - _ = declaration; - _ = analyzer; - // switch (declaration.*) { - // .unresolved => |node_index| { - // const declaration_node = analyzer.nodes[node_index.unwrap()]; - // return switch (declaration_node.id) { - // .simple_variable_declaration => blk: { - // const expect_type = switch (declaration_node.left.valid) { - // true => unreachable, - // false => @unionInit(ExpectType, "none", {}), - // }; - // - // const initialization_expression = try analyzer.expression(scope, expect_type, declaration_node.right); - // const value = analyzer.module.values.get(initialization_expression); - // if (value.is_comptime and value.is_const) { - // break :blk initialization_expression; - // } - // - // unreachable; - // }, - // else => |t| @panic(@tagName(t)), - // }; - // }, - // .struct_type => unreachable, - // } - - @panic("TODO: analyzeDeclaration"); - } - fn structType(analyzer: *Analyzer, value: *Value, parent_scope_index: Scope.Index, index: Node.Index, file_index: File.Index) !Type.Index { var node_buffer: [2]Node.Index = undefined; // We have the file because this might be the first file @@ -892,7 +861,23 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, }, }); - return analyzeExistingPackage(value_allocation.ptr, compilation, module, package); + const result = analyzeExistingPackage(value_allocation.ptr, compilation, module, package); + + var decl_iterator = module.declarations.iterator(); + while (decl_iterator.nextPointer()) |decl| { + if (equal(u8, decl.name, "_start")) { + const value = module.values.get(decl.init_value); + module.entry_point = switch (value.*) { + .function => |function_index| function_index.uniqueInteger(), + else => |t| @panic(@tagName(t)), + }; + break; + } + } else { + @panic("Entry point not found"); + } + + return result; } pub fn analyzeExistingPackage(value: *Value, compilation: *Compilation, module: *Module, package: *Package) !Type.Index {