From 77e54285f53baa3458b1140ca1a8aca8b76c2e1b Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Mon, 6 Nov 2023 13:30:35 -0600 Subject: [PATCH 1/6] instruction selection --- .vscode/launch.json | 6 +- build.zig | 99 +- lib/std/builtin.nat | 16 + lib/std/os.nat | 46 + lib/std/os/linux.nat | 1 + lib/std/os/macos.nat | 3 + lib/std/os/windows.nat | 3 + lib/std/start.nat | 6 +- lib/std/std.nat | 3 + src/Compilation.zig | 515 ++- src/backend/aarch64.zig | 0 src/backend/elf.zig | 281 ++ src/backend/emit.zig | 115 +- src/backend/intermediate_representation.zig | 986 +++-- src/backend/macho.zig | 682 +++ src/backend/pe.zig | 266 ++ src/backend/x86_64.zig | 4214 +++++++++++++++---- src/data_structures.zig | 154 +- src/frontend/lexical_analyzer.zig | 24 +- src/frontend/semantic_analyzer.zig | 1421 +++++-- src/frontend/syntactic_analyzer.zig | 658 ++- src/main.zig | 69 +- test/first/main.nat | 2 +- test/hello_world/main.nat | 6 + test/stack/main.nat | 4 + 25 files changed, 7819 insertions(+), 1761 deletions(-) create mode 100644 lib/std/builtin.nat create mode 100644 lib/std/os.nat create mode 100644 lib/std/os/linux.nat create mode 100644 lib/std/os/macos.nat create mode 100644 lib/std/os/windows.nat create mode 100644 src/backend/aarch64.zig create mode 100644 src/backend/elf.zig create mode 100644 src/backend/macho.zig create mode 100644 src/backend/pe.zig create mode 100644 test/hello_world/main.nat create mode 100644 test/stack/main.nat diff --git a/.vscode/launch.json b/.vscode/launch.json index 46a561c..3fbf72a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,8 +8,10 @@ "type": "lldb", "request": "launch", "name": "Launch", - "program": "${workspaceFolder}/zig-out/bin/compiler", - "args": [], + "program": "${workspaceFolder}/zig-out/bin/nativity", + "args": [ + "test/hello_world/main.nat" + ], "cwd": "${workspaceFolder}", "preLaunchTask": "zig build" }, diff --git a/build.zig b/build.zig index e2dd39c..0ac8555 100644 --- a/build.zig +++ 
b/build.zig @@ -1,15 +1,18 @@ const std = @import("std"); var all: bool = false; -fn everythingForTargetAndOptimization(b: *std.Build, target: std.zig.CrossTarget, optimization: std.builtin.OptimizeMode, unit_tests: []const []const u8, test_step: *std.Build.Step) !void { - const name = if (all) try std.mem.concat(b.allocator, u8, &.{ "nativity_", @tagName(optimization) }) else "nativity"; +pub fn build(b: *std.Build) !void { + all = b.option(bool, "all", "All") orelse false; + const target = b.standardTargetOptions(.{}); + const optimization = b.standardOptimizeOption(.{}); const exe = b.addExecutable(.{ - .name = name, + .name = "nativity", .root_source_file = .{ .path = "src/main.zig" }, .target = target, .optimize = optimization, + .use_llvm = true, + .use_lld = false, }); - b.installArtifact(exe); b.installDirectory(.{ .source_dir = std.Build.LazyPath.relative("lib"), @@ -17,100 +20,50 @@ fn everythingForTargetAndOptimization(b: *std.Build, target: std.zig.CrossTarget .install_subdir = "lib", }); - const run_cmd = b.addRunArtifact(exe); - - run_cmd.step.dependOn(b.getInstallStep()); - - if (b.args) |args| { - run_cmd.addArgs(args); - } - - const run_step = b.step(if (all) try std.mem.concat(b.allocator, u8, &.{ "run_", @tagName(optimization) }) else "run", "Run the app"); - run_step.dependOn(&run_cmd.step); - - const debug_command = addDebugCommand(b, exe); - const debug_step = b.step(if (all) try std.mem.concat(b.allocator, u8, &.{ "debug_", @tagName(optimization) }) else "debug", "Debug the app"); - debug_step.dependOn(&debug_command.step); - const zig_tests = b.addTest(.{ .root_source_file = .{ .path = "src/main.zig" }, .target = target, .optimize = optimization, }); - const run_zig_tests = b.addRunArtifact(zig_tests); - run_zig_tests.has_side_effects = true; - test_step.dependOn(&run_zig_tests.step); + const run_command = b.addRunArtifact(exe); - for (unit_tests) |unit_test_main_source_file| { - const unit_test = b.addRunArtifact(exe); - 
unit_test.has_side_effects = true; - unit_test.addArg(unit_test_main_source_file); - test_step.dependOn(&unit_test.step); - } -} - -fn addDebugCommand(b: *std.Build, artifact: *std.Build.Step.Compile) *std.Build.Step.Run { - return switch (@import("builtin").os.tag) { + const debug_command = switch (@import("builtin").os.tag) { .linux => blk: { const result = b.addSystemCommand(&.{"gf2"}); - result.addArtifactArg(artifact); - - if (artifact.kind == .@"test") { - result.addArgs(&.{ "-ex", "r" }); - } - + result.addArg("--args"); + result.addArtifactArg(exe); break :blk result; }, .windows => blk: { const result = b.addSystemCommand(&.{"remedybg"}); result.addArg("-g"); - result.addArtifactArg(artifact); + result.addArtifactArg(exe); break :blk result; }, .macos => blk: { // not tested - const result = b.addSystemCommand(&.{"gdb"}); - result.addArtifactArg(artifact); + const result = b.addSystemCommand(&.{"lldb"}); + result.addArg("--"); + result.addArtifactArg(exe); break :blk result; }, - else => @compileError("Operating system not supported"), + else => @compileError("OS not supported"), }; -} -pub fn build(b: *std.Build) !void { - all = b.option(bool, "all", "All") orelse false; + const test_command = b.addRunArtifact(zig_tests); - var unit_test_list = std.ArrayList([]const u8).init(b.allocator); - var test_dir = try std.fs.cwd().openIterableDir("test", .{ .access_sub_paths = true }); - defer test_dir.close(); - - var test_dir_iterator = test_dir.iterate(); - - while (try test_dir_iterator.next()) |entry| { - switch (entry.kind) { - .directory => { - const dir_name = entry.name; - const main_unit_test_source_file = try std.mem.concat(b.allocator, u8, &.{ "test/", dir_name, "/main.nat" }); - try unit_test_list.append(main_unit_test_source_file); - }, - .file => {}, - else => @panic("Don't put crap on test directory"), - } + if (b.args) |args| { + run_command.addArgs(args); + test_command.addArgs(args); + debug_command.addArgs(args); } - const target = 
b.standardTargetOptions(.{}); - const unit_tests = unit_test_list.items; + const run_step = b.step("run", "Test the Nativity compiler"); + run_step.dependOn(&run_command.step); const test_step = b.step("test", "Test the Nativity compiler"); - - if (all) { - inline for (@typeInfo(std.builtin.OptimizeMode).Enum.fields) |enum_field| { - const optimization = @field(std.builtin.OptimizeMode, enum_field.name); - try everythingForTargetAndOptimization(b, target, optimization, unit_tests, test_step); - } - } else { - const optimization = b.standardOptimizeOption(.{}); - _ = try everythingForTargetAndOptimization(b, target, optimization, unit_tests, test_step); - } + test_step.dependOn(&test_command.step); + const debug_step = b.step("debug", "Debug the Nativity compiler"); + debug_step.dependOn(&debug_command.step); } diff --git a/lib/std/builtin.nat b/lib/std/builtin.nat new file mode 100644 index 0000000..835cfdf --- /dev/null +++ b/lib/std/builtin.nat @@ -0,0 +1,16 @@ +const Os = enum{ + linux, + macos, + windows, +}; + +const Cpu = enum{ + aarch64, + x86_64, +}; + +const Abi = enum{ + none, + gnu, + msvc, +}; diff --git a/lib/std/os.nat b/lib/std/os.nat new file mode 100644 index 0000000..4aced36 --- /dev/null +++ b/lib/std/os.nat @@ -0,0 +1,46 @@ +const current = #import("builtin").os; +const system = switch (current) { + .linux => linux, + .macos => macos, + .windows => windows, +}; + +const write = fn (file_descriptor: FileDescriptor, bytes_ptr: [@]const u8, bytes_len: usize) ssize { + switch (current) { + .linux => return #syscall(1, file_descriptor, bytes_ptr, bytes_len), + .macos => return macos.write(file_descriptor, bytes_ptr, bytes_len), + .windows => { + var written_bytes: u32 = 0; + if (windows.WriteFile(file_descriptor, bytes_ptr, bytes_len, @written_bytes, false) != 0) { + return written_bytes; + } else { + unreachable; + } + }, + } +} + +const FileDescriptor = system.FileDescriptor; + +const print = fn(bytes_ptr: [@]const u8, bytes_len: usize) void { + 
const file_descriptor = switch (current) { + .linux, .macos => 2, + .windows => windows.GetStdHandle(windows.STD_OUTPUT_HANDLE), + }; + + _ = write(file_descriptor, bytes_ptr, bytes_len); +} + +const exit = fn(exit_code: s32) noreturn { + switch (current) { + .linux => _ = #syscall(231, exit_code), + .macos => macos.exit(exit_code), + .windows => windows.ExitProcess(exit_code), + } + + unreachable; +} + +const linux = #import("os/linux.nat"); +const macos = #import("os/macos.nat"); +const windows = #import("os/windows.nat"); diff --git a/lib/std/os/linux.nat b/lib/std/os/linux.nat new file mode 100644 index 0000000..3183d18 --- /dev/null +++ b/lib/std/os/linux.nat @@ -0,0 +1 @@ +const FileDescriptor = s32; diff --git a/lib/std/os/macos.nat b/lib/std/os/macos.nat new file mode 100644 index 0000000..7300cab --- /dev/null +++ b/lib/std/os/macos.nat @@ -0,0 +1,3 @@ +const FileDescriptor = s32; +const write = fn (file_descriptor: FileDescriptor, bytes_ptr: [@]const u8, bytes_len: usize) ssize extern; +const exit = fn (exit_code: u32) noreturn extern; diff --git a/lib/std/os/windows.nat b/lib/std/os/windows.nat new file mode 100644 index 0000000..c5f37a5 --- /dev/null +++ b/lib/std/os/windows.nat @@ -0,0 +1,3 @@ +const HANDLE = u64; +const FileDescriptor = HANDLE; +const GetStdHandle = fn(handle_descriptor: u32) HANDLE extern; diff --git a/lib/std/start.nat b/lib/std/start.nat index 031890b..1799dcf 100644 --- a/lib/std/start.nat +++ b/lib/std/start.nat @@ -1,9 +1,9 @@ +const std = #import("std"); comptime { _ = _start; } const _start = fn () noreturn { const result = #import("main").main(); - _ = #syscall(231, result); - unreachable; -}; + std.os.exit(0); +} diff --git a/lib/std/std.nat b/lib/std/std.nat index 3ce1556..6215821 100644 --- a/lib/std/std.nat +++ b/lib/std/std.nat @@ -2,4 +2,7 @@ comptime { _ = start; } +const builtin = #import("builtin.nat"); +const os = #import("os.nat"); +const print = os.print; const start = #import("start.nat"); diff --git 
a/src/Compilation.zig b/src/Compilation.zig index 3669529..765f7f7 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -13,6 +13,7 @@ const AutoHashMap = data_structures.AutoHashMap; const BlockList = data_structures.BlockList; const HashMap = data_structures.HashMap; const SegmentedList = data_structures.SegmentedList; +const StringKeyMap = data_structures.StringKeyMap; const StringHashMap = data_structures.StringHashMap; const StringArrayHashMap = data_structures.StringArrayHashMap; @@ -67,12 +68,57 @@ pub const Struct = struct { pub const Allocation = List.Allocation; }; +pub const Enum = struct { + scope: Scope.Index, + fields: ArrayList(Enum.Field.Index) = .{}, + type: Type.Index, + + pub const Field = struct { + name: u32, + value: Value.Index, + parent: Enum.Index, + + pub const List = BlockList(@This()); + pub const Index = Enum.Field.List.Index; + pub const Allocation = Enum.Field.List.Allocation; + }; + + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const Array = struct { + element_type: Type.Index, + element_count: u32, + + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + pub const Type = union(enum) { void, noreturn, bool, + type, + comptime_int, integer: Type.Integer, + slice: Slice, + pointer: Pointer, @"struct": Struct.Index, + @"enum": Enum.Index, + function: Function.Prototype.Index, + array: Array, + + const Slice = struct { + element_type: Type.Index, + }; + const Pointer = struct { + element_type: Type.Index, + many: bool, + @"const": bool, + }; pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; @@ -85,14 +131,37 @@ pub const Type = union(enum) { signed = 1, }; - pub fn getSize(integer: Type.Integer) u64 { + pub fn getSize(integer: Integer) u64 { return integer.bit_count / @bitSizeOf(u8) + @intFromBool(integer.bit_count % @bitSizeOf(u8) 
!= 0); } + + pub fn getIndex(integer: Integer) Compilation.Type.Index { + return .{ + .block = 0, + .index = @ctz(integer.bit_count) - @ctz(@as(u8, 8)) + @as(u6, switch (integer.signedness) { + .signed => Compilation.HardwareSignedIntegerType.offset, + .unsigned => Compilation.HardwareUnsignedIntegerType.offset, + }), + }; + } + + // pub fn get(bit_count: u16, comptime signedness: Signedness) @This().Type(signedness) { + // _ = bit_count; + // } + + fn Type(comptime signedness: Signedness) type { + return switch (signedness) { + .unsigned => HardwareUnsignedIntegerType, + .signed => HardwareSignedIntegerType, + }; + } }; pub fn getSize(type_info: Type) u64 { return switch (type_info) { .integer => |integer| integer.getSize(), + .pointer => 8, + .comptime_int => @panic("This call should never happen"), else => |t| @panic(@tagName(t)), }; } @@ -100,9 +169,106 @@ pub const Type = union(enum) { pub fn getAlignment(type_info: Type) u64 { return switch (type_info) { .integer => |integer| @min(16, integer.getSize()), + .pointer => 8, else => |t| @panic(@tagName(t)), }; } + + pub const @"void" = FixedTypeKeyword.void.toType(); + pub const boolean = FixedTypeKeyword.bool.toType(); + pub const ssize = FixedTypeKeyword.ssize.toType(); + pub const @"usize" = FixedTypeKeyword.usize.toType(); + pub const @"noreturn" = FixedTypeKeyword.noreturn.toType(); + pub const @"type" = FixedTypeKeyword.type.toType(); + pub const @"comptime_int" = FixedTypeKeyword.comptime_int.toType(); + pub const string_literal = ExtraCommonType.string_literal.toType(); + pub const @"u8" = Type.Integer.getIndex(.{ + .bit_count = 8, + .signedness = .unsigned, + }); +}; + +// Each time an enum is added here, a corresponding insertion in the initialization must be made +pub const Values = enum { + bool_false, + bool_true, + @"unreachable", + + pub fn getIndex(value: Values) Value.Index { + const absolute: u32 = @intFromEnum(value); + const foo = @as(Value.Index, undefined); + const ElementT = 
@TypeOf(@field(foo, "index")); + const BlockT = @TypeOf(@field(foo, "block")); + const divider = std.math.maxInt(ElementT); + const element_index: ElementT = @intCast(absolute % divider); + const block_index: BlockT = @intCast(absolute / divider); + return .{ + .index = element_index, + .block = block_index, + }; + } +}; + +pub const Intrinsic = enum { + @"error", + import, + syscall, +}; + +pub const FixedTypeKeyword = enum { + void, + noreturn, + bool, + usize, + ssize, + type, + comptime_int, + + const offset = 0; + + fn toType(fixed_type_keyword: FixedTypeKeyword) Type.Index { + return Type.Index.fromInteger(offset + @intFromEnum(fixed_type_keyword)); + } +}; + +pub const HardwareUnsignedIntegerType = enum { + u8, + u16, + u32, + u64, + + pub const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len; +}; + +pub const HardwareSignedIntegerType = enum { + s8, + s16, + s32, + s64, + + pub const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len; +}; + +pub const ExtraCommonType = enum { + string_literal, + pub const offset = HardwareSignedIntegerType.offset + @typeInfo(HardwareSignedIntegerType).Enum.fields.len; + + fn toType(t: ExtraCommonType) Type.Index { + return Type.Index.fromInteger(offset + @intFromEnum(t)); + } +}; + +pub const extra_common_type_data = blk: { + var result: [@typeInfo(ExtraCommonType).Enum.fields.len]Type = undefined; + result[@intFromEnum(ExtraCommonType.string_literal)] = .{ + .pointer = .{ + .many = true, + .@"const" = true, + .element_type = Type.u8, + }, + }; + + break :blk result; }; /// A scope contains a bunch of declarations @@ -131,7 +297,14 @@ pub const Declaration = struct { scope_type: ScopeType, mutability: Mutability, init_value: Value.Index, - name: []const u8, + name: u32, + argument_index: ?u32, + type: Type.Index, + + pub const Reference = struct { + value: Declaration.Index, + type: Type.Index, + }; pub const List = BlockList(@This()); pub const Index = List.Index; @@ 
-139,15 +312,31 @@ pub const Declaration = struct { }; pub const Function = struct { + scope: Scope.Index, body: Block.Index, - prototype: Prototype.Index, + prototype: Type.Index, pub const Prototype = struct { - arguments: ?[]const Field.Index, + arguments: ?[]const Declaration.Index, return_type: Type.Index, + attributes: Attributes = .{}, pub const List = BlockList(@This()); pub const Index = Prototype.List.Index; + + pub const Attributes = packed struct { + @"extern": bool = false, + @"export": bool = false, + @"inline": Inline = .none, + calling_convention: CallingConvention = .system_v, + + pub const Inline = enum { + none, + suggestion_optimizer, + force_semantic, + force_optimizer, + }; + }; }; pub fn getBodyBlock(function: Function, module: *Module) *Block { @@ -168,7 +357,8 @@ pub const Block = struct { }; pub const Field = struct { - foo: u32 = 0, + name: u32, + type: Type.Index, pub const List = BlockList(@This()); pub const Index = List.Index; @@ -239,10 +429,23 @@ pub const Return = struct { pub const Allocation = List.Allocation; }; +pub const Cast = struct { + value: Value.Index, + type: Type.Index, + + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const CallingConvention = enum { + system_v, +}; + pub const Value = union(enum) { unresolved: Unresolved, declaration: Declaration.Index, - declaration_reference: Declaration.Index, + declaration_reference: Declaration.Reference, void, bool: bool, undefined, @@ -258,35 +461,118 @@ pub const Value = union(enum) { call: Call.Index, argument_list: ArgumentList, @"return": Return.Index, + argument: Declaration.Index, + string_literal: u32, + enum_field: Enum.Field.Index, + extern_function: Function.Prototype.Index, + sign_extend: Cast.Index, + zero_extend: Cast.Index, pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; + pub const Integer = struct { + value: u64, + type: 
Type.Index, + signedness: Type.Integer.Signedness, + + pub fn getBitCount(integer: Integer, module: *Module) u16 { + return module.types.get(integer.type).integer.bit_count; + } + }; + pub fn isComptime(value: Value) bool { return switch (value) { - .bool, .void, .undefined, .function => true, - else => false, - }; - } - - pub fn getType(value: *Value, module: *Module) Type.Index { - return switch (value.*) { - .call => |call_index| module.calls.get(call_index).type, + .bool, .void, .undefined, .function, .type, .enum_field => true, + .integer => |integer| integer.type.eq(Type.comptime_int), + .call => false, else => |t| @panic(@tagName(t)), }; } -}; -pub const Integer = struct { - value: u64, - type: Type.Integer, + pub fn getType(value: Value, module: *Module) Type.Index { + const result = switch (value) { + .call => |call_index| module.calls.get(call_index).type, + .integer => |integer| integer.type, + .declaration_reference => |declaration_reference| declaration_reference.type, + .string_literal => |string_literal_hash| module.string_literal_types.get(@intCast(module.getStringLiteral(string_literal_hash).?.len)).?, + .type => Type.type, + .enum_field => |enum_field_index| module.enums.get(module.enum_fields.get(enum_field_index).parent).type, + .function => |function_index| module.functions.get(function_index).prototype, + else => |t| @panic(@tagName(t)), + }; + + return result; + } + + // pub fn setType(value: *Value, new_type: Type.Index) void { + // switch (value.*) { + // .integer => value.integer.type = new_type, + // else => |t| @panic(@tagName(t)), + // } + // } + const TypeCheckError = error{ + integer_size, + pointer_many_differ, + pointer_element_type_differ, + }; + + pub fn typeCheck(value: *Value, module: *Module, type_to_check_index: Type.Index) TypeCheckError!void { + const value_type_index = value.getType(module); + + if (!value_type_index.eq(type_to_check_index)) { + const value_type = module.types.get(value_type_index); + const check_type = 
module.types.get(type_to_check_index); + if (std.meta.activeTag(value_type.*) == std.meta.activeTag(check_type.*)) { + switch (value_type.*) { + .integer => |coercee_int| { + if (check_type.integer.getSize() < coercee_int.getSize()) { + return error.integer_size; + } + }, + .pointer => |coercee_pointer| { + if (coercee_pointer.many != check_type.pointer.many) { + return error.pointer_many_differ; + } + + if (!coercee_pointer.element_type.eq(check_type.pointer.element_type)) { + if (check_type.pointer.many) { + const coercee_element_type = module.types.get(coercee_pointer.element_type); + switch (coercee_element_type.*) { + .array => |array| if (!array.element_type.eq(check_type.pointer.element_type)) { + return error.pointer_element_type_differ; + }, + else => |t| @panic(@tagName(t)), + } + } + } + }, + else => |t| @panic(@tagName(t)), + } + } else { + switch (check_type.*) { + .integer => { + switch (value_type.*) { + .comptime_int => switch (value.*) { + .integer => value.integer.type = type_to_check_index, + .declaration_reference => value.declaration_reference.type = type_to_check_index, + else => |t| @panic(@tagName(t)), + }, + else => |t| @panic(@tagName(t)), + } + }, + else => |t| @panic(@tagName(t)), + } + } + } + } }; pub const Module = struct { main_package: *Package, import_table: StringArrayHashMap(*File) = .{}, - string_table: AutoHashMap(u32, []const u8) = .{}, + string_table: StringKeyMap([]const u8) = .{}, declarations: BlockList(Declaration) = .{}, structs: BlockList(Struct) = .{}, scopes: BlockList(Scope) = .{}, @@ -301,12 +587,22 @@ pub const Module = struct { assignments: BlockList(Assignment) = .{}, syscalls: BlockList(Syscall) = .{}, calls: BlockList(Call) = .{}, - argument_list: BlockList(ArgumentList) = .{}, + argument_lists: BlockList(ArgumentList) = .{}, returns: BlockList(Return) = .{}, + string_literals: StringKeyMap([]const u8) = .{}, + enums: BlockList(Enum) = .{}, + enum_fields: BlockList(Enum.Field) = .{}, + function_name_map: 
data_structures.AutoArrayHashMap(Function.Index, u32) = .{}, + arrays: BlockList(Array) = .{}, + casts: BlockList(Cast) = .{}, + string_literal_types: data_structures.AutoArrayHashMap(u32, Type.Index) = .{}, + array_types: data_structures.AutoArrayHashMap(Array, Type.Index) = .{}, entry_point: ?u32 = null, pub const Descriptor = struct { main_package_path: []const u8, + executable_path: []const u8, + target: std.Target, }; const ImportFileResult = struct { @@ -343,8 +639,10 @@ pub const Module = struct { unreachable; } - const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_name }); - const file_relative_path = std.fs.path.basename(full_path); + const current_file_relative_path_to_package_directory = std.fs.path.dirname(current_file.relative_path) orelse ""; + const import_file_relative_path = try std.fs.path.join(allocator, &.{ current_file_relative_path_to_package_directory, import_name }); + const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_file_relative_path }); + const file_relative_path = import_file_relative_path; const package = current_file.package; const import_file = try module.getFile(allocator, full_path, file_relative_path, package); @@ -358,12 +656,6 @@ pub const Module = struct { return result; } - fn lookupDeclaration(module: *Module, hashed: u32) !noreturn { - _ = hashed; - _ = module; - while (true) {} - } - fn getFile(module: *Module, allocator: Allocator, full_path: []const u8, relative_path: []const u8, package: *Package) !ImportFileResult { const path_lookup = try module.import_table.getOrPut(allocator, full_path); const file, const index = switch (path_lookup.found_existing) { @@ -399,6 +691,7 @@ pub const Module = struct { pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult { const full_path = try std.fs.path.resolve(allocator, &.{ package.directory.path, package.source_path }); + print("Import full 
path: {s}\n", .{full_path}); const import_file = try module.getFile(allocator, full_path, package.source_path, package); try import_file.ptr.addPackageReference(allocator, package); @@ -408,8 +701,8 @@ pub const Module = struct { }; } - pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { - _ = module; + pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file_index: File.Index) !void { + const file = module.files.get(file_index); const source_file = file.package.directory.handle.openFile(file.relative_path, .{}) catch |err| { std.debug.panic("Can't find file {s} in directory {s} for error {s}", .{ file.relative_path, file.package.directory.path, @errorName(err) }); }; @@ -425,8 +718,70 @@ pub const Module = struct { file.source_code = file_buffer[0..read_byte_count]; file.status = .loaded_into_memory; - try file.lex(allocator); - try file.parse(allocator); + try file.lex(allocator, file_index); + print("Start of parsing file #{}\n", .{file_index.uniqueInteger()}); + try file.parse(allocator, file_index); + print("End of parsing file #{}\n", .{file_index.uniqueInteger()}); + } + + fn getString(map: *StringKeyMap([]const u8), key: u32) ?[]const u8 { + return map.getValue(key); + } + + fn addString(map: *StringKeyMap([]const u8), allocator: Allocator, string: []const u8) !u32 { + const lookup_result = try map.getOrPut(allocator, string, string); + + { + const lookup_name = map.getValue(lookup_result.key) orelse unreachable; + assert(equal(u8, lookup_name, string)); + } + + return lookup_result.key; + } + + pub fn getName(module: *Module, key: u32) ?[]const u8 { + return getString(&module.string_table, key); + } + + pub fn addName(module: *Module, allocator: Allocator, name: []const u8) !u32 { + return addString(&module.string_table, allocator, name); + } + + pub fn getStringLiteral(module: *Module, key: u32) ?[]const u8 { + return getString(&module.string_literals, key); + } + + pub fn 
addStringLiteral(module: *Module, allocator: Allocator, string_literal: []const u8) !u32 { + const result = addString(&module.string_literals, allocator, string_literal); + + const len: u32 = @intCast(string_literal.len); + // try analyzer.module. + const string_literal_type_gop = try module.string_literal_types.getOrPut(allocator, len); + if (!string_literal_type_gop.found_existing) { + const array = Array{ + .element_type = Type.u8, + .element_count = len, + }; + const array_type_gop = try module.array_types.getOrPut(allocator, array); + if (!array_type_gop.found_existing) { + const array_type_allocation = try module.types.append(allocator, .{ + .array = array, + }); + array_type_gop.value_ptr.* = array_type_allocation.index; + } + + const array_type_index = array_type_gop.value_ptr.*; + const pointer_type_allocation = try module.types.append(allocator, .{ + .pointer = .{ + .@"const" = true, + .many = true, + .element_type = array_type_index, + }, + }); + string_literal_type_gop.value_ptr.* = pointer_type_allocation.index; + } + + return result; } }; @@ -448,7 +803,18 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! // TODO: generate an actual file const builtin_file_name = "builtin.nat"; var cache_dir = try compilation.build_directory.openDir("cache", .{}); - const builtin_file = try cache_dir.createFile(builtin_file_name, .{ .truncate = false }); + const builtin_file = try cache_dir.createFile(builtin_file_name, .{}); + try builtin_file.writer().print( + \\const builtin = #import("std").builtin; + \\const cpu = builtin.Cpu.{s}; + \\const os = builtin.Os.{s}; + \\const abi = builtin.Abi.{s}; + \\ + , .{ + @tagName(descriptor.target.cpu.arch), + @tagName(descriptor.target.os.tag), + @tagName(descriptor.target.abi), + }); builtin_file.close(); const module: *Module = try compilation.base_allocator.create(Module); @@ -508,14 +874,80 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! 
_ = try module.importPackage(compilation.base_allocator, module.main_package.dependencies.get("std").?); for (module.import_table.values()) |import| { - try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import); + try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, module.files.indexOf(import)); } - const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0], .{ .block = 0, .index = 0 }); + inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| { + _ = try module.types.append(compilation.base_allocator, switch (@field(FixedTypeKeyword, enum_field.name)) { + .usize => @unionInit(Type, "integer", .{ + .bit_count = 64, + .signedness = .unsigned, + }), + .ssize => @unionInit(Type, "integer", .{ + .bit_count = 64, + .signedness = .signed, + }), + else => @unionInit(Type, enum_field.name, {}), + }); + } - var ir = try intermediate_representation.initialize(compilation, module, packages[0], main_declaration); + inline for (@typeInfo(HardwareUnsignedIntegerType).Enum.fields) |enum_field| { + _ = try module.types.append(compilation.base_allocator, .{ + .integer = .{ + .signedness = .unsigned, + .bit_count = switch (@field(HardwareUnsignedIntegerType, enum_field.name)) { + .u8 => 8, + .u16 => 16, + .u32 => 32, + .u64 => 64, + }, + }, + }); + } - try emit.get(.x86_64).initialize(compilation.base_allocator, &ir); + inline for (@typeInfo(HardwareSignedIntegerType).Enum.fields) |enum_field| { + _ = try module.types.append(compilation.base_allocator, .{ + .integer = .{ + .signedness = .signed, + .bit_count = switch (@field(HardwareSignedIntegerType, enum_field.name)) { + .s8 => 8, + .s16 => 16, + .s32 => 32, + .s64 => 64, + }, + }, + }); + } + + for (extra_common_type_data) |type_data| { + _ = try module.types.append(compilation.base_allocator, type_data); + } + + _ = try module.values.append(compilation.base_allocator, .{ + .bool = false, + }); + + _ = try 
module.values.append(compilation.base_allocator, .{ + .bool = true, + }); + + _ = try module.values.append(compilation.base_allocator, .{ + .@"unreachable" = {}, + }); + + const value_allocation = try module.values.append(compilation.base_allocator, .{ + .unresolved = .{ + .node_index = .{ .value = 0 }, + }, + }); + + try semantic_analyzer.initialize(compilation, module, packages[0], value_allocation.ptr); + + const ir = try intermediate_representation.initialize(compilation, module); + + switch (descriptor.target.cpu.arch) { + inline else => |arch| try emit.get(arch).initialize(compilation.base_allocator, ir, descriptor), + } } fn generateAST() !void {} @@ -544,6 +976,7 @@ pub const File = struct { syntactic_analyzer_result: syntactic_analyzer.Result = undefined, package_references: ArrayList(*Package) = .{}, file_references: ArrayList(*File) = .{}, + type: Type.Index = Type.Index.invalid, relative_path: []const u8, package: *Package, @@ -569,18 +1002,18 @@ pub const File = struct { try file.file_references.append(allocator, affected); } - fn lex(file: *File, allocator: Allocator) !void { + fn lex(file: *File, allocator: Allocator, file_index: File.Index) !void { assert(file.status == .loaded_into_memory); - file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code); + file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code, file_index); // if (!@import("builtin").is_test) { // print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time}); // } file.status = .lexed; } - fn parse(file: *File, allocator: Allocator) !void { + fn parse(file: *File, allocator: Allocator, file_index: File.Index) !void { assert(file.status == .lexed); - file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code); + file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, 
file.source_code, file_index); // if (!@import("builtin").is_test) { // print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time}); // } diff --git a/src/backend/aarch64.zig b/src/backend/aarch64.zig new file mode 100644 index 0000000..e69de29 diff --git a/src/backend/elf.zig b/src/backend/elf.zig new file mode 100644 index 0000000..14fab53 --- /dev/null +++ b/src/backend/elf.zig @@ -0,0 +1,281 @@ +const std = @import("std"); +const assert = std.debug.assert; +const equal = std.mem.eql; + +const data_structures = @import("../data_structures.zig"); +const Allocator = data_structures.Allocator; +const ArrayList = data_structures.ArrayList; + +const emit = @import("emit.zig"); + +pub const Writer = struct { + bytes: ArrayList(u8), + allocator: Allocator, + + pub fn init(allocator: Allocator) !Writer { + return .{ + .bytes = try ArrayList(u8).initCapacity(allocator, 0x10000), + .allocator = allocator, + }; + } + + pub fn getHeader(writer: *Writer) *Header { + return @ptrCast(@alignCast(writer.bytes.items.ptr)); + } + + pub fn writeToMemory(writer: *Writer, image: *const emit.Result) !void { + const section_fields = @typeInfo(@TypeOf(image.sections)).Struct.fields; + const section_count = blk: { + var result: u16 = 0; + inline for (section_fields) |section_field| { + const section_size = @field(image.sections, section_field.name).index; + result += @intFromBool(section_size > 0); + } + break :blk result; + }; + + const program_header_count = section_count; + const program_start_offset = @sizeOf(Header) + program_header_count * @sizeOf(ProgramHeader); + + var section_offsets: [section_fields.len]u32 = undefined; + + const program_end_offset = blk: { + var result: u32 = program_start_offset; + inline for (section_fields, 0..) 
|section_field, section_index| { + const section = &@field(image.sections, section_field.name); + if (section.index > 0) { + const section_offset = std.mem.alignForward(u32, result, section.alignment); + section_offsets[section_index] = section_offset; + result = std.mem.alignForward(u32, section_offset + @as(u32, @intCast(section.index)), section.alignment); + } + } + + break :blk result; + }; + + const elf_file_end_offset = program_end_offset + @sizeOf(SectionHeader) * section_count; + try writer.bytes.resize(writer.allocator, elf_file_end_offset); + + const base_address = 0x200000; + + writer.getHeader().* = Header{ + .endianness = .little, + .machine = switch (image.target.cpu.arch) { + .x86_64 => .AMD64, + else => unreachable, + }, + .os_abi = switch (image.target.os.tag) { + .linux => .systemv, + else => unreachable, + }, + .entry = base_address + section_offsets[0] + image.entry_point, + .section_header_offset = program_end_offset, + .program_header_count = program_header_count, + .section_header_count = section_count, + .name_section_header_index = 0, + }; + + var program_header_offset: usize = @sizeOf(Header); + var section_header_offset = program_end_offset; + inline for (section_fields, section_offsets) |section_field, section_offset| { + const section_name = section_field.name; + const section = &@field(image.sections, section_name); + if (section.index > 0) { + const program_header: *ProgramHeader = @ptrCast(@alignCast(writer.bytes.items[program_header_offset..].ptr)); + program_header.* = .{ + .type = .load, + .flags = .{ + .executable = equal(u8, section_name, "text"), + .writable = equal(u8, section_name, "data"), + .readable = true, + }, + .offset = 0, + .virtual_address = base_address, + .physical_address = base_address, + .size_in_file = section.index, + .size_in_memory = section.index, + .alignment = 0, + }; + + const source = section.content[0..section.index]; + const destination = writer.bytes.items[section_offset..][0..source.len]; + 
@memcpy(destination, source); + + const section_header: *SectionHeader = @ptrCast(@alignCast(writer.bytes.items[section_header_offset..].ptr)); + section_header.* = .{ + .name_offset = 0, + .type = .program_data, + .flags = .{ + .alloc = equal(u8, section_name, "text"), + .executable = equal(u8, section_name, "text"), + .writable = equal(u8, section_name, "data"), + }, + .address = base_address + section_offset, + .offset = section_offset, + .size = section.index, + .link = 0, + .info = 0, + .alignment = 0, + .entry_size = 0, + }; + } + } + } + + pub fn writeToFile(writer: *const Writer, file_path: []const u8) !void { + std.debug.print("Writing file to {s}\n", .{file_path}); + const flags = switch (@import("builtin").os.tag) { + .windows => .{}, + else => .{ + .mode = 0o777, + }, + }; + const file_descriptor = try std.fs.cwd().createFile(file_path, flags); + try file_descriptor.writeAll(writer.bytes.items); + file_descriptor.close(); + } + + pub fn writeToFileAbsolute(writer: *const Writer, absolute_file_path: []const u8) !void { + const file = try std.fs.createFileAbsolute(absolute_file_path, .{}); + defer file.close(); + try file.writeAll(writer.bytes.items); + } +}; + +const Header = extern struct { + magic: u8 = 0x7f, + elf_id: [3]u8 = "ELF".*, + bit_count: BitCount = .@"64", + endianness: Endianness = .little, + header_version: u8 = 1, + os_abi: ABI, + abi_version: u8 = 0, + padding: [7]u8 = [_]u8{0} ** 7, + object_type: ObjectFileType = .executable, // e_type + machine: Machine, + version: u32 = 1, + entry: u64, + program_header_offset: u64 = std.mem.alignForward(u16, @sizeOf(Header), @alignOf(ProgramHeader)), + section_header_offset: u64, + flags: u32 = 0, + header_size: u16 = 0x40, + program_header_size: u16 = @sizeOf(ProgramHeader), + program_header_count: u16 = 1, + section_header_size: u16 = @sizeOf(SectionHeader), + section_header_count: u16, + name_section_header_index: u16, + + const BitCount = enum(u8) { + @"32" = 1, + @"64" = 2, + }; + + const ABI = 
enum(u8) { + systemv = 0, + }; + + const ObjectFileType = enum(u16) { + none = 0, + relocatable = 1, + executable = 2, + dynamic = 3, + core = 4, + lo_os = 0xfe00, + hi_os = 0xfeff, + lo_proc = 0xff00, + hi_proc = 0xffff, + }; + + const Machine = enum(u16) { + AMD64 = 0x3e, + }; + + const Endianness = enum(u8) { + little = 1, + big = 2, + }; +}; + +const ProgramHeader = extern struct { + type: Type = .load, + flags: Flags, + offset: u64, + virtual_address: u64, + physical_address: u64, + size_in_file: u64, + size_in_memory: u64, + alignment: u64 = 0, + + const Type = enum(u32) { + null = 0, + load = 1, + dynamic = 2, + interpreter = 3, + note = 4, + shlib = 5, // reserved + program_header = 6, + tls = 7, + lo_os = 0x60000000, + hi_os = 0x6fffffff, + lo_proc = 0x70000000, + hi_proc = 0x7fffffff, + }; + + const Flags = packed struct(u32) { + executable: bool, + writable: bool, + readable: bool, + reserved: u29 = 0, + }; +}; +const SectionHeader = extern struct { + name_offset: u32, + type: Type, + flags: Flags, + address: u64, + offset: u64, + size: u64, + // section index + link: u32, + info: u32, + alignment: u64, + entry_size: u64, + + // type + const Type = enum(u32) { + null = 0, + program_data = 1, + symbol_table = 2, + string_table = 3, + relocation_entries_addends = 4, + symbol_hash_table = 5, + dynamic_linking_info = 6, + notes = 7, + program_space_no_data = 8, + relocation_entries = 9, + reserved = 10, + dynamic_linker_symbol_table = 11, + array_of_constructors = 14, + array_of_destructors = 15, + array_of_pre_constructors = 16, + section_group = 17, + extended_section_indices = 18, + number_of_defined_types = 19, + start_os_specific = 0x60000000, + }; + + const Flags = packed struct(u64) { + writable: bool, + alloc: bool, + executable: bool, + reserved: bool = false, + mergeable: bool = false, + contains_null_terminated_strings: bool = false, + info_link: bool = false, + link_order: bool = false, + os_non_conforming: bool = false, + section_group: bool = 
false, + tls: bool = false, + _reserved: u53 = 0, + }; +}; diff --git a/src/backend/emit.zig b/src/backend/emit.zig index c35c0f3..87876c5 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -6,17 +6,25 @@ const assert = std.debug.assert; const expect = std.testing.expect; const expectEqual = std.testing.expectEqual; +const Compilation = @import("../Compilation.zig"); + const ir = @import("intermediate_representation.zig"); const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const AutoHashMap = data_structures.AutoHashMap; +const mmap = data_structures.mmap; + +const elf = @import("elf.zig"); +const pe = @import("pe.zig"); +const macho = @import("macho.zig"); const jit_callconv = .SysV; const Section = struct { content: []align(page_size) u8, index: usize = 0, + alignment: u32 = 0x10, }; pub const Result = struct { @@ -26,63 +34,34 @@ pub const Result = struct { data: Section, }, entry_point: u32 = 0, + target: std.Target, - pub fn create() !Result { + pub fn create(target: std.Target) !Result { return Result{ .sections = .{ .text = .{ .content = try mmap(page_size, .{ .executable = true }) }, .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) }, .data = .{ .content = try mmap(page_size, .{ .executable = false }) }, }, - }; - } - - fn mmap(size: usize, flags: packed struct { - executable: bool, - }) ![]align(page_size) u8 { - return switch (@import("builtin").os.tag) { - .windows => blk: { - const windows = std.os.windows; - break :blk @as([*]align(0x1000) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size]; - }, - .linux, .macos => |os_tag| blk: { - const jit = switch (os_tag) { - .macos => 0x800, - .linux => 0, - else => unreachable, - }; - const execute_flag: switch (os_tag) { - .linux => u32, - .macos => c_int, - else => unreachable, - } = if (flags.executable) std.os.PROT.EXEC else 0; 
- const protection_flags: u32 = @intCast(std.os.PROT.READ | std.os.PROT.WRITE | execute_flag); - const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE | jit; - - break :blk std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0); - }, - else => @compileError("OS not supported"), + .target = target, }; } pub fn appendCode(image: *Result, code: []const u8) void { - std.debug.print("New code: ", .{}); - for (code) |byte| { - std.debug.print("0x{x} ", .{byte}); - } - std.debug.print("\n", .{}); const destination = image.sections.text.content[image.sections.text.index..][0..code.len]; @memcpy(destination, code); image.sections.text.index += code.len; } pub fn appendCodeByte(image: *Result, code_byte: u8) void { - std.debug.print("New code: 0x{x}\n", .{code_byte}); image.sections.text.content[image.sections.text.index] = code_byte; image.sections.text.index += 1; } fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType { + if (@import("builtin").cpu.arch == .aarch64 and @import("builtin").os.tag == .macos) { + data_structures.pthread_jit_write_protect_np(true); + } comptime { assert(@typeInfo(FunctionType) == .Fn); } @@ -90,6 +69,18 @@ pub const Result = struct { assert(image.sections.text.content.len > 0); return @as(*const FunctionType, @ptrCast(&image.sections.text.content[image.entry_point])); } + + fn writeElf(image: *const Result, allocator: Allocator, executable_relative_path: []const u8) !void { + var writer = try elf.Writer.init(allocator); + try writer.writeToMemory(image); + try writer.writeToFile(executable_relative_path); + } + + fn writePe(image: *const Result, allocator: Allocator, executable_relative_path: []const u8) !void { + var writer = try pe.Writer.init(allocator); + try writer.writeToMemory(image); + try writer.writeToFile(executable_relative_path); + } }; pub fn InstructionSelector(comptime Instruction: type) type { @@ -117,33 +108,45 @@ pub fn InstructionSelector(comptime Instruction: type) type { pub 
fn get(comptime arch: std.Target.Cpu.Arch) type { const backend = switch (arch) { .x86_64 => @import("x86_64.zig"), - else => @compileError("Architecture not supported"), + .aarch64 => @import("aarch64.zig"), + else => {}, }; return struct { - pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void { - std.debug.print("Entry point: {}\n", .{intermediate.entry_point}); - var mir = try backend.MIR.generate(allocator, intermediate); - try mir.allocateRegisters(allocator, intermediate); - const result = try mir.encode(intermediate); + pub fn initialize(allocator: Allocator, intermediate: *ir.Result, descriptor: Compilation.Module.Descriptor) !void { + switch (arch) { + .x86_64 => { + var mir = try backend.MIR.selectInstructions(allocator, intermediate, descriptor.target); + try mir.allocateRegisters(); + const os = descriptor.target.os.tag; + _ = os; + const image = try mir.encode(); + _ = image; - const text_section = result.sections.text.content[0..result.sections.text.index]; - for (text_section) |byte| { - std.debug.print("0x{x}\n", .{byte}); - } - - switch (@import("builtin").os.tag) { - .linux => switch (@import("builtin").cpu.arch == arch) { - true => { - std.debug.print("Executing...\n", .{}); - const entryPoint = result.getEntryPoint(fn () callconv(.SysV) noreturn); - entryPoint(); - std.debug.print("This should not print...\n", .{}); - }, - false => {}, + // switch (os) { + // .linux => try image.writeElf(allocator, descriptor.executable_path), + // .windows => try image.writePe(allocator, descriptor.executable_path), + // else => unreachable, + // } + }, + else => { + const file = try std.fs.cwd().readFileAlloc(allocator, "main", std.math.maxInt(u64)); + try macho.interpretFile(allocator, descriptor, file); }, - else => {}, } + + // switch (@import("builtin").os.tag) { + // .linux => switch (@import("builtin").cpu.arch == arch) { + // true => { + // std.debug.print("Executing...\n", .{}); + // const entryPoint = result.getEntryPoint(fn () 
callconv(.SysV) noreturn); + // entryPoint(); + // std.debug.print("This should not print...\n", .{}); + // }, + // false => {}, + // }, + // else => {}, + // } } }; } diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index b396aa7..2b4fd0a 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -12,46 +12,50 @@ const ArrayList = data_structures.ArrayList; const BlockList = data_structures.BlockList; const AutoArrayHashMap = data_structures.AutoArrayHashMap; const AutoHashMap = data_structures.AutoHashMap; +const StringKeyMap = data_structures.StringKeyMap; pub const Result = struct { blocks: BlockList(BasicBlock) = .{}, calls: BlockList(Call) = .{}, - functions: BlockList(Function) = .{}, + function_declarations: BlockList(Function.Declaration) = .{}, + function_definitions: BlockList(Function) = .{}, instructions: BlockList(Instruction) = .{}, jumps: BlockList(Jump) = .{}, loads: BlockList(Load) = .{}, phis: BlockList(Phi) = .{}, stores: BlockList(Store) = .{}, syscalls: BlockList(Syscall) = .{}, - values: BlockList(Value) = .{}, + arguments: BlockList(Argument) = .{}, + returns: BlockList(Return) = .{}, stack_references: BlockList(StackReference) = .{}, + string_literals: BlockList(StringLiteral) = .{}, + casts: BlockList(Cast) = .{}, + readonly_data: ArrayList(u8) = .{}, + module: *Module, entry_point: u32 = 0, + + pub fn getFunctionName(ir: *Result, function_index: Function.Declaration.Index) []const u8 { + return ir.module.getName(ir.module.function_name_map.get(@bitCast(function_index)).?).?; + } }; -pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_file: Compilation.Type.Index) !Result { - _ = main_file; - _ = package; - print("\nFunction count: {}\n", .{module.functions.len}); - +pub fn initialize(compilation: *Compilation, module: *Module) !*Result { var function_iterator = module.functions.iterator(); - var 
builder = Builder{ + const builder = try compilation.base_allocator.create(Builder); + builder.* = .{ .allocator = compilation.base_allocator, .module = module, }; + builder.ir.module = module; builder.ir.entry_point = module.entry_point orelse unreachable; while (function_iterator.next()) |sema_function| { const function_index = try builder.buildFunction(sema_function); - try builder.optimizeFunction(function_index); + _ = function_index; } - var ir_function_iterator = builder.ir.functions.iterator(); - while (ir_function_iterator.nextPointer()) |function| { - print("\n{}\n", .{function}); - } - - return builder.ir; + return &builder.ir; } pub const BasicBlock = struct { @@ -71,25 +75,20 @@ pub const BasicBlock = struct { basic_block.sealed = true; } -}; -pub const Instruction = union(enum) { - call: Call.Index, - jump: Jump.Index, - load: Load.Index, - phi: Phi.Index, - ret: Value.Index, - store: Store.Index, - syscall: Value.Index, - copy: Value.Index, - @"unreachable", - - pub const List = BlockList(@This()); - pub const Index = List.Index; + fn hasJump(basic_block: *BasicBlock, ir: *Result) bool { + if (basic_block.instructions.items.len > 0) { + const last_instruction = ir.instructions.get(basic_block.instructions.getLast()); + return switch (last_instruction.*) { + .jump => true, + else => false, + }; + } else return false; + } }; const Phi = struct { - value: Value.Index, + instruction: Instruction.Index, jump: Jump.Index, block: BasicBlock.Index, next: Phi.Index, @@ -105,27 +104,28 @@ pub const Jump = struct { }; const Syscall = struct { - arguments: ArrayList(Value.Index), + arguments: ArrayList(Instruction.Index), pub const List = BlockList(@This()); pub const Index = List.Index; }; -const Load = struct { - value: Value.Index, +pub const Load = struct { + instruction: Instruction.Index, pub const List = BlockList(@This()); pub const Index = List.Index; }; -const Store = struct { - source: Value.Index, - destination: Value.Index, +pub const Store = struct 
{ + source: Instruction.Index, + destination: Instruction.Index, pub const List = BlockList(@This()); pub const Index = List.Index; }; pub const StackReference = struct { - size: u64, + type: Type, + count: usize = 1, alignment: u64, offset: u64, pub const List = BlockList(@This()); @@ -133,59 +133,242 @@ pub const StackReference = struct { }; pub const Call = struct { - function: Function.Index, + function: Function.Declaration.Index, + arguments: []const Instruction.Index, pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; }; -pub const Value = union(enum) { - integer: Compilation.Integer, - load: Load.Index, - call: Call.Index, - stack_reference: StackReference.Index, - phi: Phi.Index, - instruction: Instruction.Index, - syscall: Syscall.Index, +pub const Argument = struct { + type: Type, + // index: usize, pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; - pub fn isInMemory(value: Value) bool { - return switch (value) { - .integer => false, - .load => true, - .call => true, - .stack_reference => true, - .phi => unreachable, - .instruction => unreachable, - .syscall => unreachable, +pub const Return = struct { + instruction: Instruction.Index, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const Copy = struct { + foo: u64 = 0, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const Cast = struct { + value: Instruction.Index, + type: Type, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const CastType = enum { + sign_extend, +}; + +pub const Type = enum { + void, + noreturn, + i8, + i16, + i32, + i64, + + fn isInteger(t: Type) bool { + return switch (t) { + .i8, + .i16, + .i32, + .i64, + => true, + .void, + .noreturn, + => 
false, + }; + } + + pub fn getSize(t: Type) u64 { + return switch (t) { + .i8 => @sizeOf(i8), + .i16 => @sizeOf(i16), + .i32 => @sizeOf(i32), + .i64 => @sizeOf(i64), + .void, + .noreturn, + => unreachable, + }; + } + + pub fn getAlignment(t: Type) u64 { + return switch (t) { + .i8 => @alignOf(i8), + .i16 => @alignOf(i16), + .i32 => @alignOf(i32), + .i64 => @alignOf(i64), + .void, + .noreturn, + => unreachable, }; } }; +pub const Instruction = union(enum) { + call: Call.Index, + jump: Jump.Index, + load: Load.Index, + phi: Phi.Index, + ret: Return.Index, + store: Store.Index, + syscall: Syscall.Index, + copy: Instruction.Index, + @"unreachable", + argument: Argument.Index, + load_integer: Integer, + load_string_literal: StringLiteral.Index, + stack: StackReference.Index, + sign_extend: Cast.Index, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const StringLiteral = struct { + offset: u32, + hash: u32, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + pub const Function = struct { + declaration: Declaration.Index = Declaration.Index.invalid, blocks: ArrayList(BasicBlock.Index) = .{}, - stack_map: AutoHashMap(Compilation.Declaration.Index, Value.Index) = .{}, + stack_map: AutoHashMap(Compilation.Declaration.Index, Instruction.Index) = .{}, current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid, return_phi_node: Instruction.Index = Instruction.Index.invalid, return_phi_block: BasicBlock.Index = BasicBlock.Index.invalid, ir: *Result, current_stack_offset: usize = 0, + pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Declaration = struct { + definition: Function.Index = Function.Index.invalid, + arguments: AutoArrayHashMap(Compilation.Declaration.Index, Instruction.Index) = .{}, + calling_convention: Compilation.CallingConvention, + return_type: Type, + + pub const List = BlockList(@This()); + pub const Index = Declaration.List.Index; + }; + pub fn format(function: 
*const Function, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - try writer.writeAll("Function:\n"); + const function_index = function.declaration; + const sema_function_index: Compilation.Function.Index = @bitCast(function_index); + const function_name_hash = function.ir.module.function_name_map.get(sema_function_index).?; + const function_name = function.ir.module.getName(function_name_hash).?; + try writer.print("Function #{} \"{s}\"\n", .{ function_index.uniqueInteger(), function_name }); for (function.blocks.items, 0..) |block_index, function_block_index| { - try writer.print("#{}:\n", .{function_block_index}); + try writer.print("#{}: ({})\n", .{ function_block_index, block_index.uniqueInteger() }); const block = function.ir.blocks.get(block_index); for (block.instructions.items, 0..) |instruction_index, block_instruction_index| { - try writer.print("%{}: ", .{block_instruction_index}); - const instruction = function.ir.instructions.get(instruction_index).*; - try writer.print("{s}", .{@tagName(instruction)}); + try writer.print("%{} (${}): ", .{ block_instruction_index, instruction_index.uniqueInteger() }); + const instruction = function.ir.instructions.get(instruction_index); + try writer.print("{s}", .{@tagName(instruction.*)}); + switch (instruction.*) { + .syscall => |syscall_index| { + const syscall = function.ir.syscalls.get(syscall_index); + try writer.writeAll(" ("); + for (syscall.arguments.items, 0..) 
|arg_index, i| { + const arg_value = function.ir.instructions.get(arg_index).*; + + try writer.print("${}: {s}", .{ i, @tagName(arg_value) }); + + if (i < syscall.arguments.items.len - 1) { + try writer.writeAll(", "); + } + } + try writer.writeAll(")"); + }, + .jump => |jump_index| { + const jump = function.ir.jumps.get(jump_index); + try writer.print(" ${}", .{jump.destination.uniqueInteger()}); + }, + .phi => {}, + .ret => |ret_index| { + const ret = function.ir.returns.get(ret_index); + switch (ret.instruction.valid) { + true => { + const ret_value = function.ir.instructions.get(ret.instruction).*; + try writer.print(" {s}", .{@tagName(ret_value)}); + }, + false => try writer.writeAll(" void"), + } + }, + // .load => |load_index| { + // const load = function.ir.loads.get(load_index); + // try writer.print(" {s}", .{@tagName(function.ir.values.get(load.value).*)}); + // }, + .store => |store_index| { + const store = function.ir.stores.get(store_index); + const source = function.ir.instructions.get(store.source).*; + const destination = function.ir.instructions.get(store.destination).*; + try writer.print(" {s}, {s}", .{ @tagName(destination), @tagName(source) }); + }, + .call => |call_index| { + const call = function.ir.calls.get(call_index); + try writer.print(" ${} {s}(", .{ call.function.uniqueInteger(), function.ir.getFunctionName(call.function) }); + for (call.arguments, 0..) 
|arg_index, i| { + const arg_value = function.ir.instructions.get(arg_index).*; + + try writer.print("${}: {s}", .{ i, @tagName(arg_value) }); + + if (i < call.arguments.len - 1) { + try writer.writeAll(", "); + } + } + try writer.writeAll(")"); + }, + .load_integer => |integer| { + try writer.print(" {s} (unsigned: 0x{x}, signed {})", .{ @tagName(integer.type), integer.value.unsigned, integer.value.unsigned }); + }, + .@"unreachable" => {}, + .load_string_literal => |string_literal_index| { + const string_literal = function.ir.string_literals.get(string_literal_index); + try writer.print(" at 0x{x}", .{string_literal.offset}); + }, + .stack => |stack_index| { + const stack = function.ir.stack_references.get(stack_index); + try writer.print(" offset: {}. size: {}. alignment: {}", .{ stack.offset, stack.type.getSize(), stack.alignment }); + }, + .argument => |argument_index| { + const argument = function.ir.arguments.get(argument_index); + try writer.print("${}, size: {}. alignment: {}", .{ argument_index, argument.type.getSize(), argument.type.getAlignment() }); + }, + .sign_extend => |cast_index| { + const cast = function.ir.casts.get(cast_index); + try writer.print(" {s} ${}", .{ @tagName(cast.type), cast.value.uniqueInteger() }); + }, + .load => |load_index| { + const load = function.ir.loads.get(load_index); + try writer.print(" ${}", .{load.instruction.uniqueInteger()}); + }, + else => |t| @panic(@tagName(t)), + } + try writer.writeByte('\n'); } + try writer.writeByte('\n'); } _ = options; @@ -195,51 +378,117 @@ pub const Function = struct { pub const Builder = struct { allocator: Allocator, - ir: Result = .{}, + ir: Result = .{ + .module = undefined, + }, module: *Module, current_function_index: Function.Index = Function.Index.invalid, fn currentFunction(builder: *Builder) *Function { - return builder.ir.functions.get(builder.current_function_index); + return builder.ir.function_definitions.get(builder.current_function_index); } - fn buildFunction(builder: 
*Builder, sema_function: Compilation.Function) !Function.Index { - const function_allocation = try builder.ir.functions.append(builder.allocator, .{ - .ir = &builder.ir, - }); - builder.current_function_index = function_allocation.index; - const function = function_allocation.ptr; - // TODO: arguments - function.current_basic_block = try builder.newBlock(); + fn buildFunction(builder: *Builder, sema_function: Compilation.Function) !void { + const sema_prototype = builder.module.function_prototypes.get(builder.module.types.get(sema_function.prototype).function); + const function_declaration_allocation = try builder.ir.function_declarations.addOne(builder.allocator); + const function_declaration = function_declaration_allocation.ptr; + function_declaration.* = .{ + .calling_convention = sema_prototype.attributes.calling_convention, + .return_type = try builder.translateType(sema_prototype.return_type), + }; - const return_type = builder.module.types.get(builder.module.function_prototypes.get(sema_function.prototype).return_type); - const is_noreturn = return_type.* == .noreturn; - if (!is_noreturn) { - const exit_block = try builder.newBlock(); - const phi_instruction = try builder.appendToBlock(exit_block, .{ - .phi = Phi.Index.invalid, - }); - // phi.ptr.* = .{ - // .value = Value.Index.invalid, - // .jump = Jump.Index.invalid, - // .block = exit_block, - // .next = Phi.Index.invalid, - // }; - const ret = try builder.appendToBlock(exit_block, .{ - .ret = (try builder.ir.values.append(builder.allocator, .{ - .instruction = phi_instruction, - })).index, - }); - _ = ret; - function.return_phi_node = phi_instruction; - function.return_phi_block = exit_block; + const function_decl_name = builder.ir.getFunctionName(function_declaration_allocation.index); + + if (sema_prototype.arguments) |sema_arguments| { + try function_declaration.arguments.ensureTotalCapacity(builder.allocator, @intCast(sema_arguments.len)); + for (sema_arguments) |sema_argument_declaration_index| { 
+ const sema_argument_declaration = builder.module.declarations.get(sema_argument_declaration_index); + const argument_allocation = try builder.ir.arguments.append(builder.allocator, .{ + .type = try builder.translateType(sema_argument_declaration.type), + }); + const value_allocation = try builder.ir.instructions.append(builder.allocator, .{ + .argument = argument_allocation.index, + }); + function_declaration.arguments.putAssumeCapacity(sema_argument_declaration_index, value_allocation.index); + } } - const sema_block = sema_function.getBodyBlock(builder.module); - try builder.block(sema_block, .{ .emit_exit_block = !is_noreturn }); - builder.currentFunction().current_stack_offset = std.mem.alignForward(usize, builder.currentFunction().current_stack_offset, 0x10); + switch (sema_prototype.attributes.@"extern") { + true => {}, + false => { + const function_allocation = try builder.ir.function_definitions.append(builder.allocator, .{ + .ir = &builder.ir, + }); + const function = function_allocation.ptr; - return builder.current_function_index; + builder.current_function_index = function_allocation.index; + function.declaration = function_declaration_allocation.index; + + // TODO: arguments + function.current_basic_block = try builder.newBlock(); + + const return_type = builder.module.types.get(sema_prototype.return_type); + const is_noreturn = return_type.* == .noreturn; + if (std.mem.eql(u8, function_decl_name, "print")) { + print("WTDASDAS", .{}); + } + + if (!is_noreturn) { + const exit_block = try builder.newBlock(); + const phi_instruction = try builder.appendToBlock(exit_block, .{ + .phi = Phi.Index.invalid, + }); + // phi.ptr.* = .{ + // .value = Value.Index.invalid, + // .jump = Jump.Index.invalid, + // .block = exit_block, + // .next = Phi.Index.invalid, + // }; + const ret = try builder.appendToBlock(exit_block, .{ + .ret = (try builder.ir.returns.append(builder.allocator, .{ + .instruction = phi_instruction, + })).index, + }); + _ = ret; + 
function.return_phi_node = phi_instruction; + function.return_phi_block = exit_block; + } + + try function.stack_map.ensureUnusedCapacity(builder.allocator, @intCast(function_declaration.arguments.keys().len)); + + for (function_declaration.arguments.keys(), function_declaration.arguments.values()) |sema_argument_index, ir_argument_instruction_index| { + const ir_argument_instruction = builder.ir.instructions.get(ir_argument_instruction_index); + const ir_argument = builder.ir.arguments.get(ir_argument_instruction.argument); + + const stack_reference = try builder.stackReference(.{ + .type = ir_argument.type, + .sema = sema_argument_index, + }); + + _ = try builder.store(.{ + .source = ir_argument_instruction_index, + .destination = stack_reference, + }); + } + + const sema_block = sema_function.getBodyBlock(builder.module); + try builder.block(sema_block, .{ .emit_exit_block = !is_noreturn }); + + if (!is_noreturn and sema_block.reaches_end) { + if (!builder.ir.blocks.get(builder.currentFunction().current_basic_block).hasJump(&builder.ir)) { + _ = try builder.append(.{ + .jump = try builder.jump(.{ + .source = builder.currentFunction().current_basic_block, + .destination = builder.currentFunction().return_phi_block, + }), + }); + } + } + + builder.currentFunction().current_stack_offset = std.mem.alignForward(usize, builder.currentFunction().current_stack_offset, 0x10); + try builder.optimizeFunction(builder.currentFunction()); + }, + } } const BlockSearcher = struct { @@ -247,7 +496,7 @@ pub const Builder = struct { visited: AutoArrayHashMap(BasicBlock.Index, void) = .{}, }; - fn findReachableBlocks(builder: *Builder, first: BasicBlock.Index) ![]const BasicBlock.Index { + fn findReachableBlocks(builder: *Builder, first: BasicBlock.Index) !ArrayList(BasicBlock.Index) { var searcher = BlockSearcher{}; try searcher.to_visit.append(builder.allocator, first); try searcher.visited.put(builder.allocator, first, {}); @@ -257,44 +506,100 @@ pub const Builder = struct { 
const block_to_visit = builder.ir.blocks.get(block_index); const last_instruction_index = block_to_visit.instructions.items[block_to_visit.instructions.items.len - 1]; const last_instruction = builder.ir.instructions.get(last_instruction_index); - switch (last_instruction.*) { - .jump => |jump_index| { + const block_to_search = switch (last_instruction.*) { + .jump => |jump_index| blk: { const ir_jump = builder.ir.jumps.get(jump_index); assert(ir_jump.source.eq(block_index)); const new_block = ir_jump.destination; - if (searcher.visited.get(new_block) == null) { - try searcher.to_visit.append(builder.allocator, new_block); - try searcher.visited.put(builder.allocator, new_block, {}); + break :blk new_block; + }, + .call => |call_index| blk: { + const ir_call = builder.ir.calls.get(call_index); + const function_declaration_index = ir_call.function; + const function_declaration = builder.ir.function_declarations.get(function_declaration_index); + const function_definition_index = function_declaration.definition; + switch (function_definition_index.valid) { + true => { + const function = builder.ir.function_definitions.get(function_definition_index); + const first_block = function.blocks.items[0]; + break :blk first_block; + }, + false => continue, } }, - .@"unreachable", .ret => {}, + .@"unreachable", .ret, .store => continue, else => |t| @panic(@tagName(t)), + }; + + if (searcher.visited.get(block_to_search) == null) { + try searcher.to_visit.append(builder.allocator, block_to_search); + try searcher.visited.put(builder.allocator, block_to_search, {}); } } - return searcher.visited.keys(); + var list = try ArrayList(BasicBlock.Index).initCapacity(builder.allocator, searcher.visited.keys().len); + list.appendSliceAssumeCapacity(searcher.visited.keys()); + + return list; } - fn optimizeFunction(builder: *Builder, function_index: Function.Index) !void { - const function = builder.ir.functions.get(function_index); - const reachable_blocks = try 
builder.findReachableBlocks(function.blocks.items[0]); + fn optimizeFunction(builder: *Builder, function: *Function) !void { + // HACK + print("\n[BEFORE OPTIMIZE]:\n{}", .{function}); + var reachable_blocks = try builder.findReachableBlocks(function.blocks.items[0]); var did_something = true; while (did_something) { did_something = false; - for (reachable_blocks) |basic_block_index| { + for (reachable_blocks.items) |basic_block_index| { const basic_block = builder.ir.blocks.get(basic_block_index); for (basic_block.instructions.items) |instruction_index| { - did_something = did_something or try builder.removeUnreachablePhis(reachable_blocks, instruction_index); + did_something = did_something or try builder.removeUnreachablePhis(reachable_blocks.items, instruction_index); did_something = did_something or try builder.removeTrivialPhis(instruction_index); const copy = try builder.removeCopyReferences(instruction_index); did_something = did_something or copy; } + + if (basic_block.instructions.items.len > 0) { + const instruction = builder.ir.instructions.get(basic_block.instructions.getLast()); + switch (instruction.*) { + .jump => |jump_index| { + const jump_instruction = builder.ir.jumps.get(jump_index); + const source = basic_block_index; + assert(source.eq(jump_instruction.source)); + const destination = jump_instruction.destination; + + const source_index = for (function.blocks.items, 0..) |bi, index| { + if (source.eq(bi)) break index; + } else unreachable; + const destination_index = for (function.blocks.items, 0..) |bi, index| { + if (destination.eq(bi)) break index; + } else unreachable; + + if (destination_index == source_index + 1) { + const destination_block = builder.ir.blocks.get(destination); + _ = basic_block.instructions.pop(); + try basic_block.instructions.appendSlice(builder.allocator, destination_block.instructions.items); + _ = function.blocks.orderedRemove(destination_index); + const reachable_index = for (reachable_blocks.items, 0..) 
|bi, index| { + if (destination.eq(bi)) break index; + } else unreachable; + _ = reachable_blocks.swapRemove(reachable_index); + did_something = true; + break; + } + }, + .ret, .@"unreachable", .call => {}, + else => |t| @panic(@tagName(t)), + } + } else { + unreachable; + } } } var instructions_to_delete = ArrayList(u32){}; - for (reachable_blocks) |basic_block_index| { + for (reachable_blocks.items) |basic_block_index| { instructions_to_delete.clearRetainingCapacity(); const basic_block = builder.ir.blocks.get(basic_block_index); for (basic_block.instructions.items, 0..) |instruction_index, index| { @@ -310,6 +615,8 @@ pub const Builder = struct { _ = basic_block.instructions.orderedRemove(instruction_to_delete - deleted_instruction_count); } } + + print("[AFTER OPTIMIZE]:\n{}", .{function}); } fn removeUnreachablePhis(builder: *Builder, reachable_blocks: []const BasicBlock.Index, instruction_index: Instruction.Index) !bool { @@ -344,21 +651,21 @@ pub const Builder = struct { const instruction = builder.ir.instructions.get(instruction_index); return switch (instruction.*) { .phi => |phi_index| blk: { - const trivial_phi: ?Value.Index = trivial_blk: { - var only_value = Value.Index.invalid; + const trivial_phi: ?Instruction.Index = trivial_blk: { + var only_value = Instruction.Index.invalid; var it = phi_index; while (it.valid) { const phi = builder.ir.phis.get(it); - const phi_value = builder.ir.values.get(phi.value); + const phi_value = builder.ir.instructions.get(phi.instruction); if (phi_value.* == .phi) unreachable; // TODO: undefined if (only_value.valid) { - if (!only_value.eq(phi.value)) { + if (!only_value.eq(phi.instruction)) { break :trivial_blk null; } } else { - only_value = phi.value; + only_value = phi.instruction; } it = phi.next; @@ -379,7 +686,10 @@ pub const Builder = struct { }; } } else { - unreachable; + print("TODO: maybe this phi removal is wrong?", .{}); + instruction.* = .{ + .copy = trivial_value, + }; } } @@ -396,22 +706,21 @@ pub const 
Builder = struct { else => { var did_something = false; - const operands: []const *Value.Index = switch (instruction.*) { - .jump, .@"unreachable" => &.{}, - .ret => &.{&instruction.ret}, + const operands: []const *Instruction.Index = switch (instruction.*) { + .jump, .@"unreachable", .load_integer, .load_string_literal, .stack, .argument => &.{}, + .ret => &.{&builder.ir.returns.get(instruction.ret).instruction}, // TODO: arguments .call => blk: { - var list = ArrayList(*Value.Index){}; + var list = ArrayList(*Instruction.Index){}; break :blk list.items; }, .store => |store_index| blk: { const store_instr = builder.ir.stores.get(store_index); break :blk &.{ &store_instr.source, &store_instr.destination }; }, - .syscall => |syscall_value_index| blk: { - const syscall_value = builder.ir.values.get(syscall_value_index); - const syscall = builder.ir.syscalls.get(syscall_value.syscall); - var list = ArrayList(*Value.Index){}; + .syscall => |syscall_index| blk: { + const syscall = builder.ir.syscalls.get(syscall_index); + var list = ArrayList(*Instruction.Index){}; try list.ensureTotalCapacity(builder.allocator, syscall.arguments.items.len); for (syscall.arguments.items) |*arg| { list.appendAssumeCapacity(arg); @@ -419,24 +728,38 @@ pub const Builder = struct { break :blk list.items; }, + .sign_extend => |cast_index| blk: { + const cast = builder.ir.casts.get(cast_index); + break :blk &.{&cast.value}; + }, + .load => |load_index| blk: { + const load = builder.ir.loads.get(load_index); + break :blk &.{&load.instruction}; + }, else => |t| @panic(@tagName(t)), }; - for (operands) |operand_value_index| { - const operand_value = builder.ir.values.get(operand_value_index.*); - switch (operand_value.*) { - .instruction => |operand_instruction_index| { - const operand_instruction = builder.ir.instructions.get(operand_instruction_index); - switch (operand_instruction.*) { + for (operands) |operand_instruction_index_pointer| { + switch (operand_instruction_index_pointer.valid) { 
+ true => { + const operand_value = builder.ir.instructions.get(operand_instruction_index_pointer.*); + switch (operand_value.*) { .copy => |copy_value| { - operand_value_index.* = copy_value; + operand_instruction_index_pointer.* = copy_value; did_something = true; }, + .load_integer, + .stack, + .call, + .argument, + .syscall, + .sign_extend, + .load, + => {}, else => |t| @panic(@tagName(t)), } }, - .integer, .stack_reference, .call => {}, - else => |t| @panic(@tagName(t)), + false => {}, } } @@ -456,7 +779,95 @@ pub const Builder = struct { emit_exit_block: bool = true, }; - fn block(builder: *Builder, sema_block: *Compilation.Block, options: BlockOptions) error{OutOfMemory}!void { + fn emitSyscallArgument(builder: *Builder, sema_syscall_argument_value_index: Compilation.Value.Index) !Instruction.Index { + const sema_syscall_argument_value = builder.module.values.get(sema_syscall_argument_value_index); + return switch (sema_syscall_argument_value.*) { + .integer => |integer| try builder.processInteger(integer), + .sign_extend => |cast_index| try builder.processCast(cast_index, .sign_extend), + .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + else => |t| @panic(@tagName(t)), + }; + } + + fn processCast(builder: *Builder, sema_cast_index: Compilation.Cast.Index, cast_type: CastType) !Instruction.Index { + const sema_cast = builder.module.casts.get(sema_cast_index); + const sema_source_value = builder.module.values.get(sema_cast.value); + const source_value = switch (sema_source_value.*) { + .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + else => |t| @panic(@tagName(t)), + }; + + const cast_allocation = try builder.ir.casts.append(builder.allocator, .{ + .value = source_value, + .type = try builder.translateType(sema_cast.type), + }); + + const result = try builder.append(@unionInit(Instruction, switch (cast_type) { + inline 
else => |ct| @tagName(ct), + }, cast_allocation.index)); + + return result; + } + + fn processDeclarationReferenceRaw(builder: *Builder, declaration_index: Compilation.Declaration.Index) !Instruction.Index { + const sema_declaration = builder.module.declarations.get(declaration_index); + const result = switch (sema_declaration.scope_type) { + .local => builder.currentFunction().stack_map.get(declaration_index).?, + .global => unreachable, + }; + return result; + } + + fn loadDeclarationReference(builder: *Builder, declaration_index: Compilation.Declaration.Index) !Instruction.Index { + const stack_instruction = try builder.processDeclarationReferenceRaw(declaration_index); + const load = try builder.ir.loads.append(builder.allocator, .{ + .instruction = stack_instruction, + }); + return try builder.append(.{ + .load = load.index, + }); + } + + fn processInteger(builder: *Builder, integer_value: Compilation.Value.Integer) !Instruction.Index { + const integer = Integer{ + .value = .{ + .unsigned = integer_value.value, + }, + .type = try builder.translateType(integer_value.type), + }; + assert(integer.type.isInteger()); + const load_integer = try builder.append(.{ + .load_integer = integer, + }); + return load_integer; + } + + fn processSyscall(builder: *Builder, sema_syscall_index: Compilation.Syscall.Index) anyerror!Instruction.Index { + const sema_syscall = builder.module.syscalls.get(sema_syscall_index); + var arguments = try ArrayList(Instruction.Index).initCapacity(builder.allocator, sema_syscall.argument_count + 1); + + const sema_syscall_number = sema_syscall.number; + assert(sema_syscall_number.valid); + const number_value_index = try builder.emitSyscallArgument(sema_syscall_number); + + arguments.appendAssumeCapacity(number_value_index); + + for (sema_syscall.getArguments()) |sema_syscall_argument| { + assert(sema_syscall_argument.valid); + const argument_value_index = try builder.emitSyscallArgument(sema_syscall_argument); + 
arguments.appendAssumeCapacity(argument_value_index); + } + + // TODO: undo this mess + const syscall_allocation = try builder.ir.syscalls.append(builder.allocator, .{ + .arguments = arguments, + }); + + const instruction_index = try builder.append(.{ .syscall = syscall_allocation.index }); + return instruction_index; + } + + fn block(builder: *Builder, sema_block: *Compilation.Block, options: BlockOptions) anyerror!void { for (sema_block.statements.items) |sema_statement_index| { const sema_statement = builder.module.values.get(sema_statement_index); switch (sema_statement.*) { @@ -514,37 +925,13 @@ pub const Builder = struct { builder.currentFunction().current_basic_block = loop_prologue_block; } }, - .syscall => |syscall_index| { - const sema_syscall = builder.module.syscalls.get(syscall_index); - var arguments = try ArrayList(Value.Index).initCapacity(builder.allocator, sema_syscall.argument_count + 1); - - const sema_syscall_number = sema_syscall.number; - assert(sema_syscall_number.valid); - const number_value_index = try builder.emitValue(sema_syscall_number); - - arguments.appendAssumeCapacity(number_value_index); - - for (sema_syscall.getArguments()) |sema_syscall_argument| { - assert(sema_syscall_argument.valid); - var argument_value_index = try builder.emitValue(sema_syscall_argument); - arguments.appendAssumeCapacity(argument_value_index); - } - - // TODO: undo this mess - _ = try builder.append(.{ - .syscall = (try builder.ir.values.append(builder.allocator, .{ - .syscall = (try builder.ir.syscalls.append(builder.allocator, .{ - .arguments = arguments, - })).index, - })).index, - }); - }, + .syscall => |sema_syscall_index| _ = try builder.processSyscall(sema_syscall_index), .@"unreachable" => _ = try builder.append(.{ .@"unreachable" = {}, }), .@"return" => |sema_ret_index| { const sema_ret = builder.module.returns.get(sema_ret_index); - const return_value = try builder.emitValue(sema_ret.value); + const return_value = try 
builder.emitReturnValue(sema_ret.value); const phi_instruction = builder.ir.instructions.get(builder.currentFunction().return_phi_node); const phi = switch (phi_instruction.phi.valid) { true => unreachable, @@ -557,9 +944,8 @@ pub const Builder = struct { false => builder.currentFunction().return_phi_block, }, }); - print("Previous phi: {}\n", .{phi_instruction.phi}); phi_instruction.phi = (try builder.ir.phis.append(builder.allocator, .{ - .value = return_value, + .instruction = return_value, .jump = exit_jump, .next = phi_instruction.phi, .block = phi.block, @@ -571,49 +957,83 @@ pub const Builder = struct { }, .declaration => |sema_declaration_index| { const sema_declaration = builder.module.declarations.get(sema_declaration_index); + print("Name: {s}\n", .{builder.module.getName(sema_declaration.name).?}); assert(sema_declaration.scope_type == .local); - const sema_init_value = builder.module.values.get(sema_declaration.init_value); - const declaration_type = builder.module.types.get(sema_init_value.getType(builder.module)); - const size = declaration_type.getSize(); - const alignment = declaration_type.getAlignment(); - const stack_offset = switch (size > 0) { - true => builder.allocateStack(size, alignment), - false => 0, - }; - var value_index = try builder.emitValue(sema_declaration.init_value); - const value = builder.ir.values.get(value_index); - print("Value: {}\n", .{value.*}); - value_index = switch (value.isInMemory()) { - false => try builder.load(value_index), - true => value_index, - }; + const declaration_type = builder.module.types.get(sema_declaration.type); + switch (declaration_type.*) { + .comptime_int => unreachable, + else => { + var value_index = try builder.emitDeclarationInitValue(sema_declaration.init_value); + const value = builder.ir.instructions.get(value_index); + value_index = switch (value.*) { + .load_integer, + .call, + => value_index, + // .call => try builder.load(value_index), + else => |t| @panic(@tagName(t)), + }; - if 
(stack_offset > 0) { - _ = try builder.store(.{ - .source = value_index, - .destination = try builder.stackReference(stack_offset, declaration_type.*, sema_declaration_index), - }); + const ir_type = try builder.translateType(sema_declaration.type); + _ = try builder.store(.{ + .source = value_index, + .destination = try builder.stackReference(.{ + .type = ir_type, + .sema = sema_declaration_index, + }), + }); + }, } }, + .call => |sema_call_index| _ = try builder.processCall(sema_call_index), else => |t| @panic(@tagName(t)), } } } - fn stackReference(builder: *Builder, stack_offset: u64, t: Compilation.Type, sema_declaration: Compilation.Declaration.Index) !Value.Index { + fn emitDeclarationInitValue(builder: *Builder, declaration_init_value_index: Compilation.Value.Index) !Instruction.Index { + const declaration_init_value = builder.module.values.get(declaration_init_value_index); + return switch (declaration_init_value.*) { + .call => |call_index| try builder.processCall(call_index), + .integer => |integer| try builder.processInteger(integer), + else => |t| @panic(@tagName(t)), + }; + } + + fn emitReturnValue(builder: *Builder, return_value_index: Compilation.Value.Index) !Instruction.Index { + const return_value = builder.module.values.get(return_value_index); + return switch (return_value.*) { + .syscall => |syscall_index| try builder.processSyscall(syscall_index), + .integer => |integer| try builder.processInteger(integer), + .call => |call_index| try builder.processCall(call_index), + .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + else => |t| @panic(@tagName(t)), + }; + } + + fn stackReference(builder: *Builder, arguments: struct { + type: Type, + sema: Compilation.Declaration.Index, + alignment: ?u64 = null, + }) !Instruction.Index { + const size = arguments.type.getSize(); + assert(size > 0); + const alignment = if (arguments.alignment) |a| a else arguments.type.getAlignment(); + 
builder.currentFunction().current_stack_offset = std.mem.alignForward(u64, builder.currentFunction().current_stack_offset, alignment); + builder.currentFunction().current_stack_offset += size; + const stack_offset = builder.currentFunction().current_stack_offset; const stack_reference_allocation = try builder.ir.stack_references.append(builder.allocator, .{ .offset = stack_offset, - .size = t.getSize(), - .alignment = t.getAlignment(), + .type = arguments.type, + .alignment = alignment, }); - const value_allocation = try builder.ir.values.append(builder.allocator, .{ - .stack_reference = stack_reference_allocation.index, + const instruction_index = try builder.append(.{ + .stack = stack_reference_allocation.index, }); - try builder.currentFunction().stack_map.put(builder.allocator, sema_declaration, value_allocation.index); + try builder.currentFunction().stack_map.put(builder.allocator, arguments.sema, instruction_index); - return value_allocation.index; + return instruction_index; } fn store(builder: *Builder, descriptor: Store) !void { @@ -623,78 +1043,123 @@ pub const Builder = struct { }); } - fn allocateStack(builder: *Builder, size: u64, alignment: u64) u64 { - builder.currentFunction().current_stack_offset = std.mem.alignForward(u64, builder.currentFunction().current_stack_offset, alignment); - builder.currentFunction().current_stack_offset += size; - return builder.currentFunction().current_stack_offset; + fn emitCallArgument(builder: *Builder, call_argument_value_index: Compilation.Value.Index) !Instruction.Index { + const call_argument_value = builder.module.values.get(call_argument_value_index); + return switch (call_argument_value.*) { + .integer => |integer| try builder.processInteger(integer), + .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + .string_literal => |string_literal_index| try builder.processStringLiteral(string_literal_index), + else => |t| @panic(@tagName(t)), + }; } 
- fn load(builder: *Builder, value_index: Value.Index) !Value.Index { - print("Doing load!\n", .{}); + fn processCall(builder: *Builder, sema_call_index: Compilation.Call.Index) anyerror!Instruction.Index { + const sema_call = builder.module.calls.get(sema_call_index); + const sema_argument_list_index = sema_call.arguments; + const argument_list: []const Instruction.Index = switch (sema_argument_list_index.valid) { + true => blk: { + var argument_list = ArrayList(Instruction.Index){}; + const sema_argument_list = builder.module.argument_lists.get(sema_argument_list_index); + try argument_list.ensureTotalCapacity(builder.allocator, sema_argument_list.array.items.len); + for (sema_argument_list.array.items) |sema_argument_value_index| { + const argument_value_index = try builder.emitCallArgument(sema_argument_value_index); + argument_list.appendAssumeCapacity(argument_value_index); + } + break :blk argument_list.items; + }, + false => &.{}, + }; - const load_allocation = try builder.ir.loads.append(builder.allocator, .{ - .value = value_index, + const call_index = try builder.call(.{ + .function = switch (builder.module.values.get(sema_call.value).*) { + .function => |function_index| .{ + .index = function_index.index, + .block = function_index.block, + }, + else => |t| @panic(@tagName(t)), + }, + .arguments = argument_list, }); + const instruction_index = try builder.append(.{ - .load = load_allocation.index, + .call = call_index, }); - _ = instruction_index; - const result = try builder.ir.values.append(builder.allocator, .{ - .load = load_allocation.index, - }); - return result.index; + + return instruction_index; } - fn emitValue(builder: *Builder, sema_value_index: Compilation.Value.Index) !Value.Index { - const sema_value = builder.module.values.get(sema_value_index).*; - return switch (sema_value) { + fn processStringLiteral(builder: *Builder, string_literal_hash: u32) !Instruction.Index { + const string_literal = 
builder.module.string_literals.getValue(string_literal_hash).?; + + const readonly_offset = builder.ir.readonly_data.items.len; + try builder.ir.readonly_data.appendSlice(builder.allocator, string_literal); + try builder.ir.readonly_data.append(builder.allocator, 0); + + const string_literal_allocation = try builder.ir.string_literals.append(builder.allocator, .{ + .offset = @intCast(readonly_offset), + .hash = string_literal_hash, + }); + const result = try builder.append(.{ + .load_string_literal = string_literal_allocation.index, + }); + + return result; + } + + // fn emitValue(builder: *Builder, sema_value_index: Compilation.Value.Index) !Instruction.Index { + // const sema_value = builder.module.values.get(sema_value_index).*; + // return switch (sema_value) { + // .integer => |integer| try builder.append(.{ + // .integer = integer, + // }), + // .call => |sema_call_index| try builder.processCall(sema_call_index), + // .declaration_reference => |sema_declaration_reference| blk: { + // }, + // .syscall => |sema_syscall_index| try builder.processSyscall(sema_syscall_index), + // .string_literal => |string_literal_hash| blk: { + // const string_literal = builder.module.string_literals.getValue(string_literal_hash).?; + // + // const readonly_offset = builder.ir.readonly_data.items.len; + // try builder.ir.readonly_data.appendSlice(builder.allocator, string_literal); + // + // const string_literal_allocation = try builder.ir.string_literals.append(builder.allocator, .{ + // .offset = @intCast(readonly_offset), + // .hash = string_literal_hash, + // }); + // break :blk try builder.append(.{ + // .string_literal = string_literal_allocation.index, + // }); + // }, + // .sign_extend => |sema_cast_index| blk: { + // const sema_sign_extend = builder.module.casts.get(sema_cast_index); + // + // const sign_extend = try builder.ir.casts.append(builder.allocator, .{ + // .value = try builder.emitValue(sema_sign_extend.value), + // .type = try 
builder.translateType(sema_sign_extend.type), + // }); + // + // break :blk try builder.append(.{ + // .sign_extend = sign_extend.index, + // }); + // }, + // else => |t| @panic(@tagName(t)), + // }; + // } + + fn translateType(builder: *Builder, type_index: Compilation.Type.Index) !Type { + const sema_type = builder.module.types.get(type_index); + return switch (sema_type.*) { + .integer => |integer| switch (integer.bit_count) { + 8 => .i8, + 16 => .i16, + 32 => .i32, + 64 => .i64, + else => unreachable, + }, // TODO - .integer => |integer| (try builder.ir.values.append(builder.allocator, .{ - .integer = integer, - })).index, - .call => |sema_call_index| { - const sema_call = builder.module.calls.get(sema_call_index); - const argument_list_index = sema_call.arguments; - if (argument_list_index.valid) { - unreachable; - } - - const call_index = try builder.call(.{ - .function = switch (builder.module.values.get(sema_call.value).*) { - .function => |function_index| .{ - .index = function_index.index, - .block = function_index.block, - }, - else => |t| @panic(@tagName(t)), - }, - }); - - _ = try builder.append(.{ - .call = call_index, - }); - - const value_allocation = try builder.ir.values.append(builder.allocator, .{ - .call = call_index, - }); - - return value_allocation.index; - }, - .declaration_reference => |sema_declaration_index| { - const sema_declaration = builder.module.declarations.get(sema_declaration_index); - const sema_init_value = builder.module.values.get(sema_declaration.init_value); - const init_type = sema_init_value.getType(builder.module); - _ = init_type; - switch (sema_declaration.scope_type) { - .local => { - const stack_reference = builder.currentFunction().stack_map.get(sema_declaration_index).?; - return stack_reference; - }, - .global => unreachable, - } - // switch (sema_declaration.*) { - // else => |t| @panic(@tagName(t)), - // } - }, + .pointer => .i64, + .void => .void, + .noreturn => .noreturn, else => |t| @panic(@tagName(t)), }; } 
@@ -707,6 +1172,7 @@ pub const Builder = struct { fn jump(builder: *Builder, descriptor: Jump) !Jump.Index { const destination_block = builder.ir.blocks.get(descriptor.destination); assert(!destination_block.sealed); + assert(descriptor.source.valid); const jump_allocation = try builder.ir.jumps.append(builder.allocator, descriptor); return jump_allocation.index; } @@ -719,9 +1185,6 @@ pub const Builder = struct { } fn appendToBlock(builder: *Builder, block_index: BasicBlock.Index, instruction: Instruction) !Instruction.Index { - if (instruction == .phi) { - print("Adding phi: {}\n", .{instruction}); - } const instruction_allocation = try builder.ir.instructions.append(builder.allocator, instruction); try builder.ir.blocks.get(block_index).instructions.append(builder.allocator, instruction_allocation.index); @@ -730,12 +1193,19 @@ pub const Builder = struct { fn newBlock(builder: *Builder) !BasicBlock.Index { const new_block_allocation = try builder.ir.blocks.append(builder.allocator, .{}); - const current_function = builder.ir.functions.get(builder.current_function_index); + const current_function = builder.currentFunction(); const function_block_index = current_function.blocks.items.len; + _ = function_block_index; try current_function.blocks.append(builder.allocator, new_block_allocation.index); - print("Adding block: {}\n", .{function_block_index}); - return new_block_allocation.index; } }; + +pub const Integer = struct { + value: extern union { + signed: i64, + unsigned: u64, + }, + type: Type, +}; diff --git a/src/backend/macho.zig b/src/backend/macho.zig new file mode 100644 index 0000000..e5f9bf9 --- /dev/null +++ b/src/backend/macho.zig @@ -0,0 +1,682 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const equal = std.mem.eql; +const print = std.debug.print; + +const Compilation = @import("../Compilation.zig"); + +const data_structures = @import("../data_structures.zig"); +const ArrayList = 
data_structures.ArrayList; +const mmap = data_structures.mmap; + +const Header = extern struct { + magic: u32 = magic, + cpu_type: CpuType, + cpu_subtype: extern union { + arm: ArmSubType, + x86: X86SubType, + }, + file_type: FileType, + load_command_count: u32, + load_command_size: u32, + flags: Flags, + reserved: u32 = 0, + + const magic = 0xfeedfacf; + + const CpuType = enum(u32) { + VAX = 0x00000001, + ROMP = 0x00000002, + NS32032 = 0x00000004, + NS32332 = 0x00000005, + MC680x0 = 0x00000006, + x86 = 0x00000007, + MIPS = 0x00000008, + NS32352 = 0x00000009, + MC98000 = 0x0000000A, + HPPA = 0x0000000B, + ARM = 0x0000000C, + MC88000 = 0x0000000D, + SPARC = 0x0000000E, + i860be = 0x0000000F, + i860_le = 0x00000010, + RS6000 = 0x00000011, + PowerPC = 0x00000012, + arm64 = 0x0000000C | abi64, + x86_64 = 0x00000007 | abi64, + + const abi64 = 0x01000000; + }; + + const ArmSubType = enum(u32) { + all = 0x00000000, + ARM_A500_ARCH = 0x00000001, + ARM_A500 = 0x00000002, + ARM_A440 = 0x00000003, + ARM_M4 = 0x00000004, + ARM_V4T = 0x00000005, + ARM_V6 = 0x00000006, + ARM_V5TEJ = 0x00000007, + ARM_XSCALE = 0x00000008, + ARM_V7 = 0x00000009, + ARM_V7F = 0x0000000A, + ARM_V7S = 0x0000000B, + ARM_V7K = 0x0000000C, + ARM_V8 = 0x0000000D, + ARM_V6M = 0x0000000E, + ARM_V7M = 0x0000000F, + ARM_V7EM = 0x00000010, + _, + }; + + const X86SubType = enum(u32) { + All = 0x00000003, + @"486" = 0x00000004, + @"486SX" = 0x00000084, + Pentium_M5 = 0x00000056, + Celeron = 0x00000067, + Celeron_Mobile = 0x00000077, + Pentium_3 = 0x00000008, + Pentium_3_M = 0x00000018, + Pentium_3_XEON = 0x00000028, + Pentium_4 = 0x0000000A, + Itanium = 0x0000000B, + Itanium_2 = 0x0000001B, + XEON = 0x0000000C, + XEON_MP = 0x0000001C, + _, + }; + + const FileType = enum(u32) { + relocatable_object = 0x00000001, + demand_paged_executable = 0x00000002, + fixed_vm_shared_library = 0x00000003, + core = 0x00000004, + preloaded_executable = 0x00000005, + dynamic_shared_library = 0x00000006, + dynamic_link_editor = 
0x00000007, + dynamic_bundle = 0x00000008, + shared_library_stub = 0x00000009, + debug_companion = 0x0000000A, + x86_64_kext = 0x0000000B, + archive = 0x0000000C, + }; + + const Flags = packed struct(u32) { + no_undefined_references: bool = true, + incrementally_linked: bool = false, + dynamic_linker_input: bool = true, + dynamic_linker_bound_undefined_references: bool = false, + prebound_dynamic_undefined_references: bool = false, + split_ro_and_rw_segments: bool = false, + _: bool = false, + two_level_namespace_bindings: bool = true, + no_symbol_multiple_definition_in_subimages: bool = false, + no_dyld_prebinding_agent_notification: bool = false, + can_redo_prebinding: bool = false, + bind_two_level_namespaces_to_libraries: bool = false, + safe_to_split_sections_for_dead_code_stripping: bool = false, + canonicalized_by_unprebinding: bool = false, + final_external_weak_symbols: bool = false, + final_weak_symbols: bool = false, + all_stacks_execute_protected: bool = false, + safe_for_zero_uid: bool = false, + safe_for_ugid: bool = false, + no_check_dependent_dylibs_for_reexport: bool = false, + load_at_random_address: bool = false, + no_load_command_for_unreferenced_dylib: bool = true, + thread_local_variable_section: bool = false, + run_with_non_executable_heap: bool = false, + code_linked_for_application_use: bool = false, + nlist_external_symbols_not_all_dyld_info_symbols: bool = false, + allow_lc_min_version_macos_lc_build_version: bool = false, + reserved: u4 = 0, + dylib_only: bool = false, + }; +}; + +const UniversalHeader = extern struct { + magic: u32 = magic, + binary_count: u32, + + const magic = 0xcafebabe; +}; + +const LoadCommand = extern struct { + type: Type, + size: u32, + + const Type = enum(u32) { + segment32 = 0x01, + symbol_table = 0x02, + symbol_table_information = 0x0b, + load_dylib = 0x0c, + id_dylib = 0x0d, + load_dylinker = 0x0e, + id_dylinker = 0x0f, + optional_dynamic_library = 0x18, + segment64 = 0x19, + uuid_number = 0x1b, + 
code_signature = 0x1d, + compressed_linkedit_table = 0x22, + function_starts = 0x26, + data_in_code = 0x29, + source_version = 0x2a, + minimum_os_version = 0x32, + dyld_exports_trie = 0x80000033, + dyld_chained_fixups = 0x80000034, + dyld_main_entry_point = 0x80000028, + }; + + const Segment64 = extern struct { + type: Type = .segment64, + size: u32, + name: [16]u8, + address: u64, + address_size: u64, + file_offset: u64, + file_size: u64, + maximum_virtual_memory_protections: VirtualMemoryProtection, + initial_virtual_memory_protections: VirtualMemoryProtection, + section_count: u32, + flags: Flags, + + const VirtualMemoryProtection = packed struct(u32) { + read: bool, + write: bool, + execute: bool, + reserved: u29 = 0, + }; + + const Flags = packed struct(u32) { + vm_space_high_part: bool = false, + vm_fixed_library: bool = false, + no_relocation: bool = false, + protected_segment: bool = false, + read_only_after_relocations: bool = false, + reserved: u27 = 0, + }; + + const Section = extern struct { + name: [16]u8, + segment_name: [16]u8, + address: u64, + size: u64, + file_offset: u32, + alignment: u32, + relocation_file_offset: u32, + relocation_count: u32, + type: Section.Type, + reserved: u8 = 0, + flags: Section.Flags, + reserved0: u32 = 0, + reserved1: u32 = 0, + reserved2: u32 = 0, + + comptime { + assert(@sizeOf(Section) == 80); + } + + const Type = enum(u8) { + regular = 0, + only_non_lazy_symbol_pointers = 0b110, + only_lazy_symbol_pointers_only_symbol_stubs = 0b111, + zero_fill_on_demand_section = 0b1100, + only_lazy_pointers_to_lazy_loaded_dylibs = 0b10000, + }; + + const Flags = packed struct(u16) { + local_relocations: bool = false, + external_relocations: bool = false, + some_machine_instructions: bool = false, + reserved: u5 = 0, + reserved2: u1 = 0, + debug_section: bool = false, + i386_code_stubs: bool = false, + live_blocks_if_reference_live_blocks: bool = false, + no_dead_stripping: bool = false, + strip_static_symbols_dyldlink_flag: bool = 
false, + coalesced_symbols: bool = false, + only_machine_instructions: bool = false, + }; + }; + + fn getSize(section_count: u32) u32 { + return @sizeOf(LoadCommand.Segment64) + section_count * @sizeOf(LoadCommand.Segment64.Section); + } + }; + + const LinkeditData = extern struct { + type: Type, + size: u32 = 16, + data_offset: u32, + data_size: u32, + }; + + const SymbolTable = extern struct { + type: Type, + size: u32 = 24, + symbol_offset: u32, + symbol_count: u32, + string_table_offset: u32, + string_table_size: u32, + }; + + const SymbolTableInformation = extern struct { + type: Type, + size: u32 = 80, + local_symbol_index: u32, + local_symbol_count: u32, + external_symbol_index: u32, + external_symbol_count: u32, + undefined_symbol_index: u32, + undefined_symbol_count: u32, + content_table_offset: u32, + content_table_entry_count: u32, + module_table_offset: u32, + module_table_entry_count: u32, + referenced_symbol_table_offset: u32, + referenced_symbol_table_entry_count: u32, + indirect_symbol_table_offset: u32, + indirect_symbol_table_entry_count: u32, + external_relocation_offset: u32, + external_relocation_entry_count: u32, + local_relocation_offset: u32, + local_relocation_entry_count: u32, + }; + + const Dylinker = extern struct { + type: Type, + size: u32, + name_offset: u32 = 12, + }; + + const Dylib = extern struct { + type: Type, + size: u32, + name_offset: u32, + timestamp: u32, + current_version: u32, + compatibility_version: u32, + }; + + const Uuid = extern struct { + type: Type, + size: u32, + uuid: [16]u8, + }; + + const MinimumVersion = extern struct { + type: Type, + size: u32, + version: u32, + sdk: u32, + }; + + const SourceVersion = extern struct { + type: Type, + size: u32, + version: u64, + }; + + const EntryPoint = extern struct { + type: Type, + size: u32, + entry_offset: u64, + stack_size: u64, + }; +}; + +const Writer = struct { + items: []u8, + index: usize = 0, + address_offset: usize = 0, + file_offset: usize = 0, + 
load_command_size: u32, + segment_count: u16, + segment_index: u16 = 0, + segment_offset: u16 = @sizeOf(Header), + linkedit_segment_address_offset: u64 = 0, + linkedit_segment_file_offset: u64 = 0, + linkedit_segment_size: u32 = 0, + + fn getWrittenBytes(writer: *const Writer) []const u8 { + return writer.items[0..writer.index]; + } + + fn append(writer: *Writer, bytes: []const u8) void { + writer.writeBytesAt(bytes, writer.index); + writer.index += bytes.len; + } + + fn writeBytesAt(writer: *Writer, bytes: []const u8, offset: usize) void { + @memcpy(writer.items[offset..][0..bytes.len], bytes); + } + + const SegmentCreation = struct { + name: []const u8, + sections: []const SectionCreation, + protection: LoadCommand.Segment64.VirtualMemoryProtection, + }; + + const SectionCreation = struct { + name: []const u8, + bytes: []const u8, + alignment: u32 = 1, + flags: LoadCommand.Segment64.Section.Flags, + }; + + fn writeSegment(writer: *Writer, descriptor: SegmentCreation) void { + assert(writer.segment_index < writer.segment_count); + defer writer.segment_index += 1; + + const segment_name = blk: { + var result = [1]u8{0} ** 16; + @memcpy(result[0..descriptor.name.len], descriptor.name); + break :blk result; + }; + + if (equal(u8, descriptor.name, "__PAGEZERO")) { + assert(writer.segment_offset == @sizeOf(Header)); + const address_size = 4 * 1024 * 1024 * 1024; + writer.writeBytesAt(std.mem.asBytes(&LoadCommand.Segment64{ + .size = @sizeOf(LoadCommand.Segment64), + .name = segment_name, + .address = 0, + .address_size = address_size, + .file_offset = 0, + .file_size = 0, + .maximum_virtual_memory_protections = descriptor.protection, + .initial_virtual_memory_protections = descriptor.protection, + .section_count = @intCast(descriptor.sections.len), + .flags = .{}, + }), writer.segment_offset); + + writer.address_offset += address_size; + writer.segment_offset += @sizeOf(LoadCommand.Segment64); + } else if (equal(u8, descriptor.name, "__TEXT")) { + const original_offset 
= writer.segment_offset; + assert(original_offset == @sizeOf(Header) + @sizeOf(LoadCommand.Segment64)); + writer.segment_offset += @sizeOf(LoadCommand.Segment64); + + const text_metadata_offset = @sizeOf(Header) + writer.load_command_size; + var section_address_offset = writer.address_offset + text_metadata_offset; + var section_file_offset = writer.file_offset + text_metadata_offset; + + for (descriptor.sections) |section| { + section_address_offset = std.mem.alignForward(usize, section_address_offset, section.alignment); + section_file_offset = std.mem.alignForward(usize, section_file_offset, section.alignment); + + writer.writeBytesAt(std.mem.asBytes(&LoadCommand.Segment64.Section{ + .name = blk: { + var result = [1]u8{0} ** 16; + @memcpy(result[0..section.name.len], section.name); + break :blk result; + }, + .segment_name = segment_name, + .address = section_address_offset, + .size = section.bytes.len, + .file_offset = @intCast(section_file_offset), + .alignment = std.math.log2(section.alignment), + .relocation_file_offset = 0, + .relocation_count = 0, + .type = .regular, + .flags = section.flags, + }), writer.segment_offset); + + @memcpy(writer.items[section_file_offset..][0..section.bytes.len], section.bytes); + + section_address_offset += section.bytes.len; + section_file_offset += section.bytes.len; + + writer.segment_offset += @sizeOf(LoadCommand.Segment64.Section); + } + + const end_segment_offset = writer.segment_offset; + writer.segment_offset = original_offset; + + const size = end_segment_offset - writer.file_offset; + const aligned_size = std.mem.alignForward(usize, size, 16 * 1024); + + writer.append(std.mem.asBytes(&LoadCommand.Segment64{ + .size = @sizeOf(LoadCommand.Segment64), + .name = segment_name, + .address = writer.address_offset, + .address_size = aligned_size, + .file_offset = writer.file_offset, + .file_size = aligned_size, + .maximum_virtual_memory_protections = descriptor.protection, + .initial_virtual_memory_protections = 
descriptor.protection, + .section_count = @intCast(descriptor.sections.len), + .flags = .{}, + })); + + writer.segment_offset = end_segment_offset; + + writer.address_offset += aligned_size; + writer.file_offset += aligned_size; + } else { + unreachable; + } + } + + fn writeLinkeditData(writer: *Writer, bytes: []const u8, load_command_type: LoadCommand.Type) void { + if (writer.linkedit_segment_size == 0) { + writer.linkedit_segment_address_offset = writer.address_offset; + writer.linkedit_segment_file_offset = writer.file_offset; + } + + const data_size: u32 = @intCast(bytes.len); + @memcpy(writer.items[writer.file_offset..][0..data_size], bytes); + + writer.append(std.mem.asBytes(&LoadCommand.LinkeditData{ + .type = load_command_type, + .data_offset = @intCast(writer.linkedit_segment_file_offset), + .data_size = data_size, + })); + + writer.address_offset += data_size; + writer.file_offset += data_size; + + writer.linkedit_segment_size += data_size; + } +}; + +pub fn interpretFile(allocator: Allocator, descriptor: Compilation.Module.Descriptor, file: []const u8) !void { + _ = allocator; + _ = descriptor; + const header: *const Header = @ptrCast(@alignCast(file.ptr)); + print("Header : {}\n", .{header}); + assert(header.magic == Header.magic); + + var text_segment: LoadCommand.Segment64 = undefined; + const load_command_start: *const LoadCommand = @ptrCast(@alignCast(file[@sizeOf(Header)..].ptr)); + var load_command_ptr = load_command_start; + + for (0..header.load_command_count) |_| { + const load_command = load_command_ptr.*; + switch (load_command.type) { + .segment64 => { + const segment_load_command: *const LoadCommand.Segment64 = @ptrCast(@alignCast(load_command_ptr)); + const text_segment_name = "__TEXT"; + if (equal(u8, segment_load_command.name[0..text_segment_name.len], text_segment_name)) { + text_segment = segment_load_command.*; + } + print("SLC: {}\n", .{segment_load_command}); + print("segment name: {s}\n", .{segment_load_command.name}); + const 
section_ptr: [*]const LoadCommand.Segment64.Section = @ptrFromInt(@intFromPtr(segment_load_command) + @sizeOf(LoadCommand.Segment64)); + const sections = section_ptr[0..segment_load_command.section_count]; + for (sections) |section| { + print("{}\n", .{section}); + print("Section name: {s}. Segment name: {s}\n", .{ section.name, section.segment_name }); + } + }, + .dyld_chained_fixups => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .dyld_exports_trie => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .symbol_table => { + const command: *const LoadCommand.SymbolTable = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .symbol_table_information => { + const command: *const LoadCommand.SymbolTableInformation = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .load_dylinker => { + const command: *const LoadCommand.Dylinker = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + const name: [*:0]const u8 = @ptrFromInt(@intFromPtr(command) + command.name_offset); + print("Name: {s}\n", .{name}); + }, + .uuid_number => { + const command: *const LoadCommand.Uuid = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .minimum_os_version => { + const command: *const LoadCommand.MinimumVersion = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .source_version => { + const command: *const LoadCommand.SourceVersion = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .dyld_main_entry_point => { + const command: *const LoadCommand.EntryPoint = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .load_dylib => { + const command: *const LoadCommand.Dylib = 
@ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + print("Dylib: {s}\n", .{@as([*:0]const u8, @ptrFromInt(@intFromPtr(load_command_ptr) + @sizeOf(LoadCommand.Dylib)))}); + }, + .function_starts => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .data_in_code => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + .code_signature => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}\n", .{command}); + }, + else => |t| @panic(@tagName(t)), + } + + load_command_ptr = @ptrFromInt(@intFromPtr(load_command_ptr) + load_command.size); + } + + // const load_command_end = load_command_ptr; + // const load_command_size = @intFromPtr(load_command_end) - @intFromPtr(load_command_start); + // assert(load_command_size == header.load_command_size); + + const segment_count = 3; + var writer = Writer{ + .items = try mmap(0x100000, .{}), + .load_command_size = segment_count * @sizeOf(LoadCommand.Segment64) + + 2 * @sizeOf(LoadCommand.Segment64.Section) + + @sizeOf(LoadCommand.LinkeditData) + + @sizeOf(LoadCommand.LinkeditData) + + @sizeOf(LoadCommand.SymbolTable) + + @sizeOf(LoadCommand.SymbolTableInformation) + + @sizeOf(LoadCommand.Dylinker) + std.mem.alignForward(u32, "/usr/lib/dyld".len, 8) + + @sizeOf(LoadCommand.Uuid) + + @sizeOf(LoadCommand.MinimumVersion) + + @sizeOf(LoadCommand.EntryPoint) + + @sizeOf(LoadCommand.Dylib) + std.mem.alignForward(u32, "/usr/lib/libSystem.B.dylib".len, 8) + + 3 * @sizeOf(LoadCommand.LinkeditData), + .segment_count = segment_count, + }; + writer.index = @sizeOf(Header); + writer.writeSegment(.{ + .name = "__PAGEZERO", + .sections = &.{}, + .protection = .{ + .read = false, + .write = false, + .execute = false, + }, + }); + writer.writeSegment(.{ + .name = "__TEXT", + .sections = &.{ + 
.{ + .name = "__text", + .bytes = &.{ + 0x00, 0x00, 0x80, 0x52, + 0xc0, 0x03, 0x5f, 0xd6, + }, + .alignment = 4, + .flags = .{ + .only_machine_instructions = true, + }, + }, + .{ + .name = "__unwind_info", + .bytes = &.{ + 0x01, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, + 0xb0, 0x3f, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, + 0xb9, 0x3f, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x01, 0x00, + 0x10, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, + }, + .alignment = 4, + .flags = .{}, + }, + }, + .protection = .{ + .read = true, + .write = false, + .execute = true, + }, + }); + + // TODO: write this later + + // writer.writeSegment(.{ + // .name = "__LINKEDIT", + // .sections = &.{}, + // .protection = .{ + // .read = true, + // .write = false, + // .execute = false, + // }, + // }); + assert(writer.segment_index == writer.segment_count - 1); + writer.index = writer.segment_offset + @sizeOf(LoadCommand.Segment64); + + for (file[16384 + 56 ..][0..48]) |b| { + print("0x{x}, ", .{b}); + } + + const chained_fixup_bytes = &.{ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + writer.writeLinkeditData(chained_fixup_bytes, .dyld_chained_fixups); + const export_trie_bytes = &.{ 0x0, 0x1, 0x5f, 0x0, 0x9, 0x2, 0x0, 0x0, 0x0, 0x0, 0x2, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x5, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, 0x3, 0x0, 0xb0, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + writer.writeLinkeditData(export_trie_bytes, .dyld_exports_trie); + 
unreachable; + // writer.writeSymbolTable( +} + +// .bytes = &.{ +// 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x0, 0x9, 0x2, 0x0, 0x0, 0x0, 0x0, 0x2, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x5, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, 0x3, 0x0, 0xb0, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xb0, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0xf, 0x1, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x16, 0x0, 0x0, 0x0, 0xf, 0x1, 0x0, 0x0, 0xb0, 0x3f, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x20, 0x0, 0x5f, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x5f, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0xfa, 0xde, 0xc, 0xc0, 0x0, 0x0, 0x1, 0x11, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x14, 0xfa, 0xde, 0xc, 0x2, 0x0, 0x0, 0x0, 0xfd, 0x0, 0x2, 0x4, 0x0, 0x0, 0x2, 0x0, 0x2, 0x0, 0x0, 0x0, 0x5d, 0x0, 0x0, 0x0, 0x58, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x40, 0xb0, 0x20, 0x2, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0xb2, 0x2a, 0x3, 0x79, 0x1b, 0x82, 0xf4, 0x71, 0xf1, 0xae, 0xfa, 0x44, 0x53, 0xe0, 0xc2, 0x78, 0x1e, 0x56, 0xd1, 0x9b, 0x36, 0x37, 0x7b, 0x7e, 0x61, 0xf5, 0x8a, 0x59, 0xc4, 0xf0, 0x64, 0x56, 0xad, 0x7f, 0xac, 0xb2, 0x58, 0x6f, 0xc6, 0xe9, 0x66, 0xc0, 0x4, 0xd7, 0xd1, 0xd1, 0x6b, 0x2, 0x4f, 0x58, 0x5, 0xff, 0x7c, 0xb4, 0x7c, 0x7a, 0x85, 0xda, 0xbd, 0x8b, 0x48, 0x89, 0x2c, 0xa7, 0xad, 0x7f, 0xac, 0xb2, 
0x58, 0x6f, 0xc6, 0xe9, 0x66, 0xc0, 0x4, 0xd7, 0xd1, 0xd1, 0x6b, 0x2, 0x4f, 0x58, 0x5, 0xff, 0x7c, 0xb4, 0x7c, 0x7a, 0x85, 0xda, 0xbd, 0x8b, 0x48, 0x89, 0x2c, 0xa7, 0x8, 0xdb, 0xee, 0xf5, 0x95, 0x71, 0x3e, 0xcb, 0x29, 0xff, 0x3f, 0x28, 0x46, 0xf0, 0xdc, 0x97, 0xbf, 0x2d, 0x3, 0xf2, 0xec, 0xc, 0x84, 0xa, 0x44, 0x90, 0xf, 0xe0, 0xf4, 0xea, 0x67, 0x97, 0x6b, 0xb0, 0x22, 0x2, 0x0, 0xa7, 0xed, 0x94, 0xb2, 0x3d, 0x86, 0x4d, 0x13, 0xd6, 0xa4, 0xe, 0x1c, 0x1a, 0x6b, 0x9b, 0x82, 0xa0, 0xeb, 0x28, 0x23, 0xfe, 0x8a, 0x51, 0x2a, 0xe5, 0xf9, 0x39, +// }, diff --git a/src/backend/pe.zig b/src/backend/pe.zig new file mode 100644 index 0000000..aca8210 --- /dev/null +++ b/src/backend/pe.zig @@ -0,0 +1,266 @@ +const std = @import("std"); +const assert = std.debug.assert; +const print = std.debug.print; +const Allocator = std.mem.Allocator; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const emit = @import("emit.zig"); +pub const Writer = struct { + in_file: []const u8, + items: []u8, + index: usize = 0, + allocator: Allocator, + pub fn init(allocator: Allocator) !Writer { + const file = try std.fs.cwd().readFileAlloc(allocator, "main.exe", 0xfffffffffffff); + const len = std.mem.alignForward(usize, file.len, 0x1000); + return Writer{ + .in_file = file, + .items = try data_structures.mmap(len, .{}), + .allocator = allocator, + }; + } + + pub fn writeToMemory(writer: *Writer, image: *const emit.Result) !void { + print("File len: {}\n", .{writer.in_file.len}); + const dos_header: *const ImageDosHeader = @ptrCast(@alignCast(writer.in_file.ptr)); + print("File address: {}\n", .{dos_header.file_address_of_new_exe_header}); + print("File: {s}\n", .{writer.in_file[0x40..]}); + for (writer.in_file[0x40..], 0..) 
|byte, index| { + if (byte == 'T') { + print("Index: {}\n", .{index}); + break; + } + } + assert(dos_header.magic_number == ImageDosHeader.magic); + // assert(dos_header.file_address_of_new_exe_header == @sizeOf(ImageDosHeader)); + print("{}\n", .{dos_header}); + const file_header: *const ImageFileHeader = @ptrCast(@alignCast(writer.in_file[dos_header.file_address_of_new_exe_header + 4 ..].ptr)); + print("File header: {}\n", .{file_header}); + + writer.append(std.mem.asBytes(&ImageDosHeader{ + .file_address_of_new_exe_header = 208, + })); + while (writer.index < 208) : (writer.index += 1) { + writer.append(&.{0}); + } + writer.append(std.mem.asBytes(&image_NT_signature)); + writer.append(std.mem.asBytes(&ImageFileHeader{ + .machine = switch (image.target.cpu.arch) { + .x86_64 => .amd64, + .aarch64 => .arm64, + else => @panic("Architecture"), + }, + .section_count = 3, + .time_date_stamp = @intCast(std.time.timestamp()), + })); + + const kernel32 = blk: { + var library = Library{ + .name = "KERNEL32.DLL", + }; + try library.symbols.append(writer.allocator, Symbol{ + .name = "ExitProcess", + }); + + break :blk library; + }; + + const libraries = &[_]Library{kernel32}; + _ = libraries; + + const code = &.{ + 0x48, 0x83, 0xec, 0x28, //subq $40, %rsp + 0xb9, 0x2a, 0x00, 0x00, 0x00, //movl $42, %ecx + 0xff, 0x15, 0xf1, 0x0f, 0x00, 0x00, //callq *4081(%rip) # 0x140002000 + 0xcc, + }; + _ = code; + + const pdata = &.{ + 0x00, 0x10, + 0x00, 0x00, + 0x10, 0x10, + 0x00, 0x00, + 0x28, 0x21, + 0x00, 0x00, + }; + _ = pdata; + + // TODO + // writer.append(std.mem.asBytes(ImageOptionalHeader{ + // .magic = ImageOptionalHeader.magic, + // .size_of_code = code.len, + // })); + + unreachable; + } + + fn append(writer: *Writer, bytes: []const u8) void { + const destination = writer.items[writer.index..][0..bytes.len]; + const source = bytes; + @memcpy(destination, source); + writer.index += bytes.len; + } + + pub fn writeToFile(writer: *Writer, executable_relative_path: []const u8) 
!void { + _ = writer; + _ = executable_relative_path; + unreachable; + } +}; + +const ImageDosHeader = extern struct { + magic_number: u16 = magic, + bytes_last_page_of_file: u16 = 0, + pages_in_file: u16 = 0, + relocations: u16 = 0, + size_of_header_in_paragraphs: u16 = 0, + minimum_extra_paragraphs: u16 = 0, + maximum_extra_paragraphs: u16 = 0, + initial_ss_value: u16 = 0, + initial_sp_value: u16 = 0, + cheksum: u16 = 0, + initial_ip_value: u16 = 0, + initial_cs_value: u16 = 0, + file_address_of_relocation_table: u16 = 0, + overlay_number: u16 = 0, + reserved_words: [4]u16 = .{0} ** 4, + oem_id: u16 = 0, + oem_info: u16 = 0, + reserved_words2: [10]u16 = .{0} ** 10, + file_address_of_new_exe_header: u32 = @sizeOf(ImageDosHeader), + + const magic = 0x5a4d; + + comptime { + assert(@sizeOf(ImageDosHeader) == 64); + } +}; +const image_NT_signature: u32 = 0x00004550; + +/// COFF header format +const ImageFileHeader = extern struct { + machine: ImageFileMachine, + section_count: u16, + time_date_stamp: u32, + symbol_table_offset: u32 = 0, + symbol_count: u32 = 0, + size_of_optional_header: u16 = @sizeOf(ImageOptionalHeader), + characteristics: Characteristics = .{}, + + const Characteristics = packed struct(u16) { + relocations_stripped: bool = false, + executable_image: bool = true, + stripped_line_count: bool = false, + stripped_local_symbols: bool = false, + aggressive_ws_trim: bool = false, + large_address_aware: bool = true, + reserved: u1 = 0, + bytes_reversed_lo: bool = false, + machine_32bit: bool = false, + stripped_debug: bool = false, + removable_run_from_swap: bool = false, + net_run_from_swap: bool = false, + system: bool = false, + dll: bool = false, + up_systems_only: bool = false, + bytes_reversed_hi: bool = false, + }; +}; + +const ImageFileMachine = enum(u16) { + unknown = 0, + target_host = 0x0001, // Useful for indicating we want to interact with the host and not a WoW guest. + i386 = 0x014c, // Intel 386. 
+ r3000 = 0x0162, // MIPS little-endian, 0x160 big-endian + r4000 = 0x0166, // MIPS little-endian + r10000 = 0x0168, // MIPS little-endian + wcemipsv2 = 0x0169, // MIPS little-endian WCE v2 + alpha = 0x0184, // Alpha_AXP + sh3 = 0x01a2, // SH3 little-endian + sh3dsp = 0x01a3, + sh3e = 0x01a4, // SH3E little-endian + sh4 = 0x01a6, // SH4 little-endian + sh5 = 0x01a8, // SH5 + arm = 0x01c0, // ARM Little-Endian + thumb = 0x01c2, // ARM Thumb/Thumb-2 Little-Endian + armnt = 0x01c4, // ARM Thumb-2 Little-Endian + am33 = 0x01d3, + powerpc = 0x01F0, // IBM PowerPC Little-Endian + powerpcfp = 0x01f1, + ia64 = 0x0200, // Intel 64 + mips16 = 0x0266, // MIPS + alpha64 = 0x0284, // ALPHA64 + mipsfpu = 0x0366, // MIPS + mipsfpu16 = 0x0466, // MIPS + tricore = 0x0520, // Infineon + cef = 0x0CEF, + ebc = 0x0EBC, // EFI Byte Code + amd64 = 0x8664, // AMD64 (K8) + m32r = 0x9041, // M32R little-endian + arm64 = 0xAA64, // ARM64 Little-Endian + cee = 0xC0EE, + + const axp64 = ImageFileMachine.alpha64; +}; + +const ImageOptionalHeader = extern struct { + magic: u16 = magic, + major_linker_version: u8 = 0, + minor_linker_version: u8 = 0, + size_of_code: u32, + size_of_initialized_data: u32, + size_of_uninitialized_data: u32, + address_of_entry_point: u32, + base_of_code: u32, + image_base: u64, + section_alignment: u32, + file_alignment: u32, + major_os_version: u16, + minor_os_version: u16, + major_image_version: u16, + minor_image_version: u16, + major_subsystem_version: u16, + minor_subsystem_version: u16, + win32_version_value: u32, + size_of_image: u32, + size_of_headers: u32, + checksum: u32, + subsystem: u16, + dll_characteristics: u16, + size_of_stack_reserve: u64, + size_of_stack_commit: u64, + size_of_heap_reserve: u64, + size_of_heap_commit: u64, + loader_flags: u32, + number_of_RVA_and_sizes: u32, + data_directory: [image_number_of_directory_entries]ImageDataDirectory, + + const magic = 0x20b; + + comptime { + assert(@sizeOf(ImageOptionalHeader) == 0xf0); + } +}; + +const 
ImageDataDirectory = extern struct { + virtual_address: u32, + size: u32, +}; + +const image_number_of_directory_entries = 0x10; + +const Library = struct { + symbols: ArrayList(Symbol) = .{}, + name: []const u8, + name_virtual_address: u32 = 0, + virtual_address: u32 = 0, + image_thunk_virtual_address: u32 = 0, +}; + +const Symbol = struct { + name: []const u8, + name_virtual_address: u32 = 0, + offset_in_data: u32 = 0, +}; diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index f56900e..d15d8e4 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -10,9 +10,1804 @@ const Compilation = @import("../Compilation.zig"); const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const AutoArrayHashMap = data_structures.AutoArrayHashMap; +const BlockList = data_structures.BlockList; const x86_64 = @This(); +const Register = struct { + list: List = .{}, + index: Index, + + const Index = union(enum) { + physical: Register.Physical, + virtual: Register.Virtual.Index, + }; + + const State = union(enum) { + virtual: Virtual.Index, + free, + preassigned, + livein, + }; + const Class = enum { + not_a_register, + any, + // gp8, + // gp16, + gp32, + gp64, + gp64_nosp, + + pub const Descriptor = struct { + size: u16, + spill_size: u16, + spill_alignment: u16, + }; + }; + + const Physical = enum(u9) { + no_register = 0, + ah = 1, + al = 2, + ax = 3, + bh = 4, + bl = 5, + bp = 6, + bph = 7, + bpl = 8, + bx = 9, + ch = 10, + cl = 11, + cs = 12, + cx = 13, + df = 14, + dh = 15, + di = 16, + dih = 17, + dil = 18, + dl = 19, + ds = 20, + dx = 21, + eax = 22, + ebp = 23, + ebx = 24, + ecx = 25, + edi = 26, + edx = 27, + eflags = 28, + eip = 29, + eiz = 30, + es = 31, + esi = 32, + esp = 33, + fpcw = 34, + fpsw = 35, + fs = 36, + fs_base = 37, + gs = 38, + gs_base = 39, + hax = 40, + hbp = 41, + hbx = 42, + hcx = 43, + hdi = 44, + hdx = 45, + hip = 46, + hsi = 47, + hsp = 48, + ip = 49, + mxcsr = 50, + rax = 51, + rbp 
= 52, + rbx = 53, + rcx = 54, + rdi = 55, + rdx = 56, + rflags = 57, + rip = 58, + riz = 59, + rsi = 60, + rsp = 61, + si = 62, + sih = 63, + sil = 64, + sp = 65, + sph = 66, + spl = 67, + ss = 68, + ssp = 69, + tmmcfg = 70, + _eflags = 71, + cr0 = 72, + cr1 = 73, + cr2 = 74, + cr3 = 75, + cr4 = 76, + cr5 = 77, + cr6 = 78, + cr7 = 79, + cr8 = 80, + cr9 = 81, + cr10 = 82, + cr11 = 83, + cr12 = 84, + cr13 = 85, + cr14 = 86, + cr15 = 87, + dr0 = 88, + dr1 = 89, + dr2 = 90, + dr3 = 91, + dr4 = 92, + dr5 = 93, + dr6 = 94, + dr7 = 95, + dr8 = 96, + dr9 = 97, + dr10 = 98, + dr11 = 99, + dr12 = 100, + dr13 = 101, + dr14 = 102, + dr15 = 103, + fp0 = 104, + fp1 = 105, + fp2 = 106, + fp3 = 107, + fp4 = 108, + fp5 = 109, + fp6 = 110, + fp7 = 111, + k0 = 112, + k1 = 113, + k2 = 114, + k3 = 115, + k4 = 116, + k5 = 117, + k6 = 118, + k7 = 119, + mm0 = 120, + mm1 = 121, + mm2 = 122, + mm3 = 123, + mm4 = 124, + mm5 = 125, + mm6 = 126, + mm7 = 127, + r8 = 128, + r9 = 129, + r10 = 130, + r11 = 131, + r12 = 132, + r13 = 133, + r14 = 134, + r15 = 135, + st0 = 136, + st1 = 137, + st2 = 138, + st3 = 139, + st4 = 140, + st5 = 141, + st6 = 142, + st7 = 143, + tmm0 = 144, + tmm1 = 145, + tmm2 = 146, + tmm3 = 147, + tmm4 = 148, + tmm5 = 149, + tmm6 = 150, + tmm7 = 151, + xmm0 = 152, + xmm1 = 153, + xmm2 = 154, + xmm3 = 155, + xmm4 = 156, + xmm5 = 157, + xmm6 = 158, + xmm7 = 159, + xmm8 = 160, + xmm9 = 161, + xmm10 = 162, + xmm11 = 163, + xmm12 = 164, + xmm13 = 165, + xmm14 = 166, + xmm15 = 167, + xmm16 = 168, + xmm17 = 169, + xmm18 = 170, + xmm19 = 171, + xmm20 = 172, + xmm21 = 173, + xmm22 = 174, + xmm23 = 175, + xmm24 = 176, + xmm25 = 177, + xmm26 = 178, + xmm27 = 179, + xmm28 = 180, + xmm29 = 181, + xmm30 = 182, + xmm31 = 183, + ymm0 = 184, + ymm1 = 185, + ymm2 = 186, + ymm3 = 187, + ymm4 = 188, + ymm5 = 189, + ymm6 = 190, + ymm7 = 191, + ymm8 = 192, + ymm9 = 193, + ymm10 = 194, + ymm11 = 195, + ymm12 = 196, + ymm13 = 197, + ymm14 = 198, + ymm15 = 199, + ymm16 = 200, + ymm17 = 201, + 
ymm18 = 202, + ymm19 = 203, + ymm20 = 204, + ymm21 = 205, + ymm22 = 206, + ymm23 = 207, + ymm24 = 208, + ymm25 = 209, + ymm26 = 210, + ymm27 = 211, + ymm28 = 212, + ymm29 = 213, + ymm30 = 214, + ymm31 = 215, + zmm0 = 216, + zmm1 = 217, + zmm2 = 218, + zmm3 = 219, + zmm4 = 220, + zmm5 = 221, + zmm6 = 222, + zmm7 = 223, + zmm8 = 224, + zmm9 = 225, + zmm10 = 226, + zmm11 = 227, + zmm12 = 228, + zmm13 = 229, + zmm14 = 230, + zmm15 = 231, + zmm16 = 232, + zmm17 = 233, + zmm18 = 234, + zmm19 = 235, + zmm20 = 236, + zmm21 = 237, + zmm22 = 238, + zmm23 = 239, + zmm24 = 240, + zmm25 = 241, + zmm26 = 242, + zmm27 = 243, + zmm28 = 244, + zmm29 = 245, + zmm30 = 246, + zmm31 = 247, + r8b = 248, + r9b = 249, + r10b = 250, + r11b = 251, + r12b = 252, + r13b = 253, + r14b = 254, + r15b = 255, + r8bh = 256, + r9bh = 257, + r10bh = 258, + r11bh = 259, + r12bh = 260, + r13bh = 261, + r14bh = 262, + r15bh = 263, + r8d = 264, + r9d = 265, + r10d = 266, + r11d = 267, + r12d = 268, + r13d = 269, + r14d = 270, + r15d = 271, + r8w = 272, + r9w = 273, + r10w = 274, + r11w = 275, + r12w = 276, + r13w = 277, + r14w = 278, + r15w = 279, + r8wh = 280, + r9wh = 281, + r10wh = 282, + r11wh = 283, + r12wh = 284, + r13wh = 285, + r14wh = 286, + r15wh = 287, + k0_k1 = 288, + k2_k3 = 289, + k4_k5 = 290, + k6_k7 = 291, + + const Descriptor = struct { + subregisters: []const Register.Physical = &.{}, + }; + }; + + const Virtual = struct { + register_class: Register.Class, + use_def_list_head: Operand.Index = Operand.Index.invalid, + + pub const List = BlockList(@This()); + pub const Index = Virtual.List.Index; + pub const Allocation = Virtual.List.Allocation; + }; + + const List = struct { + previous: Operand.Index = Operand.Index.invalid, + next: Operand.Index = Operand.Index.invalid, + }; +}; + +const register_descriptors = std.EnumArray(Register.Physical, Register.Physical.Descriptor).init(.{ + .no_register = .{}, + .ah = .{}, + .al = .{}, + .ax = .{}, + .bh = .{}, + .bl = .{}, + .bp = .{}, + .bph = 
.{}, + .bpl = .{}, + .bx = .{}, + .ch = .{}, + .cl = .{}, + .cs = .{}, + .cx = .{}, + .df = .{}, + .dh = .{}, + .di = .{}, + .dih = .{}, + .dil = .{}, + .dl = .{}, + .ds = .{}, + .dx = .{}, + .eax = .{}, + .ebp = .{}, + .ebx = .{}, + .ecx = .{}, + .edi = .{}, + .edx = .{}, + .eflags = .{}, + .eip = .{ + .subregisters = &.{ .ip, .hip }, + }, + .eiz = .{}, + .es = .{}, + .esi = .{}, + .esp = .{}, + .fpcw = .{}, + .fpsw = .{}, + .fs = .{}, + .fs_base = .{}, + .gs = .{}, + .gs_base = .{}, + .hax = .{}, + .hbp = .{}, + .hbx = .{}, + .hcx = .{}, + .hdi = .{}, + .hdx = .{}, + .hip = .{}, + .hsi = .{}, + .hsp = .{}, + .ip = .{}, + .mxcsr = .{}, + .rax = .{}, + .rbp = .{}, + .rbx = .{}, + .rcx = .{}, + .rdi = .{}, + .rdx = .{}, + .rflags = .{}, + .rip = .{ + .subregisters = &.{.eip}, + }, + .riz = .{}, + .rsi = .{}, + .rsp = .{}, + .si = .{}, + .sih = .{}, + .sil = .{}, + .sp = .{}, + .sph = .{}, + .spl = .{}, + .ss = .{}, + .ssp = .{}, + .tmmcfg = .{}, + ._eflags = .{}, + .cr0 = .{}, + .cr1 = .{}, + .cr2 = .{}, + .cr3 = .{}, + .cr4 = .{}, + .cr5 = .{}, + .cr6 = .{}, + .cr7 = .{}, + .cr8 = .{}, + .cr9 = .{}, + .cr10 = .{}, + .cr11 = .{}, + .cr12 = .{}, + .cr13 = .{}, + .cr14 = .{}, + .cr15 = .{}, + .dr0 = .{}, + .dr1 = .{}, + .dr2 = .{}, + .dr3 = .{}, + .dr4 = .{}, + .dr5 = .{}, + .dr6 = .{}, + .dr7 = .{}, + .dr8 = .{}, + .dr9 = .{}, + .dr10 = .{}, + .dr11 = .{}, + .dr12 = .{}, + .dr13 = .{}, + .dr14 = .{}, + .dr15 = .{}, + .fp0 = .{}, + .fp1 = .{}, + .fp2 = .{}, + .fp3 = .{}, + .fp4 = .{}, + .fp5 = .{}, + .fp6 = .{}, + .fp7 = .{}, + .k0 = .{}, + .k1 = .{}, + .k2 = .{}, + .k3 = .{}, + .k4 = .{}, + .k5 = .{}, + .k6 = .{}, + .k7 = .{}, + .mm0 = .{}, + .mm1 = .{}, + .mm2 = .{}, + .mm3 = .{}, + .mm4 = .{}, + .mm5 = .{}, + .mm6 = .{}, + .mm7 = .{}, + .r8 = .{}, + .r9 = .{}, + .r10 = .{}, + .r11 = .{}, + .r12 = .{}, + .r13 = .{}, + .r14 = .{}, + .r15 = .{}, + .st0 = .{}, + .st1 = .{}, + .st2 = .{}, + .st3 = .{}, + .st4 = .{}, + .st5 = .{}, + .st6 = .{}, + .st7 = .{}, + .tmm0 = 
.{}, + .tmm1 = .{}, + .tmm2 = .{}, + .tmm3 = .{}, + .tmm4 = .{}, + .tmm5 = .{}, + .tmm6 = .{}, + .tmm7 = .{}, + .xmm0 = .{}, + .xmm1 = .{}, + .xmm2 = .{}, + .xmm3 = .{}, + .xmm4 = .{}, + .xmm5 = .{}, + .xmm6 = .{}, + .xmm7 = .{}, + .xmm8 = .{}, + .xmm9 = .{}, + .xmm10 = .{}, + .xmm11 = .{}, + .xmm12 = .{}, + .xmm13 = .{}, + .xmm14 = .{}, + .xmm15 = .{}, + .xmm16 = .{}, + .xmm17 = .{}, + .xmm18 = .{}, + .xmm19 = .{}, + .xmm20 = .{}, + .xmm21 = .{}, + .xmm22 = .{}, + .xmm23 = .{}, + .xmm24 = .{}, + .xmm25 = .{}, + .xmm26 = .{}, + .xmm27 = .{}, + .xmm28 = .{}, + .xmm29 = .{}, + .xmm30 = .{}, + .xmm31 = .{}, + .ymm0 = .{}, + .ymm1 = .{}, + .ymm2 = .{}, + .ymm3 = .{}, + .ymm4 = .{}, + .ymm5 = .{}, + .ymm6 = .{}, + .ymm7 = .{}, + .ymm8 = .{}, + .ymm9 = .{}, + .ymm10 = .{}, + .ymm11 = .{}, + .ymm12 = .{}, + .ymm13 = .{}, + .ymm14 = .{}, + .ymm15 = .{}, + .ymm16 = .{}, + .ymm17 = .{}, + .ymm18 = .{}, + .ymm19 = .{}, + .ymm20 = .{}, + .ymm21 = .{}, + .ymm22 = .{}, + .ymm23 = .{}, + .ymm24 = .{}, + .ymm25 = .{}, + .ymm26 = .{}, + .ymm27 = .{}, + .ymm28 = .{}, + .ymm29 = .{}, + .ymm30 = .{}, + .ymm31 = .{}, + .zmm0 = .{}, + .zmm1 = .{}, + .zmm2 = .{}, + .zmm3 = .{}, + .zmm4 = .{}, + .zmm5 = .{}, + .zmm6 = .{}, + .zmm7 = .{}, + .zmm8 = .{}, + .zmm9 = .{}, + .zmm10 = .{}, + .zmm11 = .{}, + .zmm12 = .{}, + .zmm13 = .{}, + .zmm14 = .{}, + .zmm15 = .{}, + .zmm16 = .{}, + .zmm17 = .{}, + .zmm18 = .{}, + .zmm19 = .{}, + .zmm20 = .{}, + .zmm21 = .{}, + .zmm22 = .{}, + .zmm23 = .{}, + .zmm24 = .{}, + .zmm25 = .{}, + .zmm26 = .{}, + .zmm27 = .{}, + .zmm28 = .{}, + .zmm29 = .{}, + .zmm30 = .{}, + .zmm31 = .{}, + .r8b = .{}, + .r9b = .{}, + .r10b = .{}, + .r11b = .{}, + .r12b = .{}, + .r13b = .{}, + .r14b = .{}, + .r15b = .{}, + .r8bh = .{}, + .r9bh = .{}, + .r10bh = .{}, + .r11bh = .{}, + .r12bh = .{}, + .r13bh = .{}, + .r14bh = .{}, + .r15bh = .{}, + .r8d = .{}, + .r9d = .{}, + .r10d = .{}, + .r11d = .{}, + .r12d = .{}, + .r13d = .{}, + .r14d = .{}, + .r15d = .{}, + .r8w = .{}, + .r9w 
= .{}, + .r10w = .{}, + .r11w = .{}, + .r12w = .{}, + .r13w = .{}, + .r14w = .{}, + .r15w = .{}, + .r8wh = .{}, + .r9wh = .{}, + .r10wh = .{}, + .r11wh = .{}, + .r12wh = .{}, + .r13wh = .{}, + .r14wh = .{}, + .r15wh = .{}, + .k0_k1 = .{}, + .k2_k3 = .{}, + .k4_k5 = .{}, + .k6_k7 = .{}, +}); + +// const SubregisterIndex = struct { +// size: u16, +// offset: u16 = 0, +// }; +// +// const SubRegisterIndexType = enum { +// sub_8bit, +// sub_8bit_hi, +// sub_16bit_, +// sub_16bit_hi, +// sub_32bit, +// }; + +// const subregister_indices = std.EnumArray(SubRegisterIndexType, []const SubregisterIndex).init(.{ +// }); + +// const Sub8Bit = enum{ +// ax = 0, +// cx = 1, +// dx = 2, +// bx = 3, +// }; + +const GP32 = enum(u3) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, +}; + +const GP64 = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, +}; + +const GP64NOSP = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, +}; + +const XMMRegister = u4; + +const CallingConvention = struct { + argument_registers: RegisterGroupMap, + syscall_registers: []const Register.Physical, + + const Id = Compilation.CallingConvention; +}; + +const RegisterGroupMap = std.EnumArray(Register.Class, []const Register.Physical); + +const zero_register_class_descriptor = Register.Class.Descriptor{ + .size = 0, + .spill_size = 0, + .spill_alignment = 0, +}; +const register_class_descriptors = std.EnumArray(Register.Class, Register.Class.Descriptor).init(.{ + .not_a_register = zero_register_class_descriptor, + .any = zero_register_class_descriptor, + .gp32 = .{ + .size = 32, + .spill_size = 32, + .spill_alignment = 32, + }, + .gp64 = .{ + .size = 64, + .spill_size = 64, + .spill_alignment = 64, + }, + .gp64_nosp = .{ + .size = 64, + 
.spill_size = 64, + .spill_alignment = 64, + }, +}); + +const registers_by_class = RegisterGroupMap.init(.{ + .not_a_register = &.{}, + .any = &.{}, + .gp32 = &.{ + .eax, + .ecx, + .edx, + .esi, + .edi, + .ebx, + .ebp, + .esp, + .r8d, + .r9d, + .r10d, + .r11d, + .r14d, + .r15d, + .r12d, + .r13d, + }, + .gp64 = &.{ + .rax, + .rcx, + .rdx, + .rsi, + .rdi, + .r8, + .r9, + .r10, + .r11, + .rbx, + .r14, + .r15, + .r12, + .r13, + .rbp, + .rsp, + }, + .gp64_nosp = &.{}, +}); + +// TODO: fix this +const system_v_gp32_argument_registers = [4]Register.Physical{ .edi, .esi, .edx, .ecx }; +const system_v_gp64_argument_registers = [6]Register.Physical{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; +const system_v_xmm_argument_registers = [8]Register.Physical{ .xmm0, .xmm1, .xmm2, .xmm3, .xmm4, .xmm5, .xmm6, .xmm7 }; +const system_v_syscall_registers = [7]Register.Physical{ .rax, .rdi, .rsi, .rdx, .r10, .r8, .r9 }; + +const system_v = CallingConvention{ + .argument_registers = RegisterGroupMap.init(.{ + .not_a_register = &.{}, + .any = &.{}, + .gp32 = &system_v_gp32_argument_registers, + .gp64 = &system_v_gp64_argument_registers, + .gp64_nosp = &.{}, + }), + .syscall_registers = &system_v_syscall_registers, +}; + +const calling_conventions = std.EnumArray(CallingConvention.Id, CallingConvention).init(.{ + .system_v = system_v, +}); + +const ValueType = struct { + size: u16, + element_count: u16, + element_type: u32, + data_type: DataType, + scalarness: Scalarness, + + const DataType = enum(u1) { + integer = 0, + float = 1, + }; + const Scalarness = enum(u1) { + scalar = 0, + vector = 1, + }; + + const Id = enum(u32) { + any = 0, + // other = 1, + // i1 = 2, + // i8 = 3, + // i16 = 4, + i32 = 5, + i64 = 6, + // i128 = 7, + }; +}; + +const value_types = std.EnumArray(ValueType.Id, ValueType).init(.{ + .any = .{ + .size = 0, + .element_count = 1, + .element_type = @intFromEnum(ValueType.Id.any), + .data_type = .integer, + .scalarness = .scalar, + }, + .i32 = .{ + .size = @sizeOf(u32), + 
.element_count = 1, + .element_type = @intFromEnum(ValueType.Id.i32), + .data_type = .integer, + .scalarness = .scalar, + }, + .i64 = .{ + .size = @sizeOf(u64), + .element_count = 1, + .element_type = @intFromEnum(ValueType.Id.i64), + .data_type = .integer, + .scalarness = .scalar, + }, +}); + +const register_classes = std.EnumArray(ValueType.Id, Register.Class).init(.{ + .any = .any, + .i32 = .gp32, + .i64 = .gp64, +}); + +const Memory = struct { + alignment: u64, + // low_level_type: LowLevelType, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +const LowLevelType = packed struct(u64) { + u: packed union { + vector: Vector, + scalar: Scalar, + }, + scalar: bool, + pointer: bool, + + const Vector = packed struct(u62) { + foo: u62 = 0, + }; + + const Scalar = packed struct {}; +}; + +const AddressingMode = struct { + base: AddressingMode.Base, + scale: u32 = 1, + displacement: i32 = 0, + index_register: u32 = 0, + const Base = union(enum) { + register_base: u32, + frame_index: u32, + }; +}; + +const StackObject = struct { + size: u64, + alignment: u32, + spill_slot: bool, + ir: ir.Instruction.Index, +}; + +const InstructionSelection = struct { + local_value_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, Register) = .{}, + block_map: data_structures.AutoHashMap(ir.BasicBlock.Index, BasicBlock.Index) = .{}, + liveins: data_structures.AutoArrayHashMap(Register.Physical, Register.Virtual.Index) = .{}, + memory_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, Memory.Index) = .{}, + stack_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, u32) = .{}, + physical_register_use_or_definition_list: std.EnumArray(Register.Physical, Operand.Index) = std.EnumArray(Register.Physical, Operand.Index).initFill(Operand.Index.invalid), + current_block: BasicBlock.Index = BasicBlock.Index.invalid, + stack_objects: ArrayList(StackObject) = .{}, + function: *MIR.Function, + 
instruction_cache: ArrayList(Instruction.Index) = .{}, + + fn storeRegisterToStackSlot(instruction_selection: *InstructionSelection, mir: *MIR, insert_before_instruction_index: usize, source_register: Register.Physical, kill: bool, frame_index: u32, register_class: Register.Class, virtual_register: Register.Virtual.Index) !void { + _ = virtual_register; + const stack_object = instruction_selection.stack_objects.items[frame_index]; + switch (@divExact(stack_object.size, 8)) { + @sizeOf(u64) => { + switch (register_class) { + .gp64 => { + const instruction_id = Instruction.Id.mov64mr; + const instruction_descriptor = comptime instruction_descriptors.get(instruction_id); + const source_operand_id = instruction_descriptor.operands[1].id; + const addressing_mode = AddressingMode{ + .base = .{ + .frame_index = frame_index, + }, + }; + + const destination_operand_id = instruction_descriptor.operands[0].id; + const destination_operand = Operand{ + .id = destination_operand_id, + .u = .{ + .memory = .{ .addressing_mode = addressing_mode }, + }, + .flags = .{}, + }; + const source_operand = Operand{ + .id = source_operand_id, + .u = .{ + .register = .{ + .index = .{ + .physical = source_register, + }, + }, + }, + .flags = .{ + .dead_or_kill = kill, + }, + }; + + const instruction_index = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + + try mir.blocks.get(instruction_selection.current_block).instructions.insert(mir.allocator, insert_before_instruction_index, instruction_index); + }, + else => |t| @panic(@tagName(t)), + } + }, + else => std.debug.panic("Stack object size: {}\n", .{stack_object.size}), + } + } + + fn loadRegisterFromStackSlot(instruction_selection: *InstructionSelection, mir: *MIR, insert_before_instruction_index: usize, destination_register: Register.Physical, frame_index: u32, register_class: Register.Class, virtual_register: Register.Virtual.Index) !void { + _ = virtual_register; + const 
stack_object = instruction_selection.stack_objects.items[frame_index]; + switch (@divExact(stack_object.size, 8)) { + @sizeOf(u64) => { + switch (register_class) { + .gp64 => { + const instruction_id = Instruction.Id.mov64rm; + const instruction_descriptor = comptime instruction_descriptors.get(instruction_id); + const source_operand_id = instruction_descriptor.operands[1].id; + const addressing_mode = AddressingMode{ + .base = .{ + .frame_index = frame_index, + }, + }; + const source_operand = Operand{ + .id = source_operand_id, + .u = .{ + .memory = .{ .addressing_mode = addressing_mode }, + }, + .flags = .{}, + }; + const destination_operand = Operand{ + .id = .gp64, + .u = .{ + .register = .{ + .index = .{ + .physical = destination_register, + }, + }, + }, + .flags = .{ .type = .def }, + }; + const instruction_index = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + print("Inserting instruction at index {}", .{insert_before_instruction_index}); + try mir.blocks.get(instruction_selection.current_block).instructions.insert(mir.allocator, insert_before_instruction_index, instruction_index); + }, + else => |t| @panic(@tagName(t)), + } + }, + else => std.debug.panic("Stack object size: {}\n", .{stack_object.size}), + } + } + + // TODO: add value map on top of local value map? 
+ fn lookupRegisterForValue(instruction_selection: *InstructionSelection, ir_instruction_index: ir.Instruction.Index) ?Register { + if (instruction_selection.local_value_map.get(ir_instruction_index)) |register| { + return register; + } + + return null; + } + + fn getRegisterForValue(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) !Register { + if (instruction_selection.lookupRegisterForValue(ir_instruction_index)) |register| { + return register; + } + + const ir_type = getIrType(mir.ir, ir_instruction_index); + const value_type = resolveType(ir_type); + + const register_class = register_classes.get(value_type); + const virtual_register = try mir.createVirtualRegister(register_class); + try instruction_selection.local_value_map.putNoClobber(mir.allocator, ir_instruction_index, virtual_register); + return virtual_register; + } + + // Moving an immediate to a register + fn materializeInteger(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) !void { + const destination_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const integer = mir.ir.instructions.get(ir_instruction_index).load_integer; + const value_type = resolveType(integer.type); + // const destination_register_class = register_classes.get(value_type); + // const instruction_id: Instruction.Id = + switch (integer.value.unsigned == 0) { + true => { + const instruction_id: Instruction.Id = switch (value_type) { + // .i8 => unreachable, + // .i16 => unreachable, + .i32 => .mov32r0, + // .i64 => b: { + // if (std.math.cast(u32, integer.value.unsigned)) |_| { + // break :b .mov32ri64; + // } else if (std.math.cast(i32, integer.value.signed)) |_| { + // unreachable; + // } else { + // unreachable; + // } + // }, + else => |t| @panic(@tagName(t)), + }; + const instruction_descriptor = instruction_descriptors.get(instruction_id); + const operand_id = 
instruction_descriptor.operands[0].id; + // const register_class = register_classes.get(operand_id); + const destination_operand = Operand{ + .id = operand_id, + .u = .{ + .register = destination_register, + }, + .flags = .{ .type = .def }, + }; + + const xor = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + }); + try instruction_selection.instruction_cache.append(mir.allocator, xor); + }, + false => { + const instruction_id: Instruction.Id = switch (value_type) { + .i32 => .mov32ri, + .i64 => b: { + if (std.math.cast(u32, integer.value.unsigned)) |_| { + break :b .mov32ri64; + } else if (std.math.cast(i32, integer.value.signed)) |_| { + unreachable; + } else { + unreachable; + } + }, + else => |t| @panic(@tagName(t)), + }; + + const instruction_descriptor = instruction_descriptors.get(instruction_id); + const operand_id = instruction_descriptor.operands[0].id; + + const destination_operand = Operand{ + .id = operand_id, + .u = .{ + .register = destination_register, + }, + .flags = .{ .type = .def }, + }; + + const source_operand = Operand{ + .id = .immediate, + .u = .{ + .immediate = integer.value.unsigned, + }, + .flags = .{}, + }; + + const instr = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, instr); + }, + } + // const instruction_descriptor = instruction_descriptors.getPtrConst(instruction_id); + // + // switch (integer.value.unsigned == 0) { + // true => switch (value_type) { + // .i32 => .mov32r0, + // else => |t| @panic(@tagName(t)), + // }, + // false => blk: { + // + // const destination_register = try mir.createVirtualRegister(destination_register_class); + // const destination_operand = mir.constrainOperandRegisterClass(instruction_descriptor, destination_register, 0, .{ .type = .def }); + // + // const instr = try mir.buildInstruction(instruction_selection, 
instruction_id, &.{ + // destination_operand, + // Operand{ + // .id = .immediate, + // .u = .{ + // .immediate = integer.value.unsigned, + // }, + // .flags = .{}, + // }, + // }); + // try instruction_selection.instruction_cache.append(mir.allocator, instr); + // + // break :blk destination_register; + // }, + // } + } + + fn getAddressingModeFromIr(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) AddressingMode { + const instruction = mir.ir.instructions.get(ir_instruction_index); + switch (instruction.*) { + .stack => { + const frame_index: u32 = @intCast(instruction_selection.stack_map.getIndex(ir_instruction_index).?); + return AddressingMode{ + .base = .{ + .frame_index = frame_index, + }, + }; + }, + else => |t| @panic(@tagName(t)), + } + } + + fn updateValueMap(instruction_selection: *InstructionSelection, allocator: Allocator, ir_instruction_index: ir.Instruction.Index, register: Register) !void { + const gop = try instruction_selection.local_value_map.getOrPut(allocator, ir_instruction_index); + if (gop.found_existing) { + const stored_register = gop.value_ptr.*; + if (std.meta.eql(stored_register, register)) { + unreachable; + } else { + std.debug.panic("Register mismatch: Stored: {} Got: {}\n", .{ stored_register, register }); + } + } else { + gop.value_ptr.* = register; + } + } + + fn lowerArguments(instruction_selection: *InstructionSelection, mir: *MIR, ir_function: *ir.Function) !void { + const ir_function_declaration = mir.ir.function_declarations.get(ir_function.declaration); + const ir_arguments = ir_function_declaration.arguments.values(); + const calling_convention = calling_conventions.get(ir_function_declaration.calling_convention); + + try instruction_selection.local_value_map.ensureUnusedCapacity(mir.allocator, ir_arguments.len); + + var gp_count: u8 = 0; + + for (ir_arguments) |ir_argument_instruction_index| { + const ir_argument_instruction = 
mir.ir.instructions.get(ir_argument_instruction_index); + const ir_argument = mir.ir.arguments.get(ir_argument_instruction.argument); + switch (ir_argument.type) { + .i8, .i16, .i32, .i64 => gp_count += 1, + .void, + .noreturn, + => unreachable, + } + } + + if (gp_count >= 8) { + @panic("Cannot lower arguments"); + } + + var gp_i: u8 = 0; + var fp_i: u8 = 0; + _ = fp_i; + + for (ir_arguments) |ir_argument_instruction_index| { + const ir_argument_instruction = mir.ir.instructions.get(ir_argument_instruction_index); + const ir_argument = mir.ir.arguments.get(ir_argument_instruction.argument); + const value_type = resolveType(ir_argument.type); + const register_class = register_classes.get(value_type); + const argument_registers = calling_convention.argument_registers.get(register_class); + const physical_register = argument_registers[gp_i]; + const operand_id: Operand.Id = switch (register_class) { + inline .gp32, + .gp64, + => |gp| blk: { + gp_i += 1; + break :blk switch (gp) { + .gp32 => .gp32, + .gp64 => .gp64, + else => unreachable, + }; + }, + else => unreachable, + }; + + // const operand_register_class = register_class_operand_matcher.get(operand_reference.id); + + const virtual_register_index = try instruction_selection.createLiveIn(mir, physical_register, register_class); + try mir.append(instruction_selection, .copy, &.{ + Operand{ + .id = operand_id, + .u = .{ + .register = .{ + .index = .{ + .virtual = virtual_register_index, + }, + }, + }, + .flags = .{ + .dead_or_kill = true, + }, + }, + Operand{ + .id = operand_id, + .u = .{ + .register = .{ + .index = .{ + .physical = physical_register, + }, + }, + }, + .flags = .{ + .type = .def, + }, + }, + }); + + mir.blocks.get(instruction_selection.current_block).current_stack_index += 1; + + try instruction_selection.local_value_map.putNoClobber(mir.allocator, ir_argument_instruction_index, Register{ + .index = .{ + .virtual = virtual_register_index, + }, + }); + } + } + + fn addLiveIn(instruction_selection: 
*InstructionSelection, mir: *MIR, register: Register, register_class: Register.Class.Id) !void { + _ = mir; + _ = register_class; + _ = register; + _ = instruction_selection; + unreachable; + } + + fn addExistingLiveIn(instruction_selection: *InstructionSelection, mir: *MIR, physical_register: Register.Physical.Index, virtual_register: Register) !void { + _ = mir; + _ = virtual_register; + _ = physical_register; + _ = instruction_selection; + unreachable; + } + + fn createLiveIn(instruction_selection: *InstructionSelection, mir: *MIR, physical_register: Register.Physical, register_class: Register.Class) !Register.Virtual.Index { + const virtual_register_index = try mir.createVirtualRegisterIndexed(register_class); + try instruction_selection.liveins.putNoClobber(mir.allocator, physical_register, virtual_register_index); + + return virtual_register_index; + } +}; + +const Instruction = struct { + id: Id, + operands: ArrayList(Operand.Index), + parent: BasicBlock.Index, + + const Id = enum { + call64pcrel32, + copy, + lea64r, + mov32r0, + mov32rm, + mov64rm, + mov32mr, + mov64mr, + mov32ri, + mov32ri64, + movsx64rr32, + ret, + syscall, + ud2, + }; + + pub const Descriptor = struct { + operands: []const Operand.Reference = &.{}, + opcode: u16 = 0, + format: Format = .pseudo, + flags: Flags, + const Flags = packed struct { + implicit_def: bool, + }; + + const Format = enum { + pseudo, + no_operands, + add_reg, + mrm_dest_mem, + mrm_source_mem, + mrm_source_reg, + }; + }; + + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; + + pub const Iterator = struct { + pub const Arguments = packed struct { + use: bool, + def: bool, + element: Iterator.Element, + }; + pub const Element = enum(u1) { + operand = 0, + instruction = 1, + }; + + fn Get(comptime arguments: Arguments) type { + return struct { + index: Operand.Index, + mir: *MIR, + + const I = @This(); + + fn new(mir: *MIR, index: Operand.Index) I { + var it = 
I{ + .index = index, + .mir = mir, + }; + + if (index.valid) { + const operand = mir.operands.get(index); + if ((!arguments.use and operand.flags.type == .use) or (!arguments.def and operand.flags.type == .def)) { + it.advance(); + } + } + + return it; + } + + fn next(it: *I) switch (arguments.element) { + .instruction => ?*Instruction, + .operand => ?*Operand, + } { + if (it.index.valid) { + var operand = it.mir.operands.get(it.index); + switch (arguments.element) { + .instruction => { + const instruction = operand.parent; + const i_desc = it.mir.instructions.get(instruction); + print("Instruction: {}\n", .{i_desc.id}); + while (true) { + it.advance(); + if (!it.index.valid) return null; + operand = it.mir.operands.get(it.index); + if (!operand.parent.eq(instruction)) break; + } + + return it.mir.instructions.get(operand.parent); + }, + .operand => return operand, + } + } else { + return null; + } + } + + fn advance(it: *I) void { + assert(it.index.valid); + it.advanceRaw(); + + if (!arguments.use) { + if (it.index.valid) { + const operand = it.mir.operands.get(it.index); + if (operand.flags.type == .use) { + it.index = Operand.Index.invalid; + } else { + //TODO: assert that is not debug + } + } + } else { + while (it.index.valid) { + const operand = it.mir.operands.get(it.index); + if (!arguments.def and operand.flags.type == .def) { + it.advanceRaw(); + } else { + break; + } + } + } + } + + fn advanceRaw(it: *I) void { + assert(it.index.valid); + const current_operand = it.mir.operands.get(it.index); + assert(current_operand.u == .register); + const next_index = current_operand.u.register.list.next; + it.index = next_index; + } + }; + } + }; +}; +pub const Operand = struct { + id: Operand.Id, + u: union(enum) { + register: Register, + memory: Operand.Memory, + immediate: Operand.Immediate, + pc_relative: PCRelative, + lea64mem: Lea64Mem, + }, + flags: Flags, + parent: Instruction.Index = Instruction.Index.invalid, + + pub const List = BlockList(@This()); + pub 
const Index = Operand.List.Index; + pub const Allocation = Operand.List.Allocation; + + fn readsRegister(operand: Operand) bool { + return !operand.flags.undef and !operand.flags.internal_read and (operand.flags.type == .use or operand.flags.subreg); + } + + fn isOnRegisterUseList(operand: *const Operand) bool { + assert(operand.u == .register); + return operand.u.register.list.previous.valid; + } + + const Id = enum { + unknown, + i32mem, + i64mem, + gp32, + gp64, + gp64_nosp, + immediate, + i64i32imm_brtarget, + lea64mem, + }; + pub const Type = enum(u1) { + use = 0, + def = 1, + }; + + const Flags = packed struct { + type: Type = .use, + dead_or_kill: bool = false, + undef: bool = false, + early_clobber: bool = false, + internal_read: bool = false, + subreg: bool = false, + renamable: bool = false, + implicit: bool = false, + + fn isDead(flags: Flags) bool { + return flags.dead_or_kill and flags.type == .def; + } + + fn isKill(flags: Flags) bool { + return flags.dead_or_kill and flags.type != .def; + } + }; + + // fn mapOperandIdToPayloadType(comptime id: Operand.Id) type { + // } + fn mapOperandIdToPayloadName(comptime id: Operand.Id) []const u8 { + return switch (id) { + .unknown => @compileError("unsupported"), + .i32mem, + .i64mem, + => "memory", + .gp32, + .gp64, + .gp64_nosp, + => "register", + .immediate => "immediate", + .i64i32imm_brtarget => "pc_relative", + + .lea64mem => "lea64mem", + }; + } + + fn operandUnionPayloadType(comptime id: Operand.Id) type { + const dumb_union = @field(@as(Operand, undefined), "u"); + return @TypeOf(@field(dumb_union, mapOperandIdToPayloadName(id))); + } + + const Reference = struct { + id: Operand.Id, + kind: Operand.Kind, + }; + + const Kind = enum { + src, + dst, + }; + + const Memory = struct { + addressing_mode: AddressingMode, + global_offset: i32 = 0, + }; + + const PCRelative = union(enum) { + function_declaration: ir.Function.Declaration.Index, + string_literal: ir.StringLiteral.Index, + imm32: i32, + imm8: i8, + 
+ fn function(ir_function_decl_index: ir.Function.Declaration.Index) Operand { + return Operand{ + .i64i32imm_brtarget = PCRelative{ + .function_declaration = ir_function_decl_index, + }, + }; + } + }; + + const Lea64Mem = struct { + gp64: ?Register, // null means RIP, as this register is mandatory + scale: u8, + scale_reg: ?Register, + displacement: PCRelative, + + fn stringLiteral(ir_load_string_literal_index: ir.StringLiteral.Index) Operand { + return Operand{ + .id = .lea64mem, + .u = .{ + .lea64mem = .{ + .gp64 = null, // rip + .scale = 1, + .scale_reg = null, + .displacement = PCRelative{ + .string_literal = ir_load_string_literal_index, + }, + }, + }, + .flags = .{}, + }; + } + }; + + const Immediate = u64; +}; + +const register_class_operand_matcher = std.EnumArray(Operand.Id, Register.Class).init(.{ + .unknown = .any, + .i64i32imm_brtarget = .not_a_register, + .i32mem = .not_a_register, + .i64mem = .not_a_register, + .gp32 = .gp32, + .gp64 = .gp64, + .gp64_nosp = .gp64_nosp, + .immediate = .not_a_register, + .lea64mem = .not_a_register, +}); + +const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descriptor).init(.{ + .call64pcrel32 = .{ + .format = .no_operands, + .operands = &.{ + .{ + .id = .i64i32imm_brtarget, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .copy = .{ + .format = .pseudo, + .operands = &.{ + .{ + .id = .unknown, + .kind = .dst, + }, + .{ + .id = .unknown, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .lea64r = .{ + .format = .mrm_source_mem, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .lea64mem, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .mov32r0 = .{ + .format = .pseudo, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .mov32rm = .{ + .format = .mrm_source_mem, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id 
= .i32mem, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .mov64rm = .{ + .format = .mrm_source_mem, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .i64mem, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .mov32mr = .{ + .format = .mrm_dest_mem, + .operands = &.{ + .{ + .id = .i32mem, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .mov64mr = .{ + .format = .mrm_dest_mem, + .operands = &.{ + .{ + .id = .i64mem, + .kind = .dst, + }, + .{ + .id = .gp64, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .mov32ri = .{ + .format = .add_reg, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .immediate, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .mov32ri64 = .{ + .format = .pseudo, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .immediate, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .movsx64rr32 = .{ + .format = .mrm_source_reg, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .ret = .{ + .format = .no_operands, + .operands = &.{ + .{ + .id = .unknown, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, + .syscall = .{ + .format = .no_operands, + .operands = &.{}, + .flags = .{ + .implicit_def = false, + }, + }, + .ud2 = .{ + .format = .no_operands, + .operands = &.{}, + .flags = .{ + .implicit_def = false, + }, + }, +}); + const Size = enum(u2) { one = 0, two = 1, @@ -37,878 +1832,1705 @@ const Size = enum(u2) { .eight => u64, }; } + + fn fromType(t: ir.Type) Size { + return fromByteCount(@intCast(t.getSize())); + } }; -fn Relocation(comptime Target: type) type { - return struct { - target: Target, - instruction_byte_offset: u32, - instruction_length: 
u8, - source_address_writer_offset: u8, - size: Size, - }; -} - -const LocalRelocation = Relocation(ir.BasicBlock.Index); -const GlobalRelocation = Relocation(u32); - -fn RelocationIndex(comptime relocation_type: RelocationType) type { - return switch (relocation_type) { - .local => ir.BasicBlock.Index, - .global => u32, - }; -} -const RelocationType = enum { - local, - global, +const BasicBlock = struct { + instructions: ArrayList(Instruction.Index) = .{}, + current_stack_index: usize = 0, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const MIR = struct { - functions: ArrayList(Function) = .{}, allocator: Allocator, - const GPRegister = struct { - value: ?x86_64.GPRegister = null, - size: Size, - can_omit_if_present: bool = true, - }; - const Stack = struct { - offset: u64, - }; - const Function = struct { - instructions: ArrayList(MIR.Instruction) = .{}, - blocks: AutoArrayHashMap(ir.BasicBlock.Index, u32) = .{}, - instruction_byte_offset: u32 = 0, - }; - const Instruction = struct { - operands: [4]Operand, - ir: ir.Instruction.Index, - id: Id, - operand_count: u8 = 0, + ir: *ir.Result, + target: std.Target, + instructions: BlockList(Instruction) = .{}, + functions: BlockList(Function) = .{}, + blocks: BlockList(BasicBlock) = .{}, + operands: BlockList(Operand) = .{}, + instruction_selections: ArrayList(InstructionSelection) = .{}, + virtual_registers: BlockList(Register.Virtual) = .{}, - pub fn getOperands(instruction: *MIR.Instruction) []Operand { - return instruction.operands[0..instruction.operand_count]; - } - - const Id = enum(u16) { - call, - jmp, - mov, - push, - ret, - sub, - syscall, - ud2, - xor, - }; - }; - const Operand = union(enum) { - gp_register: MIR.GPRegister, - fp_register, - memory, - displacement: struct { - source: ir.BasicBlock.Index, - destination: union(enum) { - block: ir.BasicBlock.Index, - function: ir.Function.Index, - }, - }, - immediate: Compilation.Integer, 
- stack: Stack, - }; - - const RegisterUse = union(enum) { - general, - ret, - param: x86_64.GPRegister, - syscall_param: x86_64.GPRegister, - }; - - fn addInstruction(mir: *MIR, function: *Function, instruction_id: Instruction.Id, ir_instruction: ir.Instruction.Index, operands: []const Operand) !void { - var out_operands: [4]Operand = undefined; - @memset(std.mem.asBytes(&out_operands), 0); - @memcpy(out_operands[0..operands.len], operands); - - const instruction = MIR.Instruction{ - .operands = out_operands, - .ir = ir_instruction, - .id = instruction_id, - .operand_count = @intCast(operands.len), - }; - print("Adding instruction {s}\n", .{@tagName(instruction_id)}); - try function.instructions.append(mir.allocator, instruction); - } - - fn emitMovRegImm(mir: *MIR, function: *Function, integer: Compilation.Integer, instruction_index: ir.Instruction.Index, use: RegisterUse, register_size: Size) !void { - if (integer.type.bit_count <= @bitSizeOf(u64)) { - if (integer.value == 0) { - const operand = .{ - .gp_register = .{ - .value = switch (use) { - .general => null, - .ret => .a, - .param => unreachable, - .syscall_param => |register| register, - }, - .size = register_size, - }, - }; - - try mir.addInstruction(function, .xor, instruction_index, &.{ - operand, - operand, - }); - } else if (integer.value <= std.math.maxInt(u32)) { - try mir.addInstruction(function, .mov, instruction_index, &.{ - .{ - .gp_register = .{ - .value = switch (use) { - .general => null, - .ret => .a, - .param => unreachable, - .syscall_param => |register| register, - }, - .size = .four, - }, - }, - .{ - .immediate = .{ - .value = integer.value, - .type = .{ - .signedness = integer.type.signedness, - .bit_count = 32, - }, - }, - }, - }); - } else { - unreachable; - } - } else { - unreachable; - } - } - - fn emitMovRegStack(mir: *MIR, function: *Function, use: RegisterUse, stack_reference: ir.StackReference, instruction_index: ir.Instruction.Index) !void { - if (stack_reference.size <= 
@sizeOf(u64)) { - switch (stack_reference.size) { - @sizeOf(u8) => unreachable, - @sizeOf(u16) => unreachable, - @sizeOf(u32) => { - try mir.addInstruction(function, .mov, instruction_index, &.{ - .{ - .gp_register = .{ - .value = switch (use) { - .general => null, - .ret => unreachable, - .param => unreachable, - .syscall_param => |syscall_register| syscall_register, - }, - .size = Size.fromByteCount(@intCast(stack_reference.size)), - }, - }, - .{ - .stack = .{ - .offset = stack_reference.offset, - }, - }, - }); - }, - @sizeOf(u64) => unreachable, - else => unreachable, - } - } else { - unreachable; - } - } - - pub fn generate(allocator: Allocator, intermediate: *ir.Result) !MIR { - var mir = MIR{ + pub fn selectInstructions(allocator: Allocator, intermediate: *ir.Result, target: std.Target) !MIR { + print("\n[INSTRUCTION SELECTION]\n\n", .{}); + var mir_stack = MIR{ .allocator = allocator, + .ir = intermediate, + .target = target, }; - try mir.functions.ensureTotalCapacity(allocator, intermediate.functions.len); - var ir_function_it = intermediate.functions.iterator(); - while (ir_function_it.nextPointer()) |ir_function| { - const function = mir.functions.addOneAssumeCapacity(); - function.* = .{}; + const mir = &mir_stack; + + try mir.blocks.ensureCapacity(allocator, intermediate.blocks.len); + try mir.functions.ensureCapacity(allocator, intermediate.function_definitions.len); + try mir.instruction_selections.ensureUnusedCapacity(allocator, intermediate.function_definitions.len); + + var function_definition_iterator = intermediate.function_definitions.iterator(); + + while (function_definition_iterator.nextPointer()) |ir_function| { + const fn_name = mir.ir.getFunctionName(ir_function.declaration); + print("=========\n{}=========\n", .{ir_function}); + + const instruction_selection = mir.instruction_selections.addOneAssumeCapacity(); + const function_allocation = try mir.functions.addOne(mir.allocator); + const function = function_allocation.ptr; + function.* = 
.{ + .mir = mir, + .instruction_selection = instruction_selection, + .name = fn_name, + }; + instruction_selection.* = .{ + .function = function, + }; + + const ir_function_declaration = mir.ir.function_declarations.get(ir_function.declaration); + const calling_convention = calling_conventions.get(ir_function_declaration.calling_convention); + + try instruction_selection.block_map.ensureUnusedCapacity(allocator, @intCast(ir_function.blocks.items.len)); try function.blocks.ensureTotalCapacity(allocator, ir_function.blocks.items.len); - for (ir_function.blocks.items) |block_index| { - function.blocks.putAssumeCapacity(block_index, @intCast(function.instructions.items.len)); - const basic_block = intermediate.blocks.get(block_index); + for (ir_function.blocks.items) |block| { + const block_allocation = try mir.blocks.append(allocator, .{}); + instruction_selection.block_map.putAssumeCapacity(block, block_allocation.index); + function.blocks.appendAssumeCapacity(block_allocation.index); + } - if (ir_function.current_stack_offset > 0) { - // TODO: switch on ABI - try mir.addInstruction(function, .push, ir.Instruction.Index.invalid, &.{.{ .gp_register = .{ .value = .bp, .size = .eight } }}); + for (mir.ir.blocks.get(ir_function.blocks.items[0]).instructions.items) |ir_instruction_index| { + const ir_instruction = mir.ir.instructions.get(ir_instruction_index); - try mir.addInstruction(function, .mov, ir.Instruction.Index.invalid, &.{ - .{ .gp_register = .{ .value = .bp, .size = .eight } }, - .{ .gp_register = .{ .value = .sp, .size = .eight } }, - }); - - try mir.addInstruction(function, .sub, ir.Instruction.Index.invalid, &.{ - .{ .gp_register = .{ .value = .sp, .size = .eight } }, - .{ - .immediate = Compilation.Integer{ - .value = ir_function.current_stack_offset, - .type = .{ - .bit_count = 8, - .signedness = .unsigned, - }, - }, - }, - }); + // TODO: take into account exceptions, dynamic allocas? 
+ if (ir_instruction.* == .stack) { + const stack = mir.ir.stack_references.get(ir_instruction.stack); + const ir_type = getIrType(mir.ir, ir_instruction_index); + const value_type = resolveType(ir_type); + const type_info = value_types.get(value_type); + const total_size = type_info.size * stack.count; + const frame_index = try mir.createStackObject(instruction_selection, total_size, @intCast(stack.alignment), ir_instruction_index, false); + try instruction_selection.stack_map.putNoClobber(allocator, ir_instruction_index, frame_index); } - for (basic_block.instructions.items) |instruction_index| { - const instruction = intermediate.instructions.get(instruction_index); - switch (instruction.*) { - .jump => |jump_index| { - const jump = intermediate.jumps.get(jump_index); - try mir.addInstruction(function, .jmp, instruction_index, &.{ - .{ .displacement = .{ - .source = jump.source, - .destination = .{ .block = jump.destination }, - } }, + // TODO: handle stack references outside blocks + } + + instruction_selection.current_block = function.blocks.items[0]; + + try instruction_selection.lowerArguments(mir, ir_function); + + print("Block count: {}\n", .{function.blocks.items.len}); + var block_i: usize = function.blocks.items.len; + + while (block_i > 0) { + block_i -= 1; + + const block_index = function.blocks.items[block_i]; + _ = block_index; + const ir_block_index = ir_function.blocks.items[block_i]; + const ir_block = mir.ir.blocks.get(ir_block_index); + + var instruction_i: usize = ir_block.instructions.items.len; + print("Instruction count: {}\n", .{instruction_i}); + + while (instruction_i > 0) { + instruction_i -= 1; + + const ir_instruction_index = ir_block.instructions.items[instruction_i]; + const ir_instruction = mir.ir.instructions.get(ir_instruction_index); + + print("Instruction #{}\n", .{instruction_i}); + + switch (ir_instruction.*) { + .ret => |ir_ret_index| { + const ir_ret = mir.ir.returns.get(ir_ret_index); + const value_type = 
resolveType(getIrType(mir.ir, ir_ret.instruction)); + const source_register = try instruction_selection.getRegisterForValue(mir, ir_ret.instruction); + + const register_class = register_classes.get(value_type); + + const physical_register = Register{ + .index = .{ + .physical = switch (register_class) { + .gp32 => .eax, + .gp64 => .rax, + else => unreachable, + }, + }, + }; + const operand_id: Operand.Id = switch (register_class) { + .gp32 => .gp32, + .gp64 => .gp64, + else => unreachable, + }; + + const copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + Operand{ + .id = operand_id, + .u = .{ + .register = physical_register, + }, + .flags = .{}, + }, + Operand{ + .id = operand_id, + .u = .{ + .register = source_register, + }, + .flags = .{}, + }, }); - }, - .copy => |copy_value_index| { - const copy_value = intermediate.values.get(copy_value_index); - switch (copy_value.*) { - .integer => |integer| try mir.emitMovRegImm(function, integer, instruction_index, .general, Size.fromBitCount(integer.type.bit_count)), - else => |t| @panic(@tagName(t)), - } - }, - .ret => |ret_value_index| { - const ret_value = intermediate.values.get(ret_value_index); - switch (ret_value.*) { - .integer => |integer| try mir.emitMovRegImm(function, integer, instruction_index, .ret, Size.fromBitCount(integer.type.bit_count)), - else => |t| @panic(@tagName(t)), - } - if (ir_function.current_stack_offset > 0) { - unreachable; - } + try instruction_selection.instruction_cache.append(mir.allocator, copy); - try mir.addInstruction(function, .ret, instruction_index, &.{}); - }, - .call => |call_value_index| { - // TODO: args - const call = intermediate.calls.get(call_value_index); - try mir.addInstruction(function, .call, instruction_index, &.{ - .{ - .displacement = .{ - .source = block_index, - .destination = .{ - .function = call.function, - }, + const ret = try mir.buildInstruction(instruction_selection, .ret, &.{ + Operand{ + .id = operand_id, + .u = .{ + .register = 
physical_register, + }, + .flags = .{ + .implicit = true, }, }, }); + try instruction_selection.instruction_cache.append(mir.allocator, ret); }, - .store => |store_index| { - const store = intermediate.stores.get(store_index); - const source_value = intermediate.values.get(store.source); - const destination_value = intermediate.values.get(store.destination); - switch (destination_value.*) { - .stack_reference => |stack_reference_index| { - const stack_reference = intermediate.stack_references.get(stack_reference_index); - print("stack ref: {}\n", .{stack_reference}); - switch (source_value.*) { - .call => |call_index| try mir.emitStoreForFunctionCallResult(function, intermediate, instruction_index, stack_reference.*, call_index), - else => |t| @panic(@tagName(t)), - } - }, - else => |t| @panic(@tagName(t)), + .load_integer => try instruction_selection.materializeInteger(mir, ir_instruction_index), + .@"unreachable" => try instruction_selection.instruction_cache.append(mir.allocator, try mir.buildInstruction(instruction_selection, .ud2, &.{})), + .syscall => |ir_syscall_index| { + const ir_syscall = mir.ir.syscalls.get(ir_syscall_index); + const syscall_register_list = calling_convention.syscall_registers[0..ir_syscall.arguments.items.len]; + + for (ir_syscall.arguments.items, syscall_register_list) |ir_argument_index, syscall_register| { + //print("index: {}\n", .{index}); + const source_register = try instruction_selection.getRegisterForValue(mir, ir_argument_index); + const destination_register = Register{ + .index = .{ + .physical = syscall_register, + }, + }; + + const source_operand = Operand{ + .id = .gp64, + .u = .{ + .register = source_register, + }, + .flags = .{}, + }; + const destination_operand = Operand{ + .id = .gp64, + .u = .{ + .register = destination_register, + }, + .flags = .{ .type = .def }, + }; + + const argument_copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + destination_operand, + source_operand, + }); + + try 
instruction_selection.instruction_cache.append(mir.allocator, argument_copy); } + + // TODO: handle syscall return value + const syscall = try mir.buildInstruction(instruction_selection, .syscall, &.{}); + try instruction_selection.instruction_cache.append(mir.allocator, syscall); + + const physical_return_register = Register{ + .index = .{ + .physical = .rax, + }, + }; + const physical_return_operand = Operand{ + .id = .gp64, + .u = .{ + .register = physical_return_register, + }, + .flags = .{ .type = .def }, + }; + + const virtual_return_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const virtual_return_operand = Operand{ + .id = .gp64, + .u = .{ + .register = virtual_return_register, + }, + .flags = .{ .type = .def }, + }; + + const syscall_result_copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + virtual_return_operand, + physical_return_operand, + }); + try instruction_selection.instruction_cache.append(mir.allocator, syscall_result_copy); }, - .syscall => |syscall_value_index| { - const syscall_value = intermediate.values.get(syscall_value_index); - const syscall = intermediate.syscalls.get(syscall_value.syscall); - for (syscall.arguments.items, syscall_registers[0..syscall.arguments.items.len]) |argument_index, syscall_register| { - const argument = intermediate.values.get(argument_index).*; - switch (argument) { - .integer => |integer| try mir.emitMovRegImm(function, integer, instruction_index, .{ .syscall_param = syscall_register }, Size.eight), - .stack_reference => |stack_reference_index| { - const stack_reference = intermediate.stack_references.get(stack_reference_index); - try mir.emitMovRegStack(function, .{ .syscall_param = syscall_register }, stack_reference.*, instruction_index); + .sign_extend => |ir_cast_index| { + const ir_sign_extend = mir.ir.casts.get(ir_cast_index); + const ir_source_instruction = ir_sign_extend.value; + + const destination_type = resolveType(ir_sign_extend.type); + + 
const source_register = try instruction_selection.getRegisterForValue(mir, ir_source_instruction); + const source_type = resolveType(getIrType(mir.ir, ir_source_instruction)); + + if (destination_type != source_type) { + const instruction_id: Instruction.Id = switch (source_type) { + .i32 => switch (destination_type) { + .i64 => .movsx64rr32, + else => unreachable, }, else => |t| @panic(@tagName(t)), - } - } + }; + const instruction_descriptor = instruction_descriptors.getPtrConst(instruction_id); + assert(instruction_descriptor.operands.len == 2); + const destination_operand_index = 0; + const source_operand_index = 1; + const source_operand = mir.constrainOperandRegisterClass(instruction_descriptor, source_register, source_operand_index, .{}); + const destination_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const destination_operand = mir.constrainOperandRegisterClass(instruction_descriptor, destination_register, destination_operand_index, .{ .type = .def }); - try mir.addInstruction(function, .syscall, instruction_index, &.{}); - }, - .@"unreachable" => try mir.addInstruction(function, .ud2, instruction_index, &.{}), - else => |t| @panic(@tagName(t)), - } - } - } - } - - return mir; - } - - pub fn allocateRegisters(mir: *MIR, allocator: Allocator, intermediate: *ir.Result) !void { - for (mir.functions.items) |*function| { - var register_allocator = try RegisterAllocator.init(allocator); - var instructions_to_delete = AutoArrayHashMap(u32, void){}; - for (function.instructions.items, 0..) 
|*instruction, instruction_index| { - print("#{} {s}\n", .{ instruction_index, @tagName(instruction.id) }); - var allocated_gp_register: ?x86_64.GPRegister = null; - for (instruction.getOperands()) |*operand| { - switch (operand.*) { - .displacement, .immediate, .stack => {}, - .gp_register => |gp_register| switch (instruction.ir.valid) { - true => operand.gp_register.value = blk: { - const value_index = getValueFromInstruction(intermediate, instruction.ir); - - if (gp_register.value) |expected_register| { - if (register_allocator.gp_registers.used.get(expected_register)) |allocated_value| { - switch (value_index.eq(allocated_value)) { - // TODO delete the instruction - true => if (allocated_gp_register == null) unreachable else { - assert(allocated_gp_register.? == expected_register); - }, - // _ = try instructions_to_delete.getOrPut(allocator, @intCast(instruction_index)), //.append(allocator, @intCast(instruction_index)), - false => unreachable, - } - } else { - if (register_allocator.gp_registers.free.get(expected_register)) |_| { - try register_allocator.gp_registers.allocate(allocator, expected_register, intermediate, instruction.*, value_index); - allocated_gp_register = expected_register; - } else { - unreachable; - } - } - - break :blk expected_register; - } else { - for (register_allocator.gp_registers.free.keys()) |register| { - try register_allocator.gp_registers.allocate(allocator, register, intermediate, instruction.*, value_index); - break :blk register; - } else { - unreachable; - } - } - }, - false => {}, - }, - else => |t| @panic(@tagName(t)), - } - } - } - - if (instructions_to_delete.keys().len > 0) { - var next_instruction_to_delete_index: usize = 0; - print("Instructions to delete: ", .{}); - for (instructions_to_delete.keys()) |instruction| { - print("#{}, ", .{instruction}); - } - print("\n", .{}); - for (function.blocks.keys(), function.blocks.values()) |*block_index, *instruction_offset| { - _ = block_index; - while 
(instructions_to_delete.keys()[next_instruction_to_delete_index] <= instruction_offset.*) : (next_instruction_to_delete_index += 1) { - unreachable; - } - } - - var removed_instruction_count: usize = 0; - for (instructions_to_delete.keys()) |instruction_to_delete_index| { - _ = function.instructions.orderedRemove(instruction_to_delete_index - removed_instruction_count); - removed_instruction_count += 1; - } - - print("Instructions after deletion\n", .{}); - for (function.instructions.items, 0..) |instruction, index| { - print("#{} {s}\n", .{ index, @tagName(instruction.id) }); - } - print("\n", .{}); - } - } - } - - const RegisterAllocator = struct { - gp_registers: RegisterSet(x86_64.GPRegister) = .{}, - - fn init(allocator: Allocator) !RegisterAllocator { - var register_allocator = RegisterAllocator{}; - try register_allocator.gp_registers.free.ensureTotalCapacity(allocator, @typeInfo(x86_64.GPRegister).Enum.fields.len); - inline for (@typeInfo(x86_64.GPRegister).Enum.fields) |enum_field| { - register_allocator.gp_registers.free.putAssumeCapacity(@field(x86_64.GPRegister, enum_field.name), {}); - } - - return register_allocator; - } - }; - - fn RegisterSet(comptime RegisterEnum: type) type { - return struct { - used: AutoArrayHashMap(RegisterEnum, ir.Value.Index) = .{}, - free: AutoArrayHashMap(RegisterEnum, void) = .{}, - - fn allocate(register_set: *@This(), allocator: Allocator, register: RegisterEnum, intermediate: *ir.Result, instruction: MIR.Instruction, value_index: ir.Value.Index) !void { - switch (intermediate.instructions.get(instruction.ir).*) { - .store => {}, - else => { - switch (register_set.free.orderedRemove(register)) { - true => try register_set.used.put(allocator, register, value_index), - false => unreachable, - } - }, - } - } - }; - } - - fn getValueFromInstruction(intermediate: *ir.Result, instruction_index: ir.Instruction.Index) ir.Value.Index { - const instruction = intermediate.instructions.get(instruction_index); - const value_index: 
ir.Value.Index = switch (instruction.*) { - .copy, .ret, .syscall => |value_index| value_index, - .store => |store_index| blk: { - const store = intermediate.stores.get(store_index); - break :blk store.source; - }, - else => |t| @panic(@tagName(t)), - }; - - return value_index; - } - - fn emitStoreForFunctionCallResult(mir: *MIR, function: *MIR.Function, intermediate: *ir.Result, instruction: ir.Instruction.Index, stack_reference: ir.StackReference, call_index: ir.Call.Index) !void { - _ = call_index; - _ = intermediate; - if (stack_reference.size <= @sizeOf(u64)) { - switch (stack_reference.size) { - @sizeOf(u8) => unreachable, - @sizeOf(u16) => unreachable, - @sizeOf(u32) => try mir.addInstruction(function, .mov, instruction, &.{ - .{ .stack = .{ .offset = stack_reference.offset } }, .{ .gp_register = .{ .value = .a, .size = Size.fromByteCount(@intCast(stack_reference.size)) } }, - }), - @sizeOf(u64) => unreachable, - else => unreachable, - } - } else { - unreachable; - } - } - - pub fn encode(mir: *const MIR, intermediate: *const ir.Result) !emit.Result { - var local_relocations = ArrayList(LocalRelocation){}; - var global_relocations = ArrayList(GlobalRelocation){}; - var block_index: usize = 0; - - var image = try emit.Result.create(); - - for (mir.functions.items) |*function| { - local_relocations.clearRetainingCapacity(); - function.instruction_byte_offset = @intCast(image.sections.text.index); - for (function.instructions.items, 0..) 
|*instruction, instruction_index| { - if (block_index < function.blocks.values().len) { - if (instruction_index == function.blocks.values()[block_index]) { - function.blocks.values()[block_index] = @intCast(image.sections.text.index); - block_index += 1; - } - } - - const operands = instruction.getOperands(); - switch (operands.len) { - 0 => switch (instruction.id) { - .ret => image.appendCodeByte(0xc3), - .syscall => image.appendCode(&.{ 0x0f, 0x05 }), - .ud2 => image.appendCode(&.{ 0x0f, 0x0b }), - else => |t| @panic(@tagName(t)), - }, - 1 => switch (instruction.id) { - .call => { - const operand = operands[0]; - assert(operand == .displacement); - switch (operand.displacement.destination) { - .function => |ir_function_index| { - const function_index = ir_function_index.uniqueInteger(); - const current_function_index = @divExact(@intFromPtr(function) - @intFromPtr(mir.functions.items.ptr), @sizeOf(MIR.Function)); - - if (current_function_index < function_index) { - try mir.encodeRel32InstructionWithRelocation(&image, RelocationType.global, .{ - .relocations = &global_relocations, - .target = function_index, - .opcode = 0xe8, - }); - } else { - try encodeRel32Instruction(&image, .{ - .target = mir.functions.items[function_index].instruction_byte_offset, - .opcode = 0xe8, - }); - } - }, - else => |t| @panic(@tagName(t)), - } - }, - .jmp => { - const operand = operands[0]; - assert(operand == .displacement); - if (operand.displacement.source.uniqueInteger() < operand.displacement.destination.block.uniqueInteger()) { - try mir.encodeRel32InstructionWithRelocation(&image, RelocationType.local, .{ - .relocations = &local_relocations, - .target = operand.displacement.destination.block, - .opcode = 0xe9, + const sign_extend = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, }); - } else if (operand.displacement.source.uniqueInteger() == operand.displacement.destination.block.uniqueInteger()) { - unreachable; + 
try instruction_selection.instruction_cache.append(mir.allocator, sign_extend); } else { unreachable; } }, - .push => { - const operand = operands[0]; - switch (operand) { - .gp_register => |gp_register| { - assert(gp_register.size == .eight); - if (Rex.create(.{ .rm = gp_register.value.? })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); - } - const opcode = @as(u8, 0x50) | @as(u3, @truncate(@intFromEnum(gp_register.value.?))); - image.appendCodeByte(opcode); + .load => |ir_load_index| { + const ir_load = mir.ir.loads.get(ir_load_index); + const ir_destination = ir_load.instruction; + const addressing_mode = instruction_selection.getAddressingModeFromIr(mir, ir_destination); + const value_type = resolveType(getIrType(mir.ir, ir_destination)); + + switch (value_type) { + inline .i32, + .i64, + => |vt| { + const instruction_id: Instruction.Id = switch (vt) { + .i32 => .mov32rm, + .i64 => .mov64rm, + else => |t| @panic(@tagName(t)), + }; + const memory_id: Operand.Id = switch (vt) { + .i32 => .i32mem, + .i64 => .i64mem, + else => |t| @panic(@tagName(t)), + }; + + const instruction_descriptor = instruction_descriptors.getPtrConst(instruction_id); + + const destination_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const destination_operand_index = 0; + const destination_operand_id = instruction_descriptor.operands[destination_operand_index].id; + const destination_operand = Operand{ + .id = destination_operand_id, + .u = .{ + .register = destination_register, + }, + .flags = .{ .type = .def }, + }; + + const source_operand = Operand{ + .id = memory_id, + .u = .{ + .memory = .{ + .addressing_mode = addressing_mode, + }, + }, + .flags = .{}, + }; + + const load = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + try instruction_selection.instruction_cache.append(mir.allocator, load); }, else => |t| @panic(@tagName(t)), } }, - else => |t| 
@panic(@tagName(t)), - }, - 2 => switch (operands[0]) { - .gp_register => |dst_gp_register| switch (operands[1]) { - .gp_register => |src_gp_register| { - assert(dst_gp_register.size == src_gp_register.size); - const direct = true; - const rm = dst_gp_register.value.?; - const reg = src_gp_register.value.?; + .store => |ir_store_index| { + const ir_store = mir.ir.stores.get(ir_store_index); + const ir_source = ir_store.source; - if (Rex.create(.{ - .rm = rm, - .reg = reg, - .rm_size = dst_gp_register.size, - })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); - } + const ir_destination = ir_store.destination; + const addressing_mode = instruction_selection.getAddressingModeFromIr(mir, ir_destination); - const opcode_option: [2]u8 = switch (instruction.id) { - .mov => .{ 0x88, 0x89 }, - .xor => .{ 0x30, 0x31 }, - else => |t| @panic(@tagName(t)), - }; + const source_register = try instruction_selection.getRegisterForValue(mir, ir_source); - image.appendCodeByte(switch (dst_gp_register.size) { - .one => opcode_option[0], - else => opcode_option[1], - }); + const value_type = resolveType(getIrType(mir.ir, ir_source)); - const modrm = ModRm{ - .rm = @truncate(@intFromEnum(rm)), - .reg = @truncate(@intFromEnum(reg)), - .mod = @as(u2, @intFromBool(direct)) << 1 | @intFromBool(direct), - }; - image.appendCodeByte(@bitCast(modrm)); - }, - .immediate => |src_immediate| { - assert(src_immediate.type.bit_count % @bitSizeOf(u8) == 0); - print("DST GP register: {}. SRC immediate: {}\n", .{ dst_gp_register, src_immediate }); - switch (instruction.id) { - .mov => switch (@intFromEnum(dst_gp_register.value.?) 
> std.math.maxInt(u3)) { - true => unreachable, // Use RM encoding - false => { - const opcode: u8 = switch (dst_gp_register.size) { - .one => 0xb0, - else => 0xb8, - }; - const opcode_byte = opcode | @intFromEnum(dst_gp_register.value.?); - image.appendCodeByte(opcode_byte); - const immediate_byte_count = @as(usize, 1) << @intFromEnum(dst_gp_register.size); - print("Immediate byte count: {}\n", .{immediate_byte_count}); - for (std.mem.asBytes(&src_immediate.value)[0..immediate_byte_count]) |immediate_byte| { - image.appendCodeByte(immediate_byte); - } - }, - }, - else => { - const immediate8_different_than_register = src_immediate.type.bit_count == 8 and dst_gp_register.size != .one; - switch (dst_gp_register.value.? == .a and !immediate8_different_than_register) { - true => unreachable, - false => { - const reg: x86_64.GPRegister = @enumFromInt(@as(u3, switch (instruction.id) { - .sub => 5, - else => |t| @panic(@tagName(t)), - })); - if (Rex.create(.{ .reg = reg, .rm = dst_gp_register.value.?, .rm_size = dst_gp_register.size })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); - } - const opcode: u8 = switch (immediate8_different_than_register) { - true => switch (instruction.id) { - .sub => 0x83, - else => |t| @panic(@tagName(t)), - }, - false => unreachable, - }; - image.appendCodeByte(opcode); - - const rm = dst_gp_register.value.?; - const direct = true; - const modrm = ModRm{ - .rm = @truncate(@intFromEnum(rm)), - .reg = @truncate(@intFromEnum(reg)), - .mod = @as(u2, @intFromBool(direct)) << 1 | @intFromBool(direct), - }; - image.appendCodeByte(@bitCast(modrm)); - - switch (Size.fromBitCount(src_immediate.type.bit_count)) { - inline else => |size| image.appendCode(std.mem.asBytes(&@as(size.toInteger(), @intCast(src_immediate.value)))), - } - }, - } - }, - } - }, - .stack => |src_stack| { - const stack_offset = -@as(i64, @intCast(src_stack.offset)); - for (std.mem.asBytes(&stack_offset)) |stack_byte| { - print("0x{x} ", .{stack_byte}); - } - 
print("\n", .{}); - const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; - - const reg = dst_gp_register.value.?; - if (Rex.create(.{ .reg = reg, .rm_size = dst_gp_register.size })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); - } - const opcode_option: [2]u8 = switch (instruction.id) { - .mov => .{ 0x8a, 0x8b }, - else => |t| @panic(@tagName(t)), - }; - - image.appendCodeByte(switch (dst_gp_register.size) { - .one => opcode_option[0], - else => opcode_option[1], - }); - - const rm = x86_64.GPRegister.bp; - const modrm = ModRm{ - .rm = @truncate(@intFromEnum(rm)), - .reg = @truncate(@intFromEnum(reg)), - .mod = 0b01, - }; - image.appendCodeByte(@bitCast(modrm)); - - image.appendCode(std.mem.asBytes(&stack_offset)[0..displacement_bytes]); - }, - else => |t| @panic(@tagName(t)), - }, - .stack => |dst_stack| switch (operands[1]) { - .gp_register => |src_gp_register| switch (instruction.id) { - .mov => { - const stack_offset = -@as(i64, @intCast(dst_stack.offset)); - for (std.mem.asBytes(&stack_offset)) |stack_byte| { - print("0x{x} ", .{stack_byte}); - } - print("\n", .{}); - const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; - - const reg = src_gp_register.value.?; - if (Rex.create(.{ .reg = reg, .rm_size = src_gp_register.size })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); - } - const opcode_option: [2]u8 = switch (instruction.id) { - .mov => .{ 0x88, 0x89 }, + switch (value_type) { + inline .i32, .i64 => |vt| { + const instruction_id: Instruction.Id = switch (vt) { + // TODO, non-temporal SSE2 MOVNT + .i32 => .mov32mr, + .i64 => .mov64mr, else => |t| @panic(@tagName(t)), }; - image.appendCodeByte(switch (src_gp_register.size) { - .one => opcode_option[0], - else => opcode_option[1], + const instruction_descriptor = comptime 
instruction_descriptors.getPtrConst(instruction_id); + const source_operand_index = instruction_descriptor.operands.len - 1; + const source_operand_id = instruction_descriptor.operands[source_operand_index].id; + const source_operand = Operand{ + .id = source_operand_id, + .u = .{ + .register = source_register, + }, + .flags = .{}, + }; + + const destination_operand_id = instruction_descriptor.operands[0].id; + const destination_operand = Operand{ + .id = destination_operand_id, + .u = .{ + .memory = .{ + .addressing_mode = addressing_mode, + }, + }, + .flags = .{}, + }; + + const store = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, }); - const rm = x86_64.GPRegister.bp; - const modrm = ModRm{ - .rm = @truncate(@intFromEnum(rm)), - .reg = @truncate(@intFromEnum(reg)), - .mod = 0b01, - }; - image.appendCodeByte(@bitCast(modrm)); - - image.appendCode(std.mem.asBytes(&stack_offset)[0..displacement_bytes]); + try instruction_selection.instruction_cache.append(mir.allocator, store); }, else => |t| @panic(@tagName(t)), - }, - else => |t| @panic(@tagName(t)), + } + }, + .stack => { + assert(instruction_selection.stack_map.get(ir_instruction_index) != null); + }, + .call => |ir_call_index| { + const ir_call = mir.ir.calls.get(ir_call_index); + for (ir_call.arguments, 0..) 
|ir_argument_index, index| { + // print("index: {}\n", .{index}); + const source_register = try instruction_selection.getRegisterForValue(mir, ir_argument_index); + const source_value_type = resolveType(getIrType(mir.ir, ir_argument_index)); + const source_register_class = register_classes.get(source_value_type); + const argument_register = calling_convention.argument_registers.get(source_register_class)[index]; + // print("Argument register: {}\n", .{argument_register}); + + const destination_register = Register{ + .index = .{ + .physical = argument_register, + }, + }; + + const operand_id: Operand.Id = switch (source_register_class) { + .gp32 => .gp32, + .gp64 => .gp64, + else => unreachable, + }; + const source_operand = Operand{ + .id = operand_id, + .u = .{ + .register = source_register, + }, + .flags = .{}, + }; + const destination_operand = Operand{ + .id = operand_id, + .u = .{ + .register = destination_register, + }, + .flags = .{}, + }; + + const copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + destination_operand, + source_operand, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, copy); + } + + const call = try mir.buildInstruction(instruction_selection, .call64pcrel32, &.{ + Operand{ + .id = .i64i32imm_brtarget, + .u = .{ + .pc_relative = .{ + .function_declaration = ir_call.function, + }, + }, + .flags = .{}, + }, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, call); + + const ir_return_type = mir.ir.function_declarations.get(ir_call.function).return_type; + switch (ir_return_type) { + .void, + .noreturn, + => {}, + else => { + const return_type = resolveType(ir_return_type); + switch (return_type) { + inline .i64, .i32 => |rt| { + const register_operand_id = switch (rt) { + .i32 => .gp32, + .i64 => .gp64, + else => unreachable, + }; + const physical_return_register = Register{ + .index = .{ + .physical = switch (rt) { + .i32 => .eax, + .i64 => .rax, + else => unreachable, + }, 
+ }, + }; + + const physical_return_operand = Operand{ + .id = register_operand_id, + .u = .{ + .register = physical_return_register, + }, + .flags = .{}, + }; + + const virtual_return_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const virtual_return_operand = Operand{ + .id = register_operand_id, + .u = .{ + .register = virtual_return_register, + }, + .flags = .{ .type = .def }, + }; + + const call_result_copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + virtual_return_operand, + physical_return_operand, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, call_result_copy); + }, + else => |t| @panic(@tagName(t)), + } + }, + } }, else => |t| @panic(@tagName(t)), - }, - 3 => switch (instruction.id) { - else => |t| @panic(@tagName(t)), - }, - 4 => switch (instruction.id) { - else => |t| @panic(@tagName(t)), - }, - else => unreachable, + } + + var i: usize = instruction_selection.instruction_cache.items.len; + const block = mir.blocks.get(instruction_selection.current_block); + + while (i > 0) { + i -= 1; + + const instruction_index = instruction_selection.instruction_cache.items[i]; + const instruction = mir.instructions.get(instruction_index); + print("Inserting instruction #{} ({s}) into index {} (instruction count: {})\n", .{ instruction_index.uniqueInteger(), @tagName(instruction.id), block.current_stack_index, block.instructions.items.len }); + try block.instructions.insert(mir.allocator, block.current_stack_index, instruction_index); + } + + instruction_selection.instruction_cache.clearRetainingCapacity(); } + + instruction_selection.local_value_map.clearRetainingCapacity(); } - for (local_relocations.items) |relocation| { - const source_offset: i64 = relocation.instruction_byte_offset + relocation.instruction_length; - const destination_offset: i64 = function.blocks.get(relocation.target).?; - print("Source: {}. 
Destination: {}\n", .{ source_offset, destination_offset }); - const displacement_offset = destination_offset - source_offset; - const address_to_address = @intFromPtr(&image.sections.text.content[relocation.instruction_byte_offset + relocation.source_address_writer_offset]); - switch (relocation.size) { - inline .one, .four => |relocation_size| { - const RelocationInteger = switch (relocation_size) { - .one => i8, - .four => i32, - else => @compileError("Unreachable"), - }; - const ptr: *align(1) RelocationInteger = @ptrFromInt(address_to_address); - ptr.* = @intCast(displacement_offset); - }, - else => unreachable, - } - } + // for (ir_function.blocks.items, function.blocks.items) |ir_block_index, block_index| { + // const ir_block = mir.ir.blocks.get(ir_block_index); + // instruction_selection.current_block = block_index; + // + // for (ir_block.instructions.items) |ir_instruction_index| { + // const ir_instruction = mir.ir.instructions.get(ir_instruction_index); + // switch (ir_instruction.*) { + // .load_string_literal => |ir_load_string_literal_index| { + // // const ir_string_literal = mir.ir.string_literals.get(ir_load_string_literal_index); + // const virtual_register = try mir.createVirtualRegister(Register.Class.gp64); + // const virtual_operand = Operand.new(.gp64, virtual_register, .{ .type = .def }); + // try mir.append(instruction_selection, .lea64r, &.{ + // virtual_operand, + // Operand.Lea64Mem.stringLiteral(ir_load_string_literal_index), + // }); + // + // try instruction_selection.updateValueMap(allocator, ir_instruction_index, virtual_register); + // }, + // .@"unreachable" => try mir.append(instruction_selection, .ud2, &.{}), + // else => |t| @panic(@tagName(t)), + // } + // } + // + // instruction_selection.local_value_map.clearRetainingCapacity(); + // } - print("Function code:\n", .{}); - for (image.sections.text.content[function.instruction_byte_offset..][0..image.sections.text.index]) |code_byte| { - print("0x{x:0>2} ", .{code_byte}); - } 
- print("\n", .{}); + print("=========\n{}=========\n", .{function}); } - for (global_relocations.items) |global_relocation| { - _ = global_relocation; + return mir_stack; + } + + fn getNextInstructionIndex(mir: *MIR, instruction_index: Instruction.Index) usize { + const instruction = mir.instructions.get(instruction_index); + const parent_block = mir.blocks.get(instruction.parent); + const next = for (parent_block.instructions.items, 0..) |index, i| { + if (index.eq(instruction_index)) break i + 1; + } else unreachable; + return next; + } + + fn setPhysicalRegister(mir: *MIR, instruction_selection: *InstructionSelection, operand_index: Operand.Index, register: Register.Physical) bool { + const operand = mir.operands.get(operand_index); + if (!operand.flags.subreg) { + mir.setRegisterInOperand(instruction_selection, operand_index, .{ + .physical = register, + }); + operand.flags.renamable = true; + return false; + } + + unreachable; + } + + fn setRegisterInOperand(mir: *MIR, instruction_selection: *InstructionSelection, operand_index: Operand.Index, register: Register.Index) void { + const operand = mir.operands.get(operand_index); + assert(operand.u == .register); + assert(!std.meta.eql(operand.u.register.index, register)); + operand.flags.renamable = true; + mir.removeRegisterOperandFromUseList(instruction_selection, operand); + operand.u.register.index = register; + mir.addRegisterOperandFromUseList(instruction_selection, operand_index); + } + + fn addRegisterOperandFromUseList(mir: *MIR, instruction_selection: *InstructionSelection, operand_index: Operand.Index) void { + const operand = mir.operands.get(operand_index); + assert(!operand.isOnRegisterUseList()); + const head_index_ptr = mir.getRegisterListHead(instruction_selection, operand.u.register); + const head_index = head_index_ptr.*; + + switch (head_index.valid) { + true => { + const head_operand = mir.operands.get(head_index); + assert(std.meta.eql(head_operand.u.register.index, 
operand.u.register.index)); + const last_operand_index = head_operand.u.register.list.previous; + const last_operand = mir.operands.get(last_operand_index); + assert(std.meta.eql(last_operand.u.register.index, operand.u.register.index)); + head_operand.u.register.list.previous = operand_index; + operand.u.register.list.previous = last_operand_index; + + switch (operand.flags.type) { + .def => { + operand.u.register.list.next = head_index; + head_index_ptr.* = operand_index; + }, + .use => { + operand.u.register.list.next = Operand.Index.invalid; + last_operand.u.register.list.next = operand_index; + }, + } + }, + false => { + operand.u.register.list.previous = operand_index; + operand.u.register.list.next = Operand.Index.invalid; + head_index_ptr.* = operand_index; + }, + } + } + fn removeRegisterOperandFromUseList(mir: *MIR, instruction_selection: *InstructionSelection, operand: *Operand) void { + assert(operand.isOnRegisterUseList()); + const head_index_ptr = mir.getRegisterListHead(instruction_selection, operand.u.register); + const head_index = head_index_ptr.*; + assert(head_index.valid); + + const operand_previous = operand.u.register.list.previous; + const operand_next = operand.u.register.list.next; + + const head = mir.operands.get(head_index); + if (operand == head) { + head_index_ptr.* = operand_next; + } else { + const previous = mir.operands.get(operand_previous); + previous.u.register.list.next = operand_next; + } + + const next = switch (operand_next.valid) { + true => mir.operands.get(operand_next), + false => head, + }; + next.u.register.list.previous = operand_previous; + + operand.u.register.list.previous = Operand.Index.invalid; + operand.u.register.list.next = Operand.Index.invalid; + } + + fn constrainRegisterClass(mir: *MIR, register: Register, old_register_class: Register.Class) ?Register.Class { + const new_register_class = switch (register.index) { + .virtual => |virtual_register_index| 
mir.virtual_registers.get(virtual_register_index).register_class, + else => unreachable, + }; + + // print("Old: {}. New: {}\n", .{ old_register_class, new_register_class }); + switch (old_register_class == new_register_class) { + true => return new_register_class, + false => unreachable, + } + unreachable; + } + + fn constrainOperandRegisterClass(mir: *MIR, instruction_descriptor: *const Instruction.Descriptor, register: Register, operand_index: usize, flags: Operand.Flags) Operand { + assert(register.index == .virtual); + const operand_reference = instruction_descriptor.operands[operand_index]; + const operand_register_class = register_class_operand_matcher.get(operand_reference.id); + // print("Constraint operand #{} with {} (out of {})\n", .{ operand_index, operand_register_class, operand_reference.id }); + + // const register_class = op + if (mir.constrainRegisterClass(register, operand_register_class) == null) { unreachable; } - image.entry_point = mir.functions.items[intermediate.entry_point].instruction_byte_offset; - - return image; + return Operand{ + .id = operand_reference.id, + .u = .{ + .register = register, + }, + .flags = flags, + }; } - fn encodeRel32Instruction(image: *emit.Result, arguments: struct { - target: u32, - opcode: u8, - }) !void { - const instruction_byte_offset: u32 = @intCast(image.sections.text.index); - const instruction_length = 5; - - const source_offset: i64 = instruction_byte_offset + instruction_length; - const destination_offset: i64 = arguments.target; - const offset: i32 = @intCast(destination_offset - source_offset); - - image.appendCodeByte(arguments.opcode); - image.appendCode(std.mem.asBytes(&offset)); + fn createVirtualRegister(mir: *MIR, register_class: Register.Class) !Register { + const virtual_register_index = try mir.createVirtualRegisterIndexed(register_class); + return Register{ + .index = .{ + .virtual = virtual_register_index, + }, + }; } - fn encodeRel32InstructionWithRelocation(mir: *const MIR, image: 
*emit.Result, comptime relocation_type: RelocationType, arguments: struct { - relocations: *ArrayList(Relocation(RelocationIndex(relocation_type))), - target: RelocationIndex(relocation_type), - opcode: u8, - }) !void { - const instruction_byte_offset = image.sections.text.index; - const source_address_writer_offset = 1; - const instruction_length = 5; - const size = .four; - - image.appendCodeByte(arguments.opcode); - image.appendCode(&(.{0} ** 4)); - - try arguments.relocations.append(mir.allocator, .{ - .instruction_byte_offset = @intCast(instruction_byte_offset), - .source_address_writer_offset = source_address_writer_offset, - .instruction_length = instruction_length, - .target = arguments.target, - .size = size, + fn createVirtualRegisterIndexed(mir: *MIR, register_class: Register.Class) !Register.Virtual.Index { + const allocation = try mir.virtual_registers.append(mir.allocator, .{ + .register_class = register_class, }); + return allocation.index; } -}; -const RegisterImmediate = struct { - immediate: ir.Value.Index, - register: GPRegister, - register_size: Size, - immediate_size: Size, -}; + const RegisterBitset = std.EnumSet(Register.Physical); -const RegisterMemoryRegister = struct { - destination: GPRegister, - source: GPRegister, - size: Size, - direct: bool, -}; + const RegisterAllocator = struct { + reserved: RegisterBitset = RegisterBitset.initEmpty(), + register_states: std.EnumArray(Register.Physical, Register.State) = std.EnumArray(Register.Physical, Register.State).initFill(.free), + used_in_instruction: RegisterBitset = RegisterBitset.initEmpty(), + may_live_across_blocks: std.DynamicBitSetUnmanaged, + live_virtual_registers: std.AutoArrayHashMapUnmanaged(Register.Virtual.Index, LiveRegister) = .{}, + stack_slots: std.AutoHashMapUnmanaged(Register.Virtual.Index, u32) = .{}, + coalesced: ArrayList(Instruction.Index) = .{}, -const Displacement = struct { - instruction_index: u16, - size: Size, - source: u16, - destination: u16, -}; + fn init(mir: 
*MIR, instruction_selection: *InstructionSelection) !RegisterAllocator { + var result = RegisterAllocator{ + .may_live_across_blocks = try std.DynamicBitSetUnmanaged.initEmpty(mir.allocator, mir.virtual_registers.len), + }; -const RmResult = struct { - rex: Rex, - mod_rm: ModRm, -}; + result.reserved.setPresent(.fpcw, true); + result.reserved.setPresent(.fpsw, true); + result.reserved.setPresent(.mxcsr, true); -const RmAndRexArguments = packed struct { - rm: GPRegister, - reg: GPRegister, - direct: bool, - bit64: bool, - sib: bool, -}; -const ModRm = packed struct(u8) { - rm: u3, - reg: u3, - mod: u2, -}; + for ((try getSubregisters(mir.allocator, .rsp)).keys()) |rsp_subreg| { + result.reserved.setPresent(rsp_subreg, true); + } -const Rex = packed struct(u8) { - b: bool, - x: bool, - r: bool, - w: bool, - fixed: u4 = 0b0100, + result.reserved.setPresent(.ssp, true); - fn create(args: struct { - rm: ?GPRegister = null, - reg: ?GPRegister = null, - sib: bool = false, - rm_size: ?Size = null, - }) ?Rex { - const rex_byte = Rex{ - .b = if (args.rm) |rm| @intFromEnum(rm) > std.math.maxInt(u3) else false, - .x = args.sib, - .r = if (args.reg) |reg| @intFromEnum(reg) > std.math.maxInt(u3) else false, - .w = if (args.rm_size) |rm_size| rm_size == .eight else false, + for ((try getSubregisters(mir.allocator, .rip)).keys()) |rip_subreg| { + result.reserved.setPresent(rip_subreg, true); + } + + // TODO: complete + const has_frame_pointer = instruction_selection.stack_map.entries.len > 0; + if (has_frame_pointer) { + for ((try getSubregisters(mir.allocator, .rbp)).keys()) |rbp_subreg| { + result.reserved.setPresent(rbp_subreg, true); + } + } + + // TODO: complete + const has_base_pointer = false; + if (has_base_pointer) { + // TODO + } + + result.reserved.setPresent(.cs, true); + result.reserved.setPresent(.ss, true); + result.reserved.setPresent(.ds, true); + result.reserved.setPresent(.es, true); + result.reserved.setPresent(.fs, true); + result.reserved.setPresent(.gs, 
true); + + inline for ([8]Register.Physical{ .st0, .st1, .st2, .st3, .st4, .st5, .st6, .st7 }) |st_reg| { + result.reserved.setPresent(st_reg, true); + } + + const has_avx512 = false; + if (!has_avx512) { + // TODO xmm alias + } + + // TODO: callee saved registers (CSR) + + // TODO: more setup + + return result; + } + + fn useVirtualRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, virtual_register: Register.Virtual.Index, instruction_operand_index: u8) !bool { + const instruction = mir.instructions.get(instruction_index); + const operand_index = instruction.operands.items[instruction_operand_index]; + const operand = mir.operands.get(operand_index); + const gop = try register_allocator.live_virtual_registers.getOrPut(mir.allocator, virtual_register); + const live_register = gop.value_ptr; + switch (gop.found_existing) { + true => { + // TODO: asserts + const assert_result = !operand.flags.isKill() or live_register.last_use.eq(instruction_index); + if (assert_result) { + print("Existing live register at instruction #{}: {}\n", .{ instruction_index.uniqueInteger(), live_register }); + print("Function until now: {}\n", .{instruction_selection.function}); + assert(assert_result); + } + }, + false => { + if (!operand.flags.isKill()) { + // TODO some logic + // unreachable; + if (register_allocator.mayLiveOut(mir, instruction_selection, virtual_register)) { + unreachable; + } else { + operand.flags.dead_or_kill = true; + } + } + + live_register.* = .{ + .virtual = virtual_register, + }; + }, + } + + if (live_register.physical == .no_register) { + const hint: ?Register = blk: { + if (instruction.id == .copy) { + const source_operand = mir.operands.get(instruction.operands.items[1]); + assert(source_operand.u == .register); + if (!source_operand.flags.subreg) { + const destination_operand = mir.operands.get(instruction.operands.items[0]); + const hint_register = 
destination_operand.u.register; + assert(hint_register.index == .physical); + break :blk hint_register; + } + } + break :blk null; + }; + // TODO: handle allocation error here + register_allocator.allocateVirtualRegister(mir, instruction_selection, instruction, live_register, hint, false) catch unreachable; + } + + live_register.last_use = instruction_index; + + register_allocator.markUsedRegisterInInstruction(live_register.physical); + return mir.setPhysicalRegister(instruction_selection, operand_index, live_register.physical); + } + + fn isRegisterInClass(register: Register.Physical, register_class: Register.Class) bool { + const result = std.mem.indexOfScalar(Register.Physical, registers_by_class.get(register_class), register) != null; + print("Is {s} in class {s}: {}\n", .{ @tagName(register), @tagName(register_class), result }); + return result; + } + + fn allocateVirtualRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction: *Instruction, live_register: *LiveRegister, maybe_hint: ?Register, look_at_physical_register_uses: bool) !void { + _ = instruction; + assert(live_register.physical == .no_register); + const virtual_register = live_register.virtual; + const register_class = mir.virtual_registers.get(live_register.virtual).register_class; + + if (maybe_hint) |hint_register| { + if (hint_register.index == .physical + // TODO : and isAllocatable + and isRegisterInClass(hint_register.index.physical, register_class) + + // TODO and !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) { + + ) { + if (register_allocator.register_states.get(hint_register.index.physical) == .free) { + register_allocator.assignVirtualToPhysicalRegister(live_register, hint_register.index.physical); + return; + } + } + } + + const maybe_hint2 = register_allocator.traceCopies(mir, instruction_selection, virtual_register); + if (maybe_hint2) |hint| { + _ = hint; + unreachable; + } + + const register_class_members = 
registers_by_class.get(register_class); + assert(register_class_members.len > 0); + + var best_cost: u32 = SpillCost.impossible; + var best_register = Register.Physical.no_register; + print("Candidates for {s}: ", .{@tagName(register_class)}); + for (register_class_members) |candidate_register| { + print("{s}, ", .{@tagName(candidate_register)}); + } + print("\n", .{}); + for (register_class_members) |candidate_register| { + print("Checking candidate register {s}\n", .{@tagName(candidate_register)}); + if (register_allocator.isRegisterUsedInInstruction(candidate_register, look_at_physical_register_uses)) continue; + const spill_cost = register_allocator.computeSpillCost(candidate_register); + print("Spill cost: {}\n", .{spill_cost}); + + if (spill_cost == 0) { + register_allocator.assignVirtualToPhysicalRegister(live_register, candidate_register); + return; + } + + if (maybe_hint) |hint| { + if (hint.index.physical == candidate_register) { + unreachable; + } + } + + if (maybe_hint2) |hint| { + if (hint.physical == candidate_register) { + unreachable; + } + } + + if (spill_cost < best_cost) { + best_register = candidate_register; + best_cost = spill_cost; + } + } + + assert(best_register != .no_register); + + unreachable; + } + + fn computeSpillCost(register_allocator: *RegisterAllocator, physical_register: Register.Physical) u32 { + const register_state = register_allocator.register_states.get(physical_register); + return switch (register_state) { + .free => 0, + .preassigned => SpillCost.impossible, + .virtual => |virtual_register_index| blk: { + const sure_spill = register_allocator.stack_slots.get(virtual_register_index) != null or register_allocator.live_virtual_registers.get(virtual_register_index).?.live_out; + break :blk if (sure_spill) SpillCost.clean else SpillCost.dirty; + }, + .livein => unreachable, + }; + } + + const SpillCost = struct { + const clean = 50; + const dirty = 100; + const pref_bonus = 20; + const impossible = std.math.maxInt(u32); }; - if 
(@as(u4, @truncate(@as(u8, @bitCast(rex_byte)))) != 0) { - return rex_byte; - } else { + fn isRegisterUsedInInstruction(register_allocator: *RegisterAllocator, physical_register: Register.Physical, look_at_physical_register_uses: bool) bool { + _ = look_at_physical_register_uses; + + // TODO: register masks + + if (register_allocator.used_in_instruction.contains(physical_register)) { + return true; + } + // TODO + //else if (look_at_physical_register_uses and register_classes.ph + else { + return false; + } + } + + fn traceCopies(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register_index: Register.Virtual.Index) ?Register.Index { + _ = register_allocator; + const head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ + .index = .{ + .virtual = virtual_register_index, + }, + }); + var define_instructions = Instruction.Iterator.Get(.{ + .use = false, + .def = true, + .element = .instruction, + }).new(mir, head_index_ptr.*); + + while (define_instructions.next()) |_| { + unreachable; + } + return null; } + + fn assignVirtualToPhysicalRegister(register_allocator: *RegisterAllocator, live_register: *LiveRegister, register: Register.Physical) void { + const virtual_register = live_register.virtual; + assert(live_register.physical == .no_register); + assert(register != .no_register); + live_register.physical = register; + register_allocator.register_states.set(register, .{ + .virtual = virtual_register, + }); + + print("Assigning V{} to {s}\n", .{ virtual_register.uniqueInteger(), @tagName(register) }); + // TODO: debug info + } + + fn displacePhysicalRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, physical_register: Register.Physical) !bool { + const state = register_allocator.register_states.getPtr(physical_register); + print("Trying to displace register {s} with state {s}\n", .{ 
@tagName(physical_register), @tagName(state.*) }); + return switch (state.*) { + .free => false, + .preassigned => blk: { + state.* = .free; + break :blk true; + }, + .virtual => |virtual_register| blk: { + const live_reg = register_allocator.live_virtual_registers.getPtr(virtual_register).?; + const before = mir.getNextInstructionIndex(instruction_index); + try register_allocator.reload(mir, instruction_selection, before, virtual_register, physical_register); + live_reg.physical = .no_register; + live_reg.reloaded = true; + break :blk true; + }, + .livein => unreachable, + }; + } + + fn reload(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, before_index: usize, virtual_register: Register.Virtual.Index, physical_register: Register.Physical) !void { + const frame_index = try register_allocator.getStackSpaceFor(mir, instruction_selection, virtual_register); + const register_class = mir.virtual_registers.get(virtual_register).register_class; + print("Frame index: {}\n", .{frame_index}); + + try instruction_selection.loadRegisterFromStackSlot(mir, before_index, physical_register, frame_index, register_class, virtual_register); + } + + fn getStackSpaceFor(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register: Register.Virtual.Index) !u32 { + if (register_allocator.stack_slots.get(virtual_register)) |frame_index| { + return frame_index; + } else { + const register_class = mir.virtual_registers.get(virtual_register).register_class; + const register_class_descriptor = register_class_descriptors.get(register_class); + assert(register_class_descriptor.spill_size > 0); + assert(register_class_descriptor.spill_alignment > 0); + const frame_index = try mir.createSpillStackObject(instruction_selection, register_class_descriptor.spill_size, register_class_descriptor.spill_alignment); + + try register_allocator.stack_slots.put(mir.allocator, virtual_register, frame_index); 
+ return frame_index; + } + } + + fn freePhysicalRegister(register_allocator: *RegisterAllocator, physical_register: Register.Physical) void { + const state = register_allocator.register_states.getPtr(physical_register); + switch (state.*) { + .free => unreachable, + .preassigned => state.* = .free, + .virtual => |virtual_register_index| { + const live_register = register_allocator.live_virtual_registers.getPtr(virtual_register_index).?; + assert(live_register.physical == physical_register); + register_allocator.register_states.set(physical_register, .free); + live_register.physical = .no_register; + }, + .livein => unreachable, + } + } + + fn markUsedRegisterInInstruction(register_allocator: *RegisterAllocator, physical_register: Register.Physical) void { + register_allocator.used_in_instruction.setPresent(physical_register, true); + } + + fn unmarkUsedRegisterInInstruction(register_allocator: *RegisterAllocator, physical_register: Register.Physical) void { + register_allocator.used_in_instruction.setPresent(physical_register, false); + } + + fn definePhysicalRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, physical_register: Register.Physical) !bool { + const displaced_any = try register_allocator.displacePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + register_allocator.register_states.set(physical_register, .preassigned); + return displaced_any; + } + + fn defineVirtualRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, operand_index: Operand.Index, virtual_register: Register.Virtual.Index, look_at_physical_register_uses: bool) !bool { + const instruction = mir.instructions.get(instruction_index); + const gop = try register_allocator.live_virtual_registers.getOrPut(mir.allocator, virtual_register); + if (!gop.found_existing) { + 
gop.value_ptr.* = .{ + .virtual = virtual_register, + }; + unreachable; + } + const live_register = gop.value_ptr; + if (live_register.physical == .no_register) { + try register_allocator.allocateVirtualRegister(mir, instruction_selection, instruction, live_register, null, look_at_physical_register_uses); + } else { + assert(!register_allocator.isRegisterUsedInInstruction(live_register.physical, look_at_physical_register_uses)); + } + + const physical_register = live_register.physical; + assert(physical_register != .no_register); + if (live_register.reloaded or live_register.live_out) { + const instruction_descriptor = instruction_descriptors.get(instruction.id); + if (!instruction_descriptor.flags.implicit_def) { + const spill_before = mir.getNextInstructionIndex(instruction_index); + const kill = !live_register.last_use.valid; + try register_allocator.spill(mir, instruction_selection, spill_before, virtual_register, physical_register, kill, live_register.live_out); + + live_register.last_use = Instruction.Index.invalid; + } + + live_register.live_out = false; + live_register.reloaded = false; + } + + // bundle? 
+ + register_allocator.markUsedRegisterInInstruction(physical_register); + return mir.setPhysicalRegister(instruction_selection, operand_index, physical_register); + } + + fn spill(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, spill_before: usize, virtual_register: Register.Virtual.Index, physical_register: Register.Physical, kill: bool, live_out: bool) !void { + _ = live_out; + const frame_index = try register_allocator.getStackSpaceFor(mir, instruction_selection, virtual_register); + const register_class = mir.virtual_registers.get(virtual_register).register_class; + try instruction_selection.storeRegisterToStackSlot(mir, spill_before, physical_register, kill, frame_index, register_class, virtual_register); + // TODO: debug operands + } + + fn mayLiveIn(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register_index: Register.Virtual.Index) bool { + if (register_allocator.may_live_across_blocks.isSet(virtual_register_index.uniqueInteger())) { + unreachable; + } else { + const head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ + .index = .{ + .virtual = virtual_register_index, + }, + }); + + // TODO: setup iterator + var define_instructions = Instruction.Iterator.Get(.{ + .use = false, + .def = true, + .element = .instruction, + }).new(mir, head_index_ptr.*); + while (define_instructions.next()) |_| { + unreachable; + } + + return false; + } + } + + fn mayLiveOut(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register_index: Register.Virtual.Index) bool { + if (register_allocator.may_live_across_blocks.isSet(virtual_register_index.uniqueInteger())) { + unreachable; + } else { + if (false) { + // TODO: FIXME if block loops + } + + const head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ + .index = .{ + .virtual = virtual_register_index, + }, + }); + var iterator = 
Instruction.Iterator.Get(.{ + .use = true, + .def = false, + .element = .instruction, + }).new(mir, head_index_ptr.*); + + const limit = 8; + var count: u32 = 0; + while (iterator.next()) |use_instruction| { + if (!use_instruction.parent.eq(instruction_selection.current_block)) { + register_allocator.may_live_across_blocks.set(virtual_register_index.uniqueInteger()); + // TODO: return !basic_block.successorsEmpty() + return false; + } + + count += 1; + if (count >= limit) { + register_allocator.may_live_across_blocks.set(virtual_register_index.uniqueInteger()); + // TODO: return !basic_block.successorsEmpty() + return false; + } + + // self loop def + if (false) { + unreachable; + } + } + + return false; + } + } + + fn reloadAtBegin(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, basic_block: BasicBlock.Index) !void { + _ = instruction_selection; + _ = mir; + _ = register_allocator; + _ = basic_block; + // if (register_allocator.live_virtual_registers.entries.len > 0) { + // // TODO: basic block liveins (regmasks?) + // + // const live_registers = register_allocator.live_virtual_registers.values(); + // print("Live register count: {}\n", .{live_registers.len}); + // + // for (live_registers) |live_register| { + // const physical_register = live_register.physical; + // if (physical_register == .no_register) { + // continue; + // } + // + // if (register_allocator.register_states.get(physical_register) == .livein) { + // unreachable; + // } + // + // // assert? 
+ // + // const virtual_register = live_register.virtual; + // if (false) { + // unreachable; + // } else { + // try register_allocator.reload(mir, instruction_selection, 0, virtual_register, physical_register); + // } + // } + // unreachable; + // } + } + }; + + fn getRegisters(operand: *const Operand, register_buffer: []Register) []const Register { + var registers: []Register = register_buffer; + registers.len = 0; + switch (operand.u) { + .register => |register| { + registers.len += 1; + registers[registers.len - 1] = register; + }, + .lea64mem => |lea64mem| { + if (lea64mem.gp64) |register| { + registers.len += 1; + registers[registers.len - 1] = register; + } + + if (lea64mem.scale_reg) |register| { + registers.len += 1; + registers[registers.len - 1] = register; + } + }, + .memory, + .immediate, + .pc_relative, + => {}, + // else => |t| @panic(@tagName(t)), + } + + return registers; + } + + pub fn allocateRegisters(mir: *MIR) !void { + print("\n[REGISTER ALLOCATION]\n\n", .{}); + const function_count = mir.functions.len; + _ = function_count; + var function_iterator = mir.functions.iterator(); + _ = function_iterator; + const register_count = @typeInfo(Register.Physical).Enum.fields.len; + _ = register_count; + const register_unit_count = 173; + _ = register_unit_count; + + // for (0..function_count) |function_index| { + // const function = function_iterator.nextPointer().?; + // const instruction_selection = &mir.instruction_selections.items[function_index]; + // print("FN {s}\n", .{function.name}); + // + // var register_allocator = try RegisterAllocator.init(mir, instruction_selection); + // + // for (function.blocks.items) |block_index| { + // instruction_selection.current_block = block_index; + // register_allocator.coalesced.clearRetainingCapacity(); + // + // const block = mir.blocks.get(block_index); + // const instruction_count = block.instructions.items.len; + // var instruction_i = instruction_count; + // + // while (instruction_i > 0) { + // 
instruction_i -= 1; + // print("Instruction #{}\n", .{instruction_i}); + // + // register_allocator.used_in_instruction = RegisterBitset.initEmpty(); + // + // const instruction_index = block.instructions.items[instruction_i]; + // const instruction = mir.instructions.get(instruction_index); + // + // var register_define = false; + // var virtual_register_define = false; + // var early_clobber = false; + // var assign_live_throughs = false; + // var physical_register_use = false; + // var register_mask = false; + // + // for (instruction.operands.items) |operand_index| { + // const operand = mir.operands.get(operand_index); + // var register_buffer: [2]Register = undefined; + // const registers = getRegisters(operand, ®ister_buffer); + // + // for (registers) |register| switch (register.index) { + // .virtual => { + // switch (operand.flags.type) { + // .def => { + // register_define = true; + // virtual_register_define = true; + // // TODO early clobber, livethroughs + // if (operand.flags.early_clobber) { + // early_clobber = true; + // assign_live_throughs = true; + // } + // + // // TODO (tied and tied op undef) or (subreg and !undef) + // }, + // .use => {}, + // } + // }, + // .physical => |physical_register| { + // if (!register_allocator.reserved.contains(physical_register)) { + // switch (operand.flags.type) { + // .def => { + // register_define = true; + // const displaced_any = try register_allocator.definePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + // if (operand.flags.early_clobber) { + // early_clobber = true; + // } + // if (!displaced_any) { + // operand.flags.dead_or_kill = true; + // } + // }, + // .use => {}, + // } + // + // if (operand.readsRegister()) { + // physical_register_use = true; + // } + // } + // }, + // }; + // } + // + // if (register_define) { + // if (virtual_register_define) { + // var rearranged_implicit_operands = true; + // if (assign_live_throughs) { + // unreachable; + // } else { + 
// while (rearranged_implicit_operands) { + // rearranged_implicit_operands = false; + // + // for (instruction.operands.items) |operand_index| { + // const operand = mir.operands.get(operand_index); + // switch (operand.u) { + // .register => |register| switch (operand.flags.type) { + // .def => switch (register.index) { + // .virtual => |virtual_register| { + // rearranged_implicit_operands = try register_allocator.defineVirtualRegister(mir, instruction_selection, instruction_index, operand_index, virtual_register, false); + // if (rearranged_implicit_operands) { + // break; + // } + // }, + // .physical => {}, + // }, + // else => {}, + // }, + // .lea64mem => |lea64mem| { + // assert(lea64mem.gp64 == null); + // assert(lea64mem.scale_reg == null); + // }, + // else => {}, + // } + // } + // } + // } + // } + // + // var operand_i = instruction.operands.items.len; + // while (operand_i > 0) { + // operand_i -= 1; + // const operand_index = instruction.operands.items[operand_i]; + // const operand = mir.operands.get(operand_index); + // var register_buffer: [2]Register = undefined; + // const registers = getRegisters(operand, ®ister_buffer); + // for (registers) |register| { + // switch (operand.flags.type) { + // .def => { + // if (operand.id == .lea64mem) unreachable; + // // TODO: missing checks + // switch (register.index) { + // .virtual => unreachable, + // .physical => |physical_register| switch (register_allocator.reserved.contains(physical_register)) { + // true => {}, + // false => { + // register_allocator.freePhysicalRegister(physical_register); + // register_allocator.unmarkUsedRegisterInInstruction(physical_register); + // }, + // }, + // } + // }, + // .use => {}, + // } + // } + // } + // } + // + // if (register_mask) { + // unreachable; + // } + // + // if (physical_register_use) { + // unreachable; + // } + // + // var undef_use = false; + // + // while (true) { + // var rearrange_implicit_operands = false; + // operand_loop: for 
(instruction.operands.items, 0..) |operand_index, operand_i| { + // const operand = mir.operands.get(operand_index); + // var register_buffer: [2]Register = undefined; + // const registers = getRegisters(operand, ®ister_buffer); + // + // for (registers) |register| { + // switch (operand.flags.type) { + // .use => switch (register.index) { + // .virtual => |virtual_register_index| switch (operand.flags.undef) { + // true => undef_use = true, + // false => { + // _ = register_allocator.mayLiveIn(mir, instruction_selection, virtual_register_index); + // assert(!operand.flags.internal_read); + // assert(operand.readsRegister()); + // + // if (try register_allocator.useVirtualRegister(mir, instruction_selection, instruction_index, virtual_register_index, @intCast(operand_i))) { + // break :operand_loop; + // } + // }, + // }, + // .physical => {}, + // }, + // .def => {}, + // } + // } + // } + // + // if (!rearrange_implicit_operands) break; + // } + // + // if (undef_use) { + // unreachable; + // } + // + // if (early_clobber) { + // unreachable; + // } + // + // if (instruction.id == .copy and instruction.operands.items.len == 2) { + // const dst_register = mir.operands.get(instruction.operands.items[0]).u.register; + // const src_register = mir.operands.get(instruction.operands.items[1]).u.register; + // + // if (std.meta.eql(dst_register, src_register)) { + // try register_allocator.coalesced.append(mir.allocator, instruction_index); + // } + // } + // } + // + // // TODO: + // // try register_allocator.reloadAtBegin(instruction_selection.current_block); + // + // // Remove coalesced instructions + // for (register_allocator.coalesced.items) |instruction_index| { + // _ = instruction_index; + // unreachable; + // } + // + // // TODO: fix debug values + // + // } + // + // print("After register allocation before clearing virtual registers:\n{}\n", .{function}); + // + // const clear_virtual_registers = true; + // if (clear_virtual_registers) { + // 
mir.clearVirtualRegisters(); + // } + // + // unreachable; + // } + + unreachable; + } + + fn clearVirtualRegisters(mir: *MIR) void { + var vr_it = mir.virtual_registers.iterator(); + var vr_index = vr_it.getCurrentIndex(); + while (vr_it.nextPointer()) |vr| { + if (!vr.use_def_list_head.valid) { + continue; + } + + mir.verifyUseList(vr.use_def_list_head, vr_index); + vr_index = vr_it.getCurrentIndex(); + } + } + + fn verifyUseList(mir: *MIR, start_operand_index: Operand.Index, register: Register.Virtual.Index) void { + var iterator = Instruction.Iterator.Get(.{ + .use = true, + .def = true, + .element = .operand, + }).new(mir, start_operand_index); + + while (iterator.next()) |operand| { + const instruction_index = operand.parent; + assert(instruction_index.valid); + const instruction = mir.instructions.get(instruction_index); + print("Verifying instruction #{}, operand #{}\n", .{ instruction_index.uniqueInteger(), mir.operands.indexOf(operand).uniqueInteger() }); + _ = instruction; + assert(operand.u == .register); + assert(operand.u.register.index == .virtual and operand.u.register.index.virtual.eq(register)); + } + } + + pub fn encode(mir: *MIR) !emit.Result { + _ = mir; + // unreachable; + return undefined; + } + + fn getRegisterListHead(mir: *MIR, instruction_selection: *InstructionSelection, register: Register) *Operand.Index { + return switch (register.index) { + .physical => |physical| instruction_selection.physical_register_use_or_definition_list.getPtr(physical), + .virtual => |virtual_register_index| &mir.virtual_registers.get(virtual_register_index).use_def_list_head, + }; + } + + const Function = struct { + blocks: ArrayList(BasicBlock.Index) = .{}, + instruction_selection: *InstructionSelection, + mir: *MIR, + name: []const u8, + + pub fn format(function: *const Function, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + const function_name = function.name; + try writer.print("{s}:\n", .{function_name}); + for 
(function.blocks.items, 0..) |block_index, function_block_index| { + try writer.print("#{}: ({})\n", .{ function_block_index, block_index.uniqueInteger() }); + const block = function.mir.blocks.get(block_index); + for (block.instructions.items, 0..) |instruction_index, block_instruction_index| { + try writer.print("%{} (${}): ", .{ block_instruction_index, instruction_index.uniqueInteger() }); + const instruction = function.mir.instructions.get(instruction_index).*; + try writer.print("{s}", .{@tagName(instruction.id)}); + for (instruction.operands.items, 0..) |operand_index, i| { + const operand = function.mir.operands.get(operand_index); + try writer.writeByte(' '); + switch (operand.u) { + .register => |register| { + switch (register.index) { + .physical => |physical| try writer.writeAll(@tagName(physical)), + .virtual => |virtual| try writer.print("VR{}", .{virtual.uniqueInteger()}), + } + }, + else => try writer.writeAll(@tagName(operand.u)), + } + // switch (operand.u) { + // .memory => + // else => |t| @panic(@tagName(t)), + // } + if (i < instruction.operands.items.len - 1) { + try writer.writeByte(','); + } + } + + try writer.writeByte('\n'); + } + + try writer.writeByte('\n'); + } + _ = options; + _ = fmt; + } + }; + + fn buildInstruction(mir: *MIR, instruction_selection: *InstructionSelection, instruction: Instruction.Id, operands: []const Operand) !Instruction.Index { + // Some sanity check + { + if (instruction != .copy) { + const descriptor = instruction_descriptors.getPtrConst(instruction); + if (descriptor.operands.len != operands.len) unreachable; + for (descriptor.operands, operands) |descriptor_operand, operand| { + switch (descriptor_operand.id) { + .unknown => {}, + else => if (descriptor_operand.id != operand.id) unreachable, + } + } + } + } + + var list = try ArrayList(Operand.Index).initCapacity(mir.allocator, operands.len); + const instruction_allocation = try mir.instructions.addOne(mir.allocator); + // TODO: 
MachineRegisterInfo::addRegOperandToUseList + for (operands) |operand_value| { + const operand_allocation = try mir.operands.append(mir.allocator, operand_value); + list.appendAssumeCapacity(operand_allocation.index); + const operand = operand_allocation.ptr; + const operand_index = operand_allocation.index; + operand_allocation.ptr.parent = instruction_allocation.index; + + switch (operand.u) { + .register => mir.addRegisterOperandFromUseList(instruction_selection, operand_index), + .lea64mem => |lea64mem| { + // TODO + assert(lea64mem.gp64 == null); + assert(lea64mem.scale_reg == null); + }, + .memory, + .immediate, + .pc_relative, + => {}, + } + } + instruction_allocation.ptr.* = .{ + .id = instruction, + .operands = list, + .parent = instruction_selection.current_block, + }; + + return instruction_allocation.index; + } + + fn append(mir: *MIR, instruction_selection: *InstructionSelection, id: Instruction.Id, operands: []const Operand) !void { + const instruction = try mir.buildInstruction(instruction_selection, id, operands); + const current_block = mir.blocks.get(instruction_selection.current_block); + try current_block.instructions.append(mir.allocator, instruction); + } + + fn createSpillStackObject(mir: *MIR, instruction_selection: *InstructionSelection, spill_size: u32, spill_alignment: u32) !u32 { + const frame_index = try mir.createStackObject(instruction_selection, spill_size, spill_alignment, ir.Instruction.Index.invalid, true); + return frame_index; + } + + fn createStackObject(mir: *MIR, instruction_selection: *InstructionSelection, size: u64, asked_alignment: u32, ir_instruction: ir.Instruction.Index, is_spill_slot: bool) !u32 { + const stack_realignable = false; + const alignment = clampStackAlignment(!stack_realignable, asked_alignment, 16); + const index: u32 = @intCast(instruction_selection.stack_objects.items.len); + try instruction_selection.stack_objects.append(mir.allocator, .{ + .size = size, + .alignment = alignment, + .spill_slot = 
is_spill_slot, + .ir = ir_instruction, + }); + return index; + } + + fn clampStackAlignment(clamp: bool, alignment: u32, stack_alignment: u32) u32 { + if (!clamp or alignment <= stack_alignment) return alignment; + return stack_alignment; } }; -const GPRegister = enum(u4) { - a = 0, - c = 1, - d = 2, - b = 3, - sp = 4, - bp = 5, - si = 6, - di = 7, - r8 = 8, - r9 = 9, - r10 = 10, - r11 = 11, - r12 = 12, - r13 = 13, - r14 = 14, - r15 = 15, -}; +// const ModRm = packed struct(u8) { +// rm: u3, +// reg: u3, +// mod: u2, +// }; -const syscall_registers = [7]GPRegister{ .a, .di, .si, .d, .r10, .r8, .r9 }; +// const Rex = packed struct(u8) { +// b: bool, +// x: bool, +// r: bool, +// w: bool, +// fixed: u4 = 0b0100, +// +// fn create(args: struct { +// rm: ?GPRegister = null, +// reg: ?GPRegister = null, +// sib: bool = false, +// rm_size: ?Size = null, +// }) ?Rex { +// const rex_byte = Rex{ +// .b = if (args.rm) |rm| @intFromEnum(rm) > std.math.maxInt(u3) else false, +// .x = args.sib, +// .r = if (args.reg) |reg| @intFromEnum(reg) > std.math.maxInt(u3) else false, +// .w = if (args.rm_size) |rm_size| rm_size == .eight else false, +// }; +// +// if (@as(u4, @truncate(@as(u8, @bitCast(rex_byte)))) != 0) { +// return rex_byte; +// } else { +// return null; +// } +// } +// }; + +fn getIrType(intermediate: *ir.Result, ir_instruction_index: ir.Instruction.Index) ir.Type { + const ir_instruction = intermediate.instructions.get(ir_instruction_index); + return switch (ir_instruction.*) { + .argument => |argument_index| intermediate.arguments.get(argument_index).type, + .stack => |stack_index| intermediate.stack_references.get(stack_index).type, + .load => |load_index| getIrType(intermediate, intermediate.loads.get(load_index).instruction), + .syscall => |_| .i64, + .load_integer => |integer| integer.type, + .load_string_literal => .i64, + .call => |call_index| intermediate.function_declarations.get(intermediate.calls.get(call_index).function).return_type, + .sign_extend => 
|cast_index| intermediate.casts.get(cast_index).type, + else => |t| @panic(@tagName(t)), + }; +} + +fn resolveType(ir_type: ir.Type) ValueType.Id { + return switch (ir_type) { + inline //.i8, + //.i16, + .i32, + .i64, + => |ir_type_ct| @field(ValueType.Id, @typeInfo(ir.Type).Enum.fields[@intFromEnum(ir_type_ct)].name), + .i8, .i16 => unreachable, + .void, + .noreturn, + => unreachable, + }; +} + +const RegisterSet = AutoArrayHashMap(Register.Physical, void); + +fn getSubregisters(allocator: Allocator, reg: Register.Physical) !RegisterSet { + var result = RegisterSet{}; + + try getSubregistersRecursive(allocator, &result, reg); + + return result; +} + +fn getSubregistersRecursive(allocator: Allocator, set: *RegisterSet, reg: Register.Physical) !void { + if (set.get(reg) == null) { + try set.putNoClobber(allocator, reg, {}); + const register_descriptor = register_descriptors.getPtrConst(reg); + for (register_descriptor.subregisters) |subreg| { + try getSubregistersRecursive(allocator, set, subreg); + } + } +} + +const LiveRegister = struct { + last_use: Instruction.Index = Instruction.Index.invalid, + virtual: Register.Virtual.Index, + physical: Register.Physical = Register.Physical.no_register, + live_out: bool = false, + reloaded: bool = false, +}; diff --git a/src/data_structures.zig b/src/data_structures.zig index 39b0df9..315c21d 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -56,29 +56,42 @@ pub fn BlockList(comptime T: type) type { assert(index.valid); return @as(u30, @truncate(@as(u32, @bitCast(index)))); } + + pub fn fromInteger(usize_index: usize) Index { + const index: u32 = @intCast(usize_index); + const block: u24 = @intCast(index / item_count); + const i: u6 = @intCast(index % item_count); + return .{ + .index = i, + .block = block, + }; + } }; pub const Iterator = struct { - block_index: u26, - element_index: u7, + block_index: u24, + element_index: u6, list: *const List, + pub fn getCurrentIndex(i: *const Iterator) Index { + 
return .{ + .block = i.block_index, + .index = @intCast(i.element_index), + }; + } + pub fn next(i: *Iterator) ?T { return if (i.nextPointer()) |ptr| ptr.* else null; } pub fn nextPointer(i: *Iterator) ?*T { - if (i.element_index >= item_count) { - i.block_index += 1; - i.element_index = 0; - } - - while (i.block_index < i.list.blocks.items.len) : (i.block_index += 1) { - while (i.element_index < item_count) : (i.element_index += 1) { - if (i.list.blocks.items[i.block_index].bitset.isSet(i.element_index)) { - const index = i.element_index; - i.element_index += 1; - return &i.list.blocks.items[i.block_index].items[index]; + for (i.block_index..i.list.blocks.items.len) |block_index| { + for (@as(u8, i.element_index)..item_count) |element_index| { + if (i.list.blocks.items[i.block_index].bitset.isSet(element_index)) { + i.element_index = @intCast(element_index); + i.element_index +%= 1; + i.block_index = @as(u24, @intCast(block_index)) + @intFromBool(i.element_index < element_index); + return &i.list.blocks.items[block_index].items[element_index]; } } } @@ -136,6 +149,7 @@ pub fn BlockList(comptime T: type) type { new_block.* = .{}; const index = new_block.allocateIndex() catch unreachable; const ptr = &new_block.items[index]; + list.first_block += @intFromBool(block_index != 0); break :blk Allocation{ .ptr = ptr, .index = .{ @@ -159,13 +173,11 @@ pub fn BlockList(comptime T: type) type { } } - pub fn indexOf(list: *List, elem: *T) Index { + pub fn indexOf(list: *const List, elem: *const T) Index { const address = @intFromPtr(elem); - std.debug.print("Items: {}. Block count: {}\n", .{ list.len, list.blocks.items.len }); for (list.blocks.items, 0..) |*block, block_index| { const base = @intFromPtr(&block.items[0]); const top = base + @sizeOf(T) * item_count; - std.debug.print("Bitset: {}. address: 0x{x}. Base: 0x{x}. 
Top: 0x{x}\n", .{ block.bitset, address, base, top }); if (address >= base and address < top) { return .{ .block = @intCast(block_index), @@ -199,3 +211,113 @@ pub fn enumFromString(comptime E: type, string: []const u8) ?E { } } else null; } + +pub fn StringKeyMap(comptime Value: type) type { + return struct { + list: std.MultiArrayList(Data) = .{}, + const Key = u32; + const Data = struct { + key: Key, + value: Value, + }; + + pub fn length(string_map: *@This()) usize { + return string_map.list.len; + } + + fn hash(string: []const u8) Key { + const string_key: Key = @truncate(std.hash.Wyhash.hash(0, string)); + return string_key; + } + + pub fn getKey(string_map: *const @This(), string: []const u8) ?Key { + return if (string_map.getKeyPtr(string)) |key_ptr| key_ptr.* else null; + } + + pub fn getKeyPtr(string_map: *const @This(), string_key: Key) ?*const Key { + for (string_map.list.items(.key)) |*key_ptr| { + if (key_ptr.* == string_key) { + return key_ptr; + } + } else { + return null; + } + } + + pub fn getValue(string_map: *const @This(), key: Key) ?Value { + if (string_map.getKeyPtr(key)) |key_ptr| { + const index = string_map.indexOfKey(key_ptr); + return string_map.list.items(.value)[index]; + } else { + return null; + } + } + + pub fn indexOfKey(string_map: *const @This(), key_ptr: *const Key) usize { + return @divExact(@intFromPtr(key_ptr) - @intFromPtr(string_map.list.items(.key).ptr), @sizeOf(Key)); + } + + const GOP = struct { + key: Key, + found_existing: bool, + }; + + pub fn getOrPut(string_map: *@This(), allocator: Allocator, string: []const u8, value: Value) !GOP { + const string_key: Key = @truncate(std.hash.Wyhash.hash(0, string)); + for (string_map.list.items(.key)) |key| { + if (key == string_key) return .{ + .key = string_key, + .found_existing = true, + }; + } else { + try string_map.list.append(allocator, .{ + .key = string_key, + .value = value, + }); + + return .{ + .key = string_key, + .found_existing = false, + }; + } + } + }; +} + 
+const page_size = std.mem.page_size; +extern fn pthread_jit_write_protect_np(enabled: bool) void; + +pub fn mmap(size: usize, flags: packed struct { + executable: bool = false, +}) ![]align(page_size) u8 { + return switch (@import("builtin").os.tag) { + .windows => blk: { + const windows = std.os.windows; + break :blk @as([*]align(page_size) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size]; + }, + .linux, .macos => |os_tag| blk: { + const jit = switch (os_tag) { + .macos => 0x800, + .linux => 0, + else => unreachable, + }; + const execute_flag: switch (os_tag) { + .linux => u32, + .macos => c_int, + else => unreachable, + } = if (flags.executable) std.os.PROT.EXEC else 0; + const protection_flags: u32 = @intCast(std.os.PROT.READ | std.os.PROT.WRITE | execute_flag); + const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE | jit; + + const result = try std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0); + if (@import("builtin").cpu.arch == .aarch64 and @import("builtin").os.tag == .macos) { + if (flags.executable) { + pthread_jit_write_protect_np(false); + } + } + + break :blk result; + }, + else => @compileError("OS not supported"), + }; +} diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig index fc63727..f1334ac 100644 --- a/src/frontend/lexical_analyzer.zig +++ b/src/frontend/lexical_analyzer.zig @@ -10,6 +10,7 @@ const ArrayList = data_structures.ArrayList; const enumFromString = data_structures.enumFromString; const Compilation = @import("../Compilation.zig"); +const File = Compilation.File; const fs = @import("../fs.zig"); pub const Token = packed struct(u64) { @@ -35,6 +36,15 @@ pub const Token = packed struct(u64) { fixed_keyword_fn = 0x0e, fixed_keyword_unreachable = 0x0f, fixed_keyword_return = 0x10, + fixed_keyword_ssize = 0x11, + fixed_keyword_usize = 0x12, + fixed_keyword_switch = 0x13, + fixed_keyword_if = 
0x14, + fixed_keyword_else = 0x15, + fixed_keyword_struct = 0x16, + fixed_keyword_enum = 0x17, + fixed_keyword_union = 0x18, + fixed_keyword_extern = 0x19, keyword_unsigned_integer = 0x1f, keyword_signed_integer = 0x20, bang = '!', // 0x21 @@ -86,6 +96,15 @@ pub const FixedKeyword = enum { @"fn", @"unreachable", @"return", + ssize, + usize, + @"switch", + @"if", + @"else", + @"struct", + @"enum", + @"union", + @"extern", }; pub const Result = struct { @@ -93,7 +112,8 @@ pub const Result = struct { time: u64, }; -pub fn analyze(allocator: Allocator, text: []const u8) !Result { +pub fn analyze(allocator: Allocator, text: []const u8, file_index: File.Index) !Result { + _ = file_index; const time_start = std.time.Instant.now() catch unreachable; var tokens = try ArrayList(Token).initCapacity(allocator, text.len / 8); var index: usize = 0; @@ -138,7 +158,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)), } else .identifier; }, - '(', ')', '{', '}', '-', '=', ';', '#', '@', ',', '.' => |operator| blk: { + '(', ')', '{', '}', '[', ']', '-', '=', ';', '#', '@', ',', '.', ':', '>', '<', '*', '!' 
=> |operator| blk: { index += 1; break :blk @enumFromInt(operator); }, diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index ce75292..ce296d6 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -10,9 +10,12 @@ const Package = Compilation.Package; const ArgumentList = Compilation.ArgumentList; const Assignment = Compilation.Assignment; const Block = Compilation.Block; +const Call = Compilation.Call; const Declaration = Compilation.Declaration; +const Enum = Compilation.Enum; const Field = Compilation.Field; const Function = Compilation.Function; +const Intrinsic = Compilation.Intrinsic; const Loop = Compilation.Loop; const Scope = Compilation.Scope; const ScopeType = Compilation.ScopeType; @@ -39,20 +42,21 @@ const Analyzer = struct { module: *Module, current_file: File.Index, - fn getSourceFile(analyzer: *Analyzer, scope_index: Scope.Index) []const u8 { + fn getScopeSourceFile(analyzer: *Analyzer, scope_index: Scope.Index) []const u8 { const scope = analyzer.module.scopes.get(scope_index); const file = analyzer.module.files.get(scope.file); return file.source_code; } - fn getNode(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) Node { + fn getScopeNode(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) Node { const scope = analyzer.module.scopes.get(scope_index); const file = analyzer.module.files.get(scope.file); - const result = file.syntactic_analyzer_result.nodes.items[node_index.unwrap()]; - return result; + const result = &file.syntactic_analyzer_result.nodes.items[node_index.unwrap()]; + print("Fetching node #{} (0x{x}) from scope #{} from file #{} with id: {s}\n", .{ node_index.uniqueInteger(), @intFromPtr(result), scope_index.uniqueInteger(), scope.file.uniqueInteger(), @tagName(result.id) }); + return result.*; } - fn getToken(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) Token { + fn getScopeToken(analyzer: 
*Analyzer, scope_index: Scope.Index, token_index: Token.Index) Token { const scope = analyzer.module.scopes.get(scope_index); const file = analyzer.module.files.get(scope.file); const result = file.lexical_analyzer_result.tokens.items[token_index]; @@ -60,14 +64,32 @@ const Analyzer = struct { return result; } - fn getNodeList(analyzer: *Analyzer, scope_index: Scope.Index, list_index: u32) ArrayList(Node.Index) { + fn getScopeNodeList(analyzer: *Analyzer, scope_index: Scope.Index, node: Node) ArrayList(Node.Index) { const scope = analyzer.module.scopes.get(scope_index); - const file = analyzer.module.files.get(scope.file); - return file.syntactic_analyzer_result.node_lists.items[list_index]; + return getFileNodeList(analyzer, scope.file, node); + } + + fn getFileNodeList(analyzer: *Analyzer, file_index: File.Index, node: Node) ArrayList(Node.Index) { + assert(node.id == .node_list); + const file = analyzer.module.files.get(file_index); + const list_index = node.left; + return file.syntactic_analyzer_result.node_lists.items[list_index.uniqueInteger()]; + } + + fn getFileToken(analyzer: *Analyzer, file_index: File.Index, token: Token.Index) Token { + const file = analyzer.module.files.get(file_index); + const result = file.lexical_analyzer_result.tokens.items[token]; + return result; + } + + fn getFileNode(analyzer: *Analyzer, file_index: File.Index, node_index: Node.Index) Node { + const file = analyzer.module.files.get(file_index); + const result = file.syntactic_analyzer_result.nodes.items[node_index.unwrap()]; + return result; } fn comptimeBlock(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Value.Index { - const comptime_node = analyzer.getNode(scope_index, node_index); + const comptime_node = analyzer.getScopeNode(scope_index, node_index); const comptime_block = try analyzer.block(scope_index, .{ .none = {} }, comptime_node.left); const value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ @@ -94,8 +116,9 @@ const 
Analyzer = struct { } fn block(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index { + print("Resolving block from scope #{} in file #{}\n", .{ scope_index.uniqueInteger(), analyzer.module.scopes.get(scope_index).file.uniqueInteger() }); var reaches_end = true; - const block_node = analyzer.getNode(scope_index, node_index); + const block_node = analyzer.getScopeNode(scope_index, node_index); var statement_nodes = ArrayList(Node.Index){}; switch (block_node.id) { .block_one, .comptime_block_one => { @@ -106,7 +129,7 @@ const Analyzer = struct { try statement_nodes.append(analyzer.allocator, block_node.left); try statement_nodes.append(analyzer.allocator, block_node.right); }, - .block, .comptime_block => statement_nodes = analyzer.getNodeList(scope_index, block_node.left.unwrap()), + .block, .comptime_block => unreachable, //statement_nodes = analyzer.getNodeList(scope_index, block_node.left.unwrap()), else => |t| @panic(@tagName(t)), } @@ -124,105 +147,56 @@ const Analyzer = struct { unreachable; } - const statement_node = analyzer.getNode(scope_index, statement_node_index); + const statement_node = analyzer.getScopeNode(scope_index, statement_node_index); const statement_value = switch (statement_node.id) { - inline .assign, .simple_while => |statement_id| blk: { - const specific_value_index = switch (statement_id) { - .assign => { - print("Assign: #{}\n", .{node_index.value}); - assert(statement_node.id == .assign); - switch (statement_node.left.valid) { - // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` - false => { - const right_value_allocation = try analyzer.module.values.addOne(analyzer.allocator); - right_value_allocation.ptr.* = .{ - .unresolved = .{ - .node_index = statement_node.right, - }, - }; - try analyzer.resolveNode(right_value_allocation.ptr, scope_index, ExpectType.none, statement_node.right); - // switch 
(right_value_allocation.ptr.*) { - // else => |t| std.debug.print("\n\n\n\n\nASSIGN RIGHT: {s}\n\n\n\n", .{@tagName(t)}), - // } - try statements.append(analyzer.allocator, right_value_allocation.index); - continue; - }, - true => { - // const id = analyzer.tokenIdentifier(.token); - // print("id: {s}\n", .{id}); - // const left = try analyzer.expression(scope_index, ExpectType.none, statement_node.left); + .assign => (try analyzer.module.values.append(analyzer.allocator, try analyzer.processAssignment(scope_index, statement_node_index))).index, + .simple_while => blk: { + const loop_allocation = try analyzer.module.loops.append(analyzer.allocator, .{ + .condition = Value.Index.invalid, + .body = Value.Index.invalid, + .breaks = false, + }); + loop_allocation.ptr.condition = (try analyzer.unresolvedAllocate(scope_index, ExpectType.boolean, statement_node.left)).index; + loop_allocation.ptr.body = (try analyzer.unresolvedAllocate(scope_index, ExpectType.none, statement_node.right)).index; - // if (analyzer.module.values.get(left).isComptime() and analyzer.module.values.get(right).isComptime()) { - // unreachable; - // } else { - // const assignment_index = try analyzer.module.assignments.append(analyzer.allocator, .{ - // .store = result.left, - // .load = result.right, - // }); - // return assignment_index; - // } - unreachable; - }, - } - }, - .simple_while => statement: { - const loop_allocation = try analyzer.module.loops.append(analyzer.allocator, .{ - .condition = Value.Index.invalid, - .body = Value.Index.invalid, - .breaks = false, - }); - loop_allocation.ptr.condition = (try analyzer.unresolvedAllocate(scope_index, ExpectType.boolean, statement_node.left)).index; - loop_allocation.ptr.body = (try analyzer.unresolvedAllocate(scope_index, ExpectType.none, statement_node.right)).index; + // TODO: bool true + reaches_end = loop_allocation.ptr.breaks or unreachable; - // TODO: bool true - reaches_end = loop_allocation.ptr.breaks or unreachable; - - break 
:statement loop_allocation.index; - }, - else => unreachable, - }; - const value = @unionInit(Value, switch (statement_id) { - .assign => "assign", - .simple_while => "loop", - else => unreachable, - }, specific_value_index); - const value_allocation = try analyzer.module.values.append(analyzer.allocator, value); + const value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ + .loop = loop_allocation.index, + }); break :blk value_allocation.index; }, .@"unreachable" => blk: { reaches_end = false; - break :blk Values.@"unreachable".getIndex(); + break :blk Compilation.Values.@"unreachable".getIndex(); + }, + .simple_symbol_declaration => blk: { + const declaration_index = try analyzer.symbolDeclaration(scope_index, statement_node_index, .local); + const declaration = analyzer.module.declarations.get(declaration_index); + const init_value = analyzer.module.values.get(declaration.init_value); + switch (init_value.isComptime() and declaration.mutability == .@"const") { + // Dont add comptime declaration statements + true => continue, + false => { + const statement_value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ + .declaration = declaration_index, + }); + break :blk statement_value_allocation.index; + }, + } }, - .simple_variable_declaration => (try analyzer.module.values.append(analyzer.allocator, .{ - .declaration = try analyzer.symbolDeclaration(scope_index, statement_node_index, .local), - })).index, .@"return" => blk: { reaches_end = false; - const return_expression: Value.Index = switch (statement_node_index.valid) { - // TODO: expect type - true => ret: { - const return_value_allocation = try analyzer.module.values.addOne(analyzer.allocator); - return_value_allocation.ptr.* = .{ - .unresolved = .{ - .node_index = statement_node.left, - }, - }; - try analyzer.resolveNode(return_value_allocation.ptr, scope_index, expect_type, statement_node.left); - break :ret return_value_allocation.index; - }, - false => 
@panic("TODO: ret void"), - }; - const return_value_allocation = try analyzer.module.returns.append(analyzer.allocator, .{ - .value = return_expression, - }); + const return_value_allocation = try analyzer.module.values.append(analyzer.allocator, try analyzer.processReturn(scope_index, expect_type, statement_node_index)); - const return_expression_value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ - .@"return" = return_value_allocation.index, - }); - - break :blk return_expression_value_allocation.index; + break :blk return_value_allocation.index; }, + .call_two, .call => (try analyzer.module.values.append(analyzer.allocator, .{ + .call = try analyzer.processCall(scope_index, statement_node_index), + })).index, + .@"switch" => (try analyzer.module.values.append(analyzer.allocator, try analyzer.processSwitch(scope_index, statement_node_index))).index, else => |t| @panic(@tagName(t)), }; @@ -237,41 +211,428 @@ const Analyzer = struct { return block_allocation.index; } - fn doIdentifier(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_token: Token.Index, node_scope_index: Scope.Index) !Value.Index { - const identifier_hash = try analyzer.identifierFromToken(node_scope_index, node_token); - const scope = analyzer.module.scopes.get(scope_index); - // TODO: search in upper scopes too - const identifier_scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_hash); - if (identifier_scope_lookup.found_existing) { - const declaration_index = identifier_scope_lookup.value_ptr.*; - const declaration = analyzer.module.declarations.get(declaration_index); - const init_value = analyzer.module.values.get(declaration.init_value); - print("Declaration found: {}\n", .{init_value}); - switch (init_value.*) { - .unresolved => |ur| try analyzer.resolveNode(init_value, scope_index, expect_type, ur.node_index), - else => {}, - } - if (init_value.isComptime() and declaration.mutability == .@"const") { - return 
declaration.init_value; - } else { - const ref_allocation = try analyzer.module.values.append(analyzer.allocator, .{ - .declaration_reference = declaration_index, - }); - return ref_allocation.index; + fn processCall(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Call.Index { + const node = analyzer.getScopeNode(scope_index, node_index); + print("Node index: {}. Left index: {}\n", .{ node_index.uniqueInteger(), node.left.uniqueInteger() }); + assert(node.left.valid); + const left_value_index = switch (node.left.valid) { + true => blk: { + const member_or_namespace_node_index = node.left; + assert(member_or_namespace_node_index.valid); + const this_value_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, member_or_namespace_node_index); + break :blk this_value_allocation.index; + }, + false => unreachable, //Value.Index.invalid, + }; + + const left_type = switch (left_value_index.valid) { + true => switch (analyzer.module.values.get(left_value_index).*) { + .function => |function_index| analyzer.module.function_prototypes.get(analyzer.module.types.get(analyzer.module.functions.get(function_index).prototype).function).return_type, + else => |t| @panic(@tagName(t)), + }, + false => Type.Index.invalid, + }; + const arguments_index = switch (node.id) { + .call, .call_two => |call_tag| (try analyzer.module.argument_lists.append(analyzer.allocator, .{ + .array = b: { + const argument_list_node_index = node.right; + const call_argument_node_list = switch (call_tag) { + .call => analyzer.getScopeNodeList(scope_index, analyzer.getScopeNode(scope_index, argument_list_node_index)).items, + .call_two => &.{argument_list_node_index}, + else => unreachable, + }; + + switch (analyzer.module.values.get(left_value_index).*) { + .function => |function_index| { + const function = analyzer.module.functions.get(function_index); + const function_prototype = 
analyzer.module.function_prototypes.get(analyzer.module.types.get(function.prototype).function); + const argument_declarations = function_prototype.arguments.?; + print("Argument declaration count: {}. Argument node list count: {}\n", .{ argument_declarations.len, call_argument_node_list.len }); + var argument_array = ArrayList(Value.Index){}; + if (argument_declarations.len == call_argument_node_list.len) { + for (argument_declarations, call_argument_node_list) |argument_declaration_index, argument_node_index| { + const argument_declaration = analyzer.module.declarations.get(argument_declaration_index); + // const argument_declaration_type = analyzer.module.types.get(argument_declaration.type); + // assert(argument_declaration.type.valid); + const call_argument_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType{ + .type_index = argument_declaration.type, + }, argument_node_index); + try call_argument_allocation.ptr.typeCheck(analyzer.module, argument_declaration.type); + // const call_argument_type_index = call_argument_allocation.ptr.getType(analyzer.module); + // const call_argument_type = analyzer.module.types.get(call_argument_type_index); + // if (call_argument_type_index != argument_declaration.type) { + // if (std.meta.activeTag(call_argument_type.*) == std.meta.activeTag(argument_declaration_type.*)) { + // if (!call_argument_type.equalTypeCanCoerce(argument_declaration_type)) { + // unreachable; + // } + // } else { + // try call_argument_type.promote(argument_declaration_type); + // call_argument_allocation.ptr.setType(argument_declaration.type); + // } + // } + + try argument_array.append(analyzer.allocator, call_argument_allocation.index); + } + + break :b argument_array; + } else { + std.debug.panic("Function call has argument count mismatch: call has {}, function declaration has {}\n", .{ call_argument_node_list.len, argument_declarations.len }); + } + }, + else => |t| @panic(@tagName(t)), + } + }, + })).index, + .call_one => 
ArgumentList.Index.invalid, + else => |t| @panic(@tagName(t)), + }; + const call_allocation = try analyzer.module.calls.append(analyzer.allocator, .{ + .value = left_value_index, + .arguments = arguments_index, + + .type = left_type, + }); + + return call_allocation.index; + } + + fn typeCheckEnumLiteral(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index, enum_type: *const Enum) !?u32 { + const enum_name = tokenBytes(analyzer.getScopeToken(scope_index, token_index), analyzer.getScopeSourceFile(scope_index)); + const enum_name_hash = try analyzer.processIdentifier(enum_name); + + for (enum_type.fields.items) |enum_field_index| { + const enum_field = analyzer.module.enum_fields.get(enum_field_index); + const existing = analyzer.module.getName(enum_field.name).?; + if (enum_field.name == enum_name_hash) { + return enum_name_hash; } + print("Existing \"{s}\" != current \"{s}\"\n", .{ existing, enum_name }); } else { - std.debug.panic("Identifier not found in scope #{} of file #{} referenced by scope #{} of file #{}: {s}", .{ scope_index.uniqueInteger(), scope.file.uniqueInteger(), node_scope_index.uniqueInteger(), analyzer.module.scopes.get(node_scope_index).file.uniqueInteger(), tokenBytes(analyzer.getToken(scope_index, node_token), analyzer.getSourceFile(scope_index)) }); + return null; + } + } + + fn processSwitch(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Value { + const node = analyzer.getScopeNode(scope_index, node_index); + assert(node.id == .@"switch"); + + analyzer.debugNode(scope_index, node_index); + + const switch_expr = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, node.left); + const switch_case_list_node = analyzer.getScopeNode(scope_index, node.right); + const switch_case_node_list = switch (switch_case_list_node.id) { + .node_list => analyzer.getScopeNodeList(scope_index, switch_case_list_node).items, + else => |t| @panic(@tagName(t)), + }; + + switch (switch_expr.ptr.*) { + .enum_field 
=> |e_field_index| { + const e_field = analyzer.module.enum_fields.get(e_field_index); + const enum_type = analyzer.module.enums.get(e_field.parent); + const enum_field_name = analyzer.module.getName(e_field.name); + _ = enum_field_name; + + var else_case_index: ?usize = null; + _ = else_case_index; + var existing_enums = ArrayList(u32){}; + var switch_case_groups = try ArrayList(ArrayList(u32)).initCapacity(analyzer.allocator, switch_case_node_list.len); + + for (switch_case_node_list, 0..) |switch_case_node_index, index| { + _ = index; + const switch_case_node = analyzer.getScopeNode(scope_index, switch_case_node_index); + switch (switch_case_node.left.valid) { + true => { + const switch_case_condition_node = analyzer.getScopeNode(scope_index, switch_case_node.left); + var switch_case_group = ArrayList(u32){}; + switch (switch_case_condition_node.id) { + .enum_literal => { + if (try typeCheckEnumLiteral(analyzer, scope_index, switch_case_condition_node.token + 1, enum_type)) |enum_name_hash| { + for (existing_enums.items) |existing| { + if (enum_name_hash == existing) { + // Duplicate case + unreachable; + } + } + + try switch_case_group.append(analyzer.allocator, enum_name_hash); + try existing_enums.append(analyzer.allocator, enum_name_hash); + } else { + unreachable; + } + }, + .node_list => { + const node_list = analyzer.getScopeNodeList(scope_index, switch_case_condition_node); + try switch_case_group.ensureTotalCapacity(analyzer.allocator, node_list.items.len); + for (node_list.items) |case_condition_node_index| { + const case_condition_node = analyzer.getScopeNode(scope_index, case_condition_node_index); + switch (case_condition_node.id) { + .enum_literal => { + if (try typeCheckEnumLiteral(analyzer, scope_index, case_condition_node.token + 1, enum_type)) |enum_name_hash| { + for (existing_enums.items) |existing| { + if (enum_name_hash == existing) { + // Duplicate case + unreachable; + } + } + + try existing_enums.append(analyzer.allocator, 
enum_name_hash); + switch_case_group.appendAssumeCapacity(enum_name_hash); + } else { + unreachable; + } + }, + else => |t| @panic(@tagName(t)), + } + } + }, + else => |t| @panic(@tagName(t)), + } + + switch_case_groups.appendAssumeCapacity(switch_case_group); + }, + false => { + unreachable; + // if (existing_enums.items.len == enum_type.fields.items.len) { + // unreachable; + // } + // + // else_case_index = index; + }, + } + } + + const group_index = for (switch_case_groups.items, 0..) |switch_case_group, switch_case_group_index| { + break for (switch_case_group.items) |case_name| { + if (e_field.name == case_name) { + break switch_case_group_index; + } + } else continue; + } else { + unreachable; + }; + + print("Index: {}\n", .{group_index}); + + const true_switch_case_node = analyzer.getScopeNode(scope_index, switch_case_node_list[group_index]); + var result = Value{ + .unresolved = .{ + .node_index = true_switch_case_node.right, + }, + }; + + try analyzer.resolveNode(&result, scope_index, ExpectType.none, true_switch_case_node.right); + + return result; + }, + else => |t| @panic(@tagName(t)), + } + + unreachable; + } + + fn processAssignment(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Value { + const node = analyzer.getScopeNode(scope_index, node_index); + assert(node.id == .assign); + const assignment = switch (node.left.valid) { + // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` + false => { + var result = Value{ + .unresolved = .{ + .node_index = node.right, + }, + }; + + try analyzer.resolveNode(&result, scope_index, ExpectType.none, node.right); + + return result; + }, + true => { + // const id = analyzer.tokenIdentifier(.token); + // print("id: {s}\n", .{id}); + // const left = try analyzer.expression(scope_index, ExpectType.none, statement_node.left); + + // if (analyzer.module.values.get(left).isComptime() and analyzer.module.values.get(right).isComptime()) { + // 
unreachable; + // } else { + // const assignment_index = try analyzer.module.assignments.append(analyzer.allocator, .{ + // .store = result.left, + // .load = result.right, + // }); + // return assignment_index; + // } + unreachable; + }, + }; + _ = assignment; + + unreachable; + } + + fn processReturn(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) !Value { + const node = analyzer.getScopeNode(scope_index, node_index); + const return_expression: Value.Index = switch (node_index.valid) { + // TODO: expect type + true => ret: { + const return_value_allocation = try analyzer.module.values.addOne(analyzer.allocator); + return_value_allocation.ptr.* = .{ + .unresolved = .{ + .node_index = node.left, + }, + }; + try analyzer.resolveNode(return_value_allocation.ptr, scope_index, expect_type, node.left); + break :ret return_value_allocation.index; + }, + false => @panic("TODO: ret void"), + }; + + const return_value_allocation = try analyzer.module.returns.append(analyzer.allocator, .{ + .value = return_expression, + }); + + return .{ + .@"return" = return_value_allocation.index, + }; + } + + const DeclarationLookup = struct { + declaration: Declaration.Index, + scope: Scope.Index, + }; + + fn lookupDeclarationInCurrentAndParentScopes(analyzer: *Analyzer, scope_index: Scope.Index, identifier_hash: u32) ?DeclarationLookup { + var scope_iterator = scope_index; + while (scope_iterator.valid) { + const scope = analyzer.module.scopes.get(scope_iterator); + if (scope.declarations.get(identifier_hash)) |declaration_index| { + return .{ + .declaration = declaration_index, + .scope = scope_iterator, + }; + } + + scope_iterator = scope.parent; + } + + return null; + } + + fn doIdentifier(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_token: Token.Index, node_scope_index: Scope.Index) !Value.Index { + const identifier = analyzer.tokenIdentifier(node_scope_index, node_token); + print("Referencing 
identifier: \"{s}\"\n", .{identifier}); + const identifier_hash = try analyzer.processIdentifier(identifier); + + if (equal(u8, identifier, "print")) { + print("WTF\n", .{}); + } + + if (equal(u8, identifier, "windows")) { + print("WTF\n", .{}); + unreachable; + } + + if (analyzer.lookupDeclarationInCurrentAndParentScopes(scope_index, identifier_hash)) |lookup| { + const declaration_index = lookup.declaration; + const declaration = analyzer.module.declarations.get(declaration_index); + + // Up until now, only arguments have no initialization value + const typecheck_result = switch (declaration.init_value.valid) { + true => blk: { + const init_value = analyzer.module.values.get(declaration.init_value); + print("Declaration found: {}\n", .{init_value}); + const is_unresolved = init_value.* == .unresolved; + switch (is_unresolved) { + true => { + try analyzer.resolveNode(init_value, lookup.scope, expect_type, init_value.unresolved.node_index); + declaration.type = init_value.getType(analyzer.module); + switch (init_value.*) { + .function => |function_index| { + try analyzer.module.function_name_map.put(analyzer.allocator, function_index, declaration.name); + }, + else => {}, + } + }, + false => {}, + } + + print("Declaration resolved as: {}\n", .{init_value}); + print("Declaration mutability: {s}. 
Is comptime: {}\n", .{ @tagName(declaration.mutability), init_value.isComptime() }); + + const typecheck_result = try analyzer.typeCheck(expect_type, declaration.type); + + if (init_value.isComptime() and declaration.mutability == .@"const") { + assert(declaration.init_value.valid); + assert(typecheck_result == .success); + return declaration.init_value; + } + + break :blk typecheck_result; + }, + false => try analyzer.typeCheck(expect_type, declaration.type), + }; + + const ref_allocation = try analyzer.module.values.append(analyzer.allocator, .{ + .declaration_reference = .{ + .value = declaration_index, + .type = switch (expect_type) { + .none => declaration.type, + .type_index => switch (typecheck_result) { + .success => expect_type.type_index, + else => declaration.type, + }, + .flexible_integer => blk: { + assert(declaration.type.valid); + break :blk declaration.type; + }, + }, + }, + }); + + return switch (typecheck_result) { + .success => ref_allocation.index, + inline .zero_extend, .sign_extend => |extend| blk: { + const cast_allocation = try analyzer.module.casts.append(analyzer.allocator, .{ + .value = ref_allocation.index, + .type = switch (expect_type) { + .flexible_integer => |flexible_integer| t: { + const cast_type = Type.Integer.getIndex(.{ + .signedness = switch (extend) { + .zero_extend => .unsigned, + .sign_extend => .signed, + else => unreachable, + }, + .bit_count = flexible_integer.byte_count << 3, + }); + break :t cast_type; + }, + else => |t| @panic(@tagName(t)), + }, + }); + const value_allocation = try analyzer.module.values.append(analyzer.allocator, @unionInit(Value, @tagName(extend), cast_allocation.index)); + break :blk value_allocation.index; + }, + }; + } else { + const scope = analyzer.module.scopes.get(scope_index); + std.debug.panic("Identifier \"{s}\" not found in scope #{} of file #{} referenced by scope #{} of file #{}: {s}", .{ identifier, scope_index.uniqueInteger(), scope.file.uniqueInteger(), 
node_scope_index.uniqueInteger(), analyzer.module.scopes.get(node_scope_index).file.uniqueInteger(), tokenBytes(analyzer.getScopeToken(scope_index, node_token), analyzer.getScopeSourceFile(scope_index)) }); } } fn getArguments(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !ArrayList(Node.Index) { var arguments = ArrayList(Node.Index){}; - const node = analyzer.getNode(scope_index, node_index); + const node = analyzer.getScopeNode(scope_index, node_index); switch (node.id) { .compiler_intrinsic_two => { try arguments.append(analyzer.allocator, node.left); try arguments.append(analyzer.allocator, node.right); }, + .compiler_intrinsic => { + const argument_list_node_index = node.left; + assert(argument_list_node_index.valid); + const node_list_node = analyzer.getScopeNode(scope_index, argument_list_node_index); + const node_list = analyzer.getScopeNodeList(scope_index, node_list_node); + + return node_list; + }, else => |t| @panic(@tagName(t)), } @@ -279,8 +640,8 @@ const Analyzer = struct { } fn resolveNode(analyzer: *Analyzer, value: *Value, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!void { - const node = analyzer.getNode(scope_index, node_index); - print("Resolving node #{}: {}\n", .{ node_index.uniqueInteger(), node }); + const node = analyzer.getScopeNode(scope_index, node_index); + print("Resolving node #{} in scope #{} from file #{}: {}\n", .{ node_index.uniqueInteger(), scope_index.uniqueInteger(), analyzer.module.scopes.get(scope_index).file.uniqueInteger(), node }); assert(value.* == .unresolved); @@ -294,7 +655,7 @@ const Analyzer = struct { switch (expect_type) { .none => {}, .type_index => |expected_type| { - if (@as(u32, @bitCast(type_boolean)) != @as(u32, @bitCast(expected_type))) { + if (@as(u32, @bitCast(Type.boolean)) != @as(u32, @bitCast(expected_type))) { @panic("TODO: compile error"); } }, @@ -306,29 +667,38 @@ const Analyzer = struct { // break :blk Values.getIndex(.bool_true); }, 
- .compiler_intrinsic_one, .compiler_intrinsic_two => blk: { + .compiler_intrinsic_one, .compiler_intrinsic_two, .compiler_intrinsic => blk: { const intrinsic_name = analyzer.tokenIdentifier(scope_index, node.token + 1); - const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable; - print("Intrinsic: {s}\n", .{@tagName(intrinsic)}); + print("Intrinsic: {s}\n", .{intrinsic_name}); + const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse std.debug.panic("Unknown intrinsic: {s}\n", .{intrinsic_name}); switch (intrinsic) { .import => { assert(node.id == .compiler_intrinsic_one); - const import_argument = analyzer.getNode(scope_index, node.left); + const import_argument = analyzer.getScopeNode(scope_index, node.left); switch (import_argument.id) { .string_literal => { const import_name = analyzer.tokenStringLiteral(scope_index, import_argument.token); const import_file = try analyzer.module.importFile(analyzer.allocator, analyzer.current_file, import_name); + print("Importing \"{s}\"...\n", .{import_name}); - if (import_file.file.is_new) { - // TODO: fix error - try analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, import_file.file.ptr); - } else { - unreachable; - } - - break :blk .{ - .type = try analyzeFile(value, analyzer.allocator, analyzer.module, import_file.file.ptr, import_file.file.index), + const result = .{ + .type = switch (import_file.file.is_new) { + true => true_block: { + const new_file_index = import_file.file.index; + try analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, new_file_index); + const analyze_result = try analyzeFile(value, analyzer.allocator, analyzer.module, new_file_index); + print("Done analyzing {s}!\n", .{import_name}); + break :true_block analyze_result; + }, + false => false_block: { + const file_type = import_file.file.ptr.type; + assert(file_type.valid); + break :false_block file_type; + }, + }, }; + + break :blk result; }, 
else => unreachable, } @@ -337,16 +707,17 @@ const Analyzer = struct { var argument_nodes = try analyzer.getArguments(scope_index, node_index); print("Argument count: {}\n", .{argument_nodes.items.len}); if (argument_nodes.items.len > 0 and argument_nodes.items.len <= 6 + 1) { - const number_allocation = try analyzer.unresolvedAllocate(scope_index, .{ + const argument_expect_type = .{ .flexible_integer = .{ .byte_count = 8, }, - }, argument_nodes.items[0]); + }; + const number_allocation = try analyzer.unresolvedAllocate(scope_index, argument_expect_type, argument_nodes.items[0]); const number = number_allocation.index; assert(number.valid); var arguments = std.mem.zeroes([6]Value.Index); for (argument_nodes.items[1..], 0..) |argument_node_index, argument_index| { - const argument_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, argument_node_index); + const argument_allocation = try analyzer.unresolvedAllocate(scope_index, argument_expect_type, argument_node_index); arguments[argument_index] = argument_allocation.index; } @@ -366,24 +737,65 @@ const Analyzer = struct { unreachable; } }, + .@"error" => { + assert(node.id == .compiler_intrinsic_one); + const message_node = analyzer.getScopeNode(scope_index, node.left); + switch (message_node.id) { + .string_literal => std.debug.panic("error: {s}", .{analyzer.tokenStringLiteral(scope_index, message_node.token)}), + else => |t| @panic(@tagName(t)), + } + unreachable; + }, } unreachable; }, .function_definition => blk: { - const function_prototype_index = try analyzer.functionPrototype(scope_index, node.left); + const function_scope_allocation = try analyzer.allocateScope(.{ + .parent = scope_index, + .file = analyzer.module.scopes.get(scope_index).file, + }); - const function_body = try analyzer.block(scope_index, .{ + const function_prototype_index = try analyzer.functionPrototype(function_scope_allocation.index, node.left); + + const function_body = try 
analyzer.block(function_scope_allocation.index, .{ .type_index = analyzer.functionPrototypeReturnType(function_prototype_index), }, node.right); - const function_allocation = try analyzer.module.functions.append(analyzer.allocator, .{ - .prototype = function_prototype_index, - .body = function_body, + const prototype_type = try analyzer.module.types.append(analyzer.allocator, .{ + .function = function_prototype_index, }); + + const function_allocation = try analyzer.module.functions.append(analyzer.allocator, .{ + .prototype = prototype_type.index, + .body = function_body, + .scope = function_scope_allocation.index, + }); + break :blk .{ .function = function_allocation.index, }; }, + .function_prototype => blk: { + const function_prototype_index = try analyzer.functionPrototype(scope_index, node_index); + const function_prototype = analyzer.module.function_prototypes.get(function_prototype_index); + + break :blk switch (function_prototype.attributes.@"extern") { + true => b: { + const prototype_type = try analyzer.module.types.append(analyzer.allocator, .{ + .function = function_prototype_index, + }); + const function_allocation = try analyzer.module.functions.append(analyzer.allocator, .{ + .prototype = prototype_type.index, + .body = Block.Index.invalid, + .scope = Scope.Index.invalid, + }); + break :b .{ + .function = function_allocation.index, + }; + }, + false => unreachable, + }; + }, .simple_while => unreachable, .block_zero, .block_one => blk: { const block_index = try analyzer.block(scope_index, expect_type, node_index); @@ -392,126 +804,320 @@ const Analyzer = struct { }; }, .number_literal => switch (std.zig.parseNumberLiteral(analyzer.numberBytes(scope_index, node.token))) { - .int => |integer| blk: { - assert(expect_type != .none); - const int_type = switch (expect_type) { - .flexible_integer => |flexible_integer_type| Compilation.Type.Integer{ - .bit_count = flexible_integer_type.byte_count << 3, - .signedness = .unsigned, + .int => |integer| .{ + 
.integer = .{ + .value = integer, + .type = switch (expect_type) { + .none => Type.comptime_int, + .flexible_integer, .type_index => Type.Integer.getIndex(switch (expect_type) { + .flexible_integer => |flexible_integer_type| Compilation.Type.Integer{ + .bit_count = flexible_integer_type.byte_count << 3, + .signedness = .unsigned, + }, + .type_index => |type_index| a: { + const type_info = analyzer.module.types.get(type_index); + break :a switch (type_info.*) { + .integer => |int| int, + else => |t| @panic(@tagName(t)), + }; + }, + else => unreachable, + }), }, - .type_index => |type_index| a: { - const type_info = analyzer.module.types.get(type_index); - break :a switch (type_info.*) { - .integer => |int| int, - else => |t| @panic(@tagName(t)), - }; - }, - else => |t| @panic(@tagName(t)), - }; - break :blk .{ - .integer = .{ - .value = integer, - .type = int_type, - }, - }; + .signedness = .unsigned, + }, }, else => |t| @panic(@tagName(t)), }, - .call_one => blk: { - const this_value_node_index = node.left; - const this_value_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, this_value_node_index); - const value_type = switch (this_value_allocation.ptr.*) { - .function => |function_index| analyzer.module.function_prototypes.get(analyzer.module.functions.get(function_index).prototype).return_type, - else => |t| @panic(@tagName(t)), - }; - - const call_allocation = try analyzer.module.calls.append(analyzer.allocator, .{ - .value = this_value_allocation.index, - .arguments = ArgumentList.Index.invalid, - .type = value_type, - }); - break :blk .{ - .call = call_allocation.index, - }; + .call, .call_one, .call_two => .{ + .call = try analyzer.processCall(scope_index, node_index), }, .field_access => blk: { - const left_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, node.left); + print("left alocation...\n", .{}); const identifier = analyzer.tokenIdentifier(scope_index, node.right.value); - _ = identifier; + 
print("Field access identifier for RHS: \"{s}\"\n", .{identifier}); + analyzer.debugNode(scope_index, node_index); + const left_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, node.left); switch (left_allocation.ptr.*) { .type => |type_index| { - const left_type = analyzer.module.types.get(type_index); - switch (left_type.*) { - .@"struct" => |struct_index| { - const struct_type = analyzer.module.structs.get(struct_index); - const right_index = try analyzer.doIdentifier(struct_type.scope, ExpectType.none, node.right.value, scope_index); - const right_value = analyzer.module.values.get(right_index); - switch (right_value.*) { - .function => break :blk right_value.*, - else => unreachable, - } - print("Right: {}\n", .{right_value}); - // struct_scope.declarations.get(identifier); + if (type_index.valid) { + const left_type = analyzer.module.types.get(type_index); + switch (left_type.*) { + .@"struct" => |struct_index| { + const struct_type = analyzer.module.structs.get(struct_index); + const right_index = try analyzer.doIdentifier(struct_type.scope, ExpectType.none, node.right.value, scope_index); + const right_value = analyzer.module.values.get(right_index); + switch (right_value.*) { + .function, .type, .enum_field => break :blk right_value.*, + .declaration_reference => |declaration_reference| { + const declaration = analyzer.module.declarations.get(declaration_reference.value); + const declaration_name = analyzer.module.getName(declaration.name).?; + print("Decl ref: {s}\n", .{declaration_name}); + print("TODO: maybe this should not be runtime", .{}); + unreachable; + }, + else => |t| @panic(@tagName(t)), + } + print("Right: {}\n", .{right_value}); + // struct_scope.declarations.get(identifier); - unreachable; + unreachable; + }, + .@"enum" => |enum_index| { + const enum_type = analyzer.module.enums.get(enum_index); + const identifier_hash = try analyzer.processIdentifier(identifier); + + const result = for (enum_type.fields.items) 
|enum_field_index| { + const enum_field = analyzer.module.enum_fields.get(enum_field_index); + if (enum_field.name == identifier_hash) { + break enum_field_index; + } + } else { + @panic("No enum found"); + }; + const enum_field = analyzer.module.enum_fields.get(result); + const enum_field_name = analyzer.module.getName(enum_field.name).?; + print("Enum field name resolution: {s}\n", .{enum_field_name}); + break :blk .{ + .enum_field = result, + }; + }, + else => |t| @panic(@tagName(t)), + } + unreachable; + } else { + std.debug.panic("Identifier \"{s}\" not found. Type empty", .{identifier}); + } + }, + .declaration_reference => |declaration_reference| { + switch (left_allocation.ptr.*) { + .declaration_reference => |reference| { + const declaration = analyzer.module.declarations.get(reference.value); + const declaration_type_index = declaration.type; + const declaration_type = analyzer.module.types.get(declaration_type_index); + switch (declaration_type.*) { + .slice => unreachable, + else => |t| @panic(@tagName(t)), + } }, else => |t| @panic(@tagName(t)), } + _ = declaration_reference; unreachable; }, else => |t| @panic(@tagName(t)), } unreachable; }, + .string_literal => .{ + .string_literal = try analyzer.processStringLiteral(scope_index, node_index), + }, + .@"switch" => try analyzer.processSwitch(scope_index, node_index), + .enum_type => blk: { + const list_node = analyzer.getScopeNode(scope_index, node.left); + const field_node_list = switch (list_node.id) { + .node_list => analyzer.getScopeNodeList(scope_index, list_node), + else => |t| @panic(@tagName(t)), + }; + + var field_list = try ArrayList(Enum.Field.Index).initCapacity(analyzer.allocator, field_node_list.items.len); + const enum_allocation = try analyzer.module.enums.addOne(analyzer.allocator); + const type_allocation = try analyzer.module.types.append(analyzer.allocator, .{ + .@"enum" = enum_allocation.index, + }); + + for (field_node_list.items) |field_node_index| { + const field_node = 
analyzer.getScopeNode(scope_index, field_node_index); + const identifier = analyzer.tokenIdentifier(scope_index, field_node.token); + print("Enum field: {s}\n", .{identifier}); + assert(!field_node.left.valid); + + const enum_hash_name = try analyzer.processIdentifier(identifier); + + const enum_field_allocation = try analyzer.module.enum_fields.append(analyzer.allocator, .{ + .name = enum_hash_name, + .value = Value.Index.invalid, + .parent = enum_allocation.index, + }); + + field_list.appendAssumeCapacity(enum_field_allocation.index); + } + + enum_allocation.ptr.* = .{ + .scope = Scope.Index.invalid, + .fields = field_list, + .type = type_allocation.index, + }; + + break :blk .{ + .type = type_allocation.index, + }; + }, + .assign => try analyzer.processAssignment(scope_index, node_index), + .signed_integer_type, .unsigned_integer_type => .{ + .type = try analyzer.resolveType(scope_index, node_index), + }, + .@"return" => try analyzer.processReturn(scope_index, expect_type, node_index), else => |t| @panic(@tagName(t)), }; } + fn debugNode(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) void { + const node = analyzer.getScopeNode(scope_index, node_index); + const source_file = analyzer.getScopeSourceFile(scope_index); + const token = analyzer.getScopeToken(scope_index, node.token); + print("Debugging node {s}:\n\n```\n{s}\n```\n", .{ @tagName(node.id), source_file[token.start..] 
}); + } + + fn processStringLiteral(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !u32 { + const string_literal_node = analyzer.getScopeNode(scope_index, node_index); + assert(string_literal_node.id == .string_literal); + const string_literal = analyzer.tokenStringLiteral(scope_index, string_literal_node.token); + const string_key = try analyzer.module.addStringLiteral(analyzer.allocator, string_literal); + return string_key; + } + fn functionPrototypeReturnType(analyzer: *Analyzer, function_prototype_index: Function.Prototype.Index) Type.Index { const function_prototype = analyzer.module.function_prototypes.get(function_prototype_index); return function_prototype.return_type; } - fn functionPrototype(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Function.Prototype.Index { - const node = analyzer.getNode(scope_index, node_index); - switch (node.id) { - .simple_function_prototype => { - const arguments: ?[]const Field.Index = blk: { - if (node.left.get() == null) break :blk null; - const argument_node = analyzer.getNode(scope_index, node.left); - switch (argument_node.id) { - else => |t| @panic(@tagName(t)), - } + fn resolveType(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Type.Index { + const type_node = analyzer.getScopeNode(scope_index, node_index); + const type_index: Type.Index = switch (type_node.id) { + .identifier => blk: { + const token = analyzer.getScopeToken(scope_index, type_node.token); + const source_file = analyzer.getScopeSourceFile(scope_index); + const identifier = tokenBytes(token, source_file); + print("Identifier: \"{s}\"\n", .{identifier}); + const resolved_value_index = try analyzer.doIdentifier(scope_index, ExpectType.type, type_node.token, scope_index); + const resolved_value = analyzer.module.values.get(resolved_value_index); + break :blk switch (resolved_value.*) { + .type => |type_index| type_index, + else => |t| @panic(@tagName(t)), }; - const return_type_node 
= analyzer.getNode(scope_index, node.right); - const return_type: Type.Index = switch (return_type_node.id) { - .identifier => { - unreachable; + }, + .keyword_noreturn => Type.noreturn, + inline .signed_integer_type, .unsigned_integer_type => |int_type_signedness| blk: { + const bit_count: u16 = @intCast(type_node.left.value); + print("Bit count: {}\n", .{bit_count}); + break :blk switch (bit_count) { + inline 8, 16, 32, 64 => |hardware_bit_count| Type.Integer.getIndex(.{ + .bit_count = hardware_bit_count, + .signedness = switch (int_type_signedness) { + .signed_integer_type => .signed, + .unsigned_integer_type => .unsigned, + else => @compileError("OOO"), + }, + }), + else => unreachable, + }; + }, + .many_pointer_type => blk: { + const type_allocation = try analyzer.module.types.append(analyzer.allocator, .{ + .pointer = .{ + .element_type = try resolveType(analyzer, scope_index, type_node.left), + .many = true, + .@"const" = switch (analyzer.getScopeToken(scope_index, type_node.token + 3).id) { + .fixed_keyword_const => true, + .fixed_keyword_var => false, + else => |t| @panic(@tagName(t)), + }, }, - .keyword_noreturn => .{ .block = 0, .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.noreturn) }, - inline .signed_integer_type, .unsigned_integer_type => |int_type_signedness| blk: { - const bit_count: u16 = @intCast(return_type_node.left.value); - print("Bit count: {}\n", .{bit_count}); - break :blk switch (bit_count) { - inline 8, 16, 32, 64 => |hardware_bit_count| Type.Index{ - .block = 0, - .index = @ctz(hardware_bit_count) - @ctz(@as(u8, 8)) + switch (int_type_signedness) { - .signed_integer_type => HardwareSignedIntegerType, - .unsigned_integer_type => HardwareUnsignedIntegerType, - else => unreachable, - }.offset, - }, - else => unreachable, - }; + }); + break :blk type_allocation.index; + }, + .slice_type => blk: { + const type_allocation = try analyzer.module.types.append(analyzer.allocator, .{ + .slice = .{ + .element_type = try 
resolveType(analyzer, scope_index, type_node.right), }, + }); + break :blk type_allocation.index; + }, + .void_type => Type.void, + .ssize_type => Type.ssize, + .usize_type => Type.usize, + else => |t| @panic(@tagName(t)), + }; + return type_index; + } + + fn processSimpleFunctionPrototype(analyzer: *Analyzer, scope_index: Scope.Index, simple_function_prototype_node_index: Node.Index) !Function.Prototype { + const simple_function_prototype_node = analyzer.getScopeNode(scope_index, simple_function_prototype_node_index); + assert(simple_function_prototype_node.id == .simple_function_prototype); + const arguments_node_index = simple_function_prototype_node.left; + const return_type_node_index = simple_function_prototype_node.right; + + const arguments: ?[]const Declaration.Index = switch (arguments_node_index.valid) { + false => null, + true => blk: { + const argument_list_node = analyzer.getScopeNode(scope_index, arguments_node_index); + // print("Function prototype argument list node: {}\n", .{function_prototype_node.left.uniqueInteger()}); + const argument_node_list = switch (argument_list_node.id) { + .node_list => analyzer.getScopeNodeList(scope_index, argument_list_node), else => |t| @panic(@tagName(t)), }; - const function_prototype_allocation = try analyzer.module.function_prototypes.append(analyzer.allocator, .{ - .arguments = arguments, - .return_type = return_type, - }); + assert(argument_node_list.items.len > 0); + if (argument_node_list.items.len > 0) { + var arguments = try ArrayList(Declaration.Index).initCapacity(analyzer.allocator, argument_node_list.items.len); + const scope = analyzer.module.scopes.get(scope_index); + _ = scope; + for (argument_node_list.items, 0..) 
|argument_node_index, index| { + const argument_node = analyzer.getScopeNode(scope_index, argument_node_index); + switch (argument_node.id) { + .argument_declaration => { + const argument_type = try analyzer.resolveType(scope_index, argument_node.left); + const argument_declaration = try analyzer.declarationCommon(scope_index, .local, .@"const", argument_node.token, argument_type, Value.Index.invalid, @intCast(index)); + arguments.appendAssumeCapacity(argument_declaration); + }, + else => |t| @panic(@tagName(t)), + } + } + + break :blk arguments.items; + } else { + break :blk null; + } + }, + }; + + const return_type = try analyzer.resolveType(scope_index, return_type_node_index); + + return .{ + .arguments = arguments, + .return_type = return_type, + }; + } + + fn functionPrototype(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Function.Prototype.Index { + const function_prototype_node = analyzer.getScopeNode(scope_index, node_index); + switch (function_prototype_node.id) { + .simple_function_prototype => { + const function_prototype_allocation = try analyzer.module.function_prototypes.append(analyzer.allocator, try analyzer.processSimpleFunctionPrototype(scope_index, node_index)); + + return function_prototype_allocation.index; + }, + .function_prototype => { + var function_prototype = try analyzer.processSimpleFunctionPrototype(scope_index, function_prototype_node.left); + const function_prototype_attribute_list_node = analyzer.getScopeNode(scope_index, function_prototype_node.right); + const attribute_node_list = analyzer.getScopeNodeList(scope_index, function_prototype_attribute_list_node); + var calling_convention: ?Compilation.CallingConvention = null; + + for (attribute_node_list.items) |attribute_node_index| { + const attribute_node = analyzer.getScopeNode(scope_index, attribute_node_index); + + switch (attribute_node.id) { + .extern_qualifier => function_prototype.attributes.@"extern" = true, + else => |t| @panic(@tagName(t)), + } 
+ } + + function_prototype.attributes.calling_convention = calling_convention orelse Compilation.CallingConvention.system_v; + + const function_prototype_allocation = try analyzer.module.function_prototypes.append(analyzer.allocator, function_prototype); return function_prototype_allocation.index; }, else => |t| @panic(@tagName(t)), @@ -533,6 +1139,17 @@ const Analyzer = struct { node_buffer[1] = node.right; break :blk &node_buffer; }, + .main => blk: { + const node_list_node = analyzer.getFileNode(file_index, node.left); + const node_list = switch (node_list_node.id) { + .node_list => analyzer.getFileNodeList(file_index, node_list_node), + else => |t| @panic(@tagName(t)), + }; + break :blk node_list.items; + // const node_list = file.syntactic_analyzer_result.node_lists.items[node.left.unwrap()]; + // break :blk node_list.items; + }, + .main_zero => &.{}, else => |t| @panic(@tagName(t)), }; @@ -553,6 +1170,11 @@ const Analyzer = struct { const type_allocation = try analyzer.module.types.append(analyzer.allocator, .{ .@"struct" = struct_allocation.index, }); + + if (!parent_scope_index.valid) { + file.type = type_allocation.index; + } + scope.type = type_allocation.index; value.* = .{ .type = type_allocation.index, @@ -564,7 +1186,7 @@ const Analyzer = struct { declarations: u32 = 0, } = .{}; for (nodes) |member_index| { - const member = analyzer.getNode(scope_index, member_index); + const member = analyzer.getFileNode(file_index, member_index); const member_type = getContainerMemberType(member.id); switch (member_type) { @@ -579,7 +1201,7 @@ const Analyzer = struct { var field_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.fields); for (nodes) |member_index| { - const member = analyzer.getNode(scope_index, member_index); + const member = analyzer.getFileNode(file_index, member_index); const member_type = getContainerMemberType(member.id); const array_list = switch (member_type) { .declaration => &declaration_nodes, @@ -589,26 +1211,26 @@ 
const Analyzer = struct { } for (declaration_nodes.items) |declaration_node_index| { - const declaration_node = analyzer.getNode(scope_index, declaration_node_index); + const declaration_node = analyzer.getFileNode(file_index, declaration_node_index); switch (declaration_node.id) { .@"comptime" => {}, - .simple_variable_declaration => _ = try analyzer.symbolDeclaration(scope_index, declaration_node_index, .global), + .simple_symbol_declaration => _ = try analyzer.symbolDeclaration(scope_index, declaration_node_index, .global), else => unreachable, } } // TODO: consider iterating over scope declarations instead? for (declaration_nodes.items) |declaration_node_index| { - const declaration_node = analyzer.getNode(scope_index, declaration_node_index); + const declaration_node = analyzer.getFileNode(file_index, declaration_node_index); switch (declaration_node.id) { .@"comptime" => _ = try analyzer.comptimeBlock(scope_index, declaration_node_index), - .simple_variable_declaration => {}, + .simple_symbol_declaration => {}, else => |t| @panic(@tagName(t)), } } for (field_nodes.items) |field_index| { - const field_node = analyzer.getNode(scope_index, field_index); + const field_node = analyzer.getFileNode(file_index, field_index); _ = field_node; @panic("TODO: fields"); @@ -620,55 +1242,78 @@ const Analyzer = struct { } } + fn declarationCommon(analyzer: *Analyzer, scope_index: Scope.Index, scope_type: ScopeType, mutability: Compilation.Mutability, identifier_token: Token.Index, type_index: Type.Index, init_value: Value.Index, argument_index: ?u32) !Declaration.Index { + const identifier = analyzer.tokenIdentifier(scope_index, identifier_token); + const identifier_index = try analyzer.processIdentifier(identifier); + + if (analyzer.lookupDeclarationInCurrentAndParentScopes(scope_index, identifier_index)) |lookup| { + const declaration_name = analyzer.tokenIdentifier(lookup.scope, identifier_token); + std.debug.panic("Existing name in lookup: {s}", .{declaration_name}); + } 
+ + // Check if the symbol name is already occupied in the same scope + const scope = analyzer.module.scopes.get(scope_index); + const declaration_allocation = try analyzer.module.declarations.append(analyzer.allocator, .{ + .name = identifier_index, + .scope_type = scope_type, + .mutability = mutability, + .init_value = init_value, + .type = type_index, + .argument_index = argument_index, + }); + + try scope.declarations.put(analyzer.allocator, identifier_index, declaration_allocation.index); + + return declaration_allocation.index; + } + fn symbolDeclaration(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index, scope_type: ScopeType) !Declaration.Index { - const declaration_node = analyzer.getNode(scope_index, node_index); - assert(declaration_node.id == .simple_variable_declaration); - assert(!declaration_node.left.valid); - const mutability: Compilation.Mutability = switch (analyzer.getToken(scope_index, declaration_node.token).id) { + const declaration_node = analyzer.getScopeNode(scope_index, node_index); + assert(declaration_node.id == .simple_symbol_declaration); + const expect_type = switch (declaration_node.left.valid) { + true => switch (scope_type) { + .local => ExpectType{ + .type_index = try analyzer.resolveType(scope_index, declaration_node.left), + }, + .global => ExpectType.none, + }, + false => ExpectType.none, + }; + const mutability: Compilation.Mutability = switch (analyzer.getScopeToken(scope_index, declaration_node.token).id) { .fixed_keyword_const => .@"const", .fixed_keyword_var => .@"var", else => |t| @panic(@tagName(t)), }; const expected_identifier_token_index = declaration_node.token + 1; - const expected_identifier_token = analyzer.getToken(scope_index, expected_identifier_token_index); + const expected_identifier_token = analyzer.getScopeToken(scope_index, expected_identifier_token_index); if (expected_identifier_token.id != .identifier) { print("Error: found: {}", .{expected_identifier_token.id}); @panic("Expected 
identifier"); } // TODO: Check if it is a keyword - const identifier_index = try analyzer.identifierFromToken(scope_index, expected_identifier_token_index); - - const declaration_name = analyzer.tokenIdentifier(scope_index, expected_identifier_token_index); - // Check if the symbol name is already occupied in the same scope - const scope = analyzer.module.scopes.get(scope_index); - const scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_index); - if (scope_lookup.found_existing) { - std.debug.panic("Existing name in lookup: {s}", .{declaration_name}); - } - - // Check if the symbol name is already occupied in parent scopes - var upper_scope_index = scope.parent; - - while (upper_scope_index.valid) { - @panic("TODO: upper scope"); - } assert(declaration_node.right.valid); - const declaration_allocation = try analyzer.module.declarations.append(analyzer.allocator, .{ - .name = declaration_name, - .scope_type = scope_type, - .mutability = mutability, - .init_value = (try analyzer.module.values.append(analyzer.allocator, .{ + const argument = null; + assert(argument == null); + const init_value_allocation = switch (scope_type) { + .local => try analyzer.unresolvedAllocate(scope_index, expect_type, declaration_node.right), + .global => try analyzer.module.values.append(analyzer.allocator, .{ .unresolved = .{ .node_index = declaration_node.right, }, - })).index, - }); + }), + }; - scope_lookup.value_ptr.* = declaration_allocation.index; + assert(argument == null); + const type_index = switch (scope_type) { + .local => init_value_allocation.ptr.getType(analyzer.module), + .global => Type.Index.invalid, + }; - return declaration_allocation.index; + const result = try analyzer.declarationCommon(scope_index, scope_type, mutability, expected_identifier_token_index, type_index, init_value_allocation.index, argument); + + return result; } const MemberType = enum { @@ -679,28 +1324,19 @@ const Analyzer = struct { fn getContainerMemberType(member_id: 
Node.Id) MemberType { return switch (member_id) { .@"comptime" => .declaration, - .simple_variable_declaration => .declaration, + .simple_symbol_declaration => .declaration, else => unreachable, }; } - fn identifierFromToken(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) !u32 { - const identifier = analyzer.tokenIdentifier(scope_index, token_index); - const key: u32 = @truncate(std.hash.Wyhash.hash(0, identifier)); - - const lookup_result = try analyzer.module.string_table.getOrPut(analyzer.allocator, key); - - if (lookup_result.found_existing) { - return lookup_result.key_ptr.*; - } else { - return key; - } + fn processIdentifier(analyzer: *Analyzer, string: []const u8) !u32 { + return analyzer.module.addName(analyzer.allocator, string); } fn tokenIdentifier(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) []const u8 { - const token = analyzer.getToken(scope_index, token_index); + const token = analyzer.getScopeToken(scope_index, token_index); assert(token.id == .identifier); - const source_file = analyzer.getSourceFile(scope_index); + const source_file = analyzer.getScopeSourceFile(scope_index); const identifier = tokenBytes(token, source_file); return identifier; @@ -711,18 +1347,18 @@ const Analyzer = struct { } fn numberBytes(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) []const u8 { - const token = analyzer.getToken(scope_index, token_index); + const token = analyzer.getScopeToken(scope_index, token_index); assert(token.id == .number_literal); - const source_file = analyzer.getSourceFile(scope_index); + const source_file = analyzer.getScopeSourceFile(scope_index); const bytes = tokenBytes(token, source_file); return bytes; } fn tokenStringLiteral(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) []const u8 { - const token = analyzer.getToken(scope_index, token_index); + const token = analyzer.getScopeToken(scope_index, token_index); assert(token.id == 
.string_literal); - const source_file = analyzer.getSourceFile(scope_index); + const source_file = analyzer.getScopeSourceFile(scope_index); // Eat double quotes const string_literal = tokenBytes(token, source_file)[1..][0 .. token.len - 2]; @@ -732,6 +1368,87 @@ const Analyzer = struct { fn allocateScope(analyzer: *Analyzer, scope_value: Scope) !Scope.Allocation { return analyzer.module.scopes.append(analyzer.allocator, scope_value); } + + const TypeCheckResult = enum { + success, + zero_extend, + sign_extend, + }; + + fn typeCheck(analyzer: *Analyzer, expect_type: ExpectType, source: Type.Index) !TypeCheckResult { + return switch (expect_type) { + .none => TypeCheckResult.success, + .type_index => |expected_type_index| { + if (expected_type_index.eq(source)) { + return TypeCheckResult.success; + } + + const destination_type = analyzer.module.types.get(expected_type_index); + const source_type = analyzer.module.types.get(source); + + switch (destination_type.*) { + .type => switch (source_type.* == .type) { + true => return TypeCheckResult.success, + false => unreachable, + }, + .integer => |destination_int| switch (source_type.*) { + .integer => |source_int| { + if (destination_int.getSize() < source_int.getSize()) { + @panic("Destination integer type is smaller than sourcE"); + } else if (destination_int.getSize() > source_int.getSize()) { + unreachable; + } else { + unreachable; + } + }, + .comptime_int => return TypeCheckResult.success, + else => |t| @panic(@tagName(t)), + }, + // TODO: type safety + .pointer => |destination_pointer| switch (source_type.*) { + .pointer => |source_pointer| { + switch (source_pointer.many == destination_pointer.many and source_pointer.element_type.eq(destination_pointer.element_type)) { + true => return TypeCheckResult.success, + false => unreachable, + } + }, + else => |t| @panic(@tagName(t)), + }, + else => |t| @panic(@tagName(t)), + } + }, + .flexible_integer => |expected_flexible_integer| { + const source_type = 
analyzer.module.types.get(source); + switch (source_type.*) { + .integer => |source_integer| { + const source_size = source_integer.getSize(); + if (expected_flexible_integer.byte_count < source_size) { + unreachable; + } else if (expected_flexible_integer.byte_count > source_size) { + return switch (source_integer.signedness) { + .signed => .sign_extend, + .unsigned => .zero_extend, + }; + } else { + return TypeCheckResult.success; + } + }, + // TODO: add type safety + .pointer => |pointer| { + _ = pointer; + switch (expected_flexible_integer.byte_count == 8) { + true => return TypeCheckResult.success, + false => unreachable, + } + }, + else => |t| @panic(@tagName(t)), + } + // if (expected_flexible_integer.byte_count < + // _ = expected_flexible_integer; + }, + // else => |t| @panic(@tagName(t)), + }; + } }; const ExpectType = union(enum) { @@ -743,7 +1460,11 @@ const ExpectType = union(enum) { .none = {}, }; pub const boolean = ExpectType{ - .type_index = type_boolean, + .type_index = Type.boolean, + }; + + pub const @"type" = ExpectType{ + .type_index = Type.type, }; const FlexibleInteger = struct { @@ -752,123 +1473,17 @@ const ExpectType = union(enum) { }; }; -const type_boolean = Type.Index{ - .block = 0, - .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.bool), -}; - -// Each time an enum is added here, a corresponding insertion in the initialization must be made -const Values = enum { - bool_false, - bool_true, - @"unreachable", - - fn getIndex(value: Values) Value.Index { - const absolute: u32 = @intFromEnum(value); - const foo = @as(Value.Index, undefined); - const ElementT = @TypeOf(@field(foo, "index")); - const BlockT = @TypeOf(@field(foo, "block")); - const divider = std.math.maxInt(ElementT); - const element_index: ElementT = @intCast(absolute % divider); - const block_index: BlockT = @intCast(absolute / divider); - return .{ - .index = element_index, - .block = block_index, - }; - } -}; - -const Intrinsic = enum { - import, - 
syscall, -}; - -const FixedTypeKeyword = enum { - void, - noreturn, - bool, - - const offset = 0; -}; - -const HardwareUnsignedIntegerType = enum { - u8, - u16, - u32, - u64, - - const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len; -}; - -const HardwareSignedIntegerType = enum { - s8, - s16, - s32, - s64, - - const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len; -}; - -pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, file_index: File.Index) !Type.Index { - _ = file_index; - inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| { - _ = try module.types.append(compilation.base_allocator, @unionInit(Type, enum_field.name, {})); - } - - inline for (@typeInfo(HardwareUnsignedIntegerType).Enum.fields) |enum_field| { - _ = try module.types.append(compilation.base_allocator, .{ - .integer = .{ - .signedness = .unsigned, - .bit_count = switch (@field(HardwareUnsignedIntegerType, enum_field.name)) { - .u8 => 8, - .u16 => 16, - .u32 => 32, - .u64 => 64, - }, - }, - }); - } - - inline for (@typeInfo(HardwareSignedIntegerType).Enum.fields) |enum_field| { - _ = try module.types.append(compilation.base_allocator, .{ - .integer = .{ - .signedness = .signed, - .bit_count = switch (@field(HardwareSignedIntegerType, enum_field.name)) { - .s8 => 8, - .s16 => 16, - .s32 => 32, - .s64 => 64, - }, - }, - }); - } - - _ = try module.values.append(compilation.base_allocator, .{ - .bool = false, - }); - - _ = try module.values.append(compilation.base_allocator, .{ - .bool = true, - }); - - _ = try module.values.append(compilation.base_allocator, .{ - .@"unreachable" = {}, - }); - - const value_allocation = try module.values.append(compilation.base_allocator, .{ - .unresolved = .{ - .node_index = .{ .value = 0 }, - }, - }); - - const result = analyzeExistingPackage(value_allocation.ptr, compilation, module, package); +pub fn initialize(compilation: *Compilation, module: *Module, package: 
*Package, main_value: *Value) !void { + _ = try analyzeExistingPackage(main_value, compilation, module, package); var decl_iterator = module.declarations.iterator(); while (decl_iterator.nextPointer()) |decl| { - if (equal(u8, decl.name, "_start")) { + const declaration_name = module.getName(decl.name).?; + if (equal(u8, declaration_name, "_start")) { const value = module.values.get(decl.init_value); module.entry_point = switch (value.*) { .function => |function_index| function_index.uniqueInteger(), + .unresolved => std.debug.panic("Unresolved declaration: {s}\n", .{declaration_name}), else => |t| @panic(@tagName(t)), }; break; @@ -876,20 +1491,18 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, } else { @panic("Entry point not found"); } - - return result; } pub fn analyzeExistingPackage(value: *Value, compilation: *Compilation, module: *Module, package: *Package) !Type.Index { const package_import = try module.importPackage(compilation.base_allocator, package); assert(!package_import.file.is_new); - const package_file = package_import.file.ptr; const file_index = package_import.file.index; - return try analyzeFile(value, compilation.base_allocator, module, package_file, file_index); + return try analyzeFile(value, compilation.base_allocator, module, file_index); } -pub fn analyzeFile(value: *Value, allocator: Allocator, module: *Module, file: *File, file_index: File.Index) !Type.Index { +pub fn analyzeFile(value: *Value, allocator: Allocator, module: *Module, file_index: File.Index) !Type.Index { + const file = module.files.get(file_index); assert(value.* == .unresolved); assert(file.status == .parsed); @@ -899,12 +1512,6 @@ pub fn analyzeFile(value: *Value, allocator: Allocator, module: *Module, file: * .module = module, }; - var buffer = [2]Node.Index{ - Node.Index.invalid, - Node.Index.invalid, - }; - _ = buffer; - const result = try analyzer.structType(value, Scope.Index.invalid, .{ .value = 0 }, file_index); return result; 
} diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig index 5efc621..bd97791 100644 --- a/src/frontend/syntactic_analyzer.zig +++ b/src/frontend/syntactic_analyzer.zig @@ -12,6 +12,9 @@ const HashMap = data_structures.HashMap; const lexical_analyzer = @import("lexical_analyzer.zig"); const Token = lexical_analyzer.Token; +const Compilation = @import("../Compilation.zig"); +const File = Compilation.File; + pub const Result = struct { nodes: ArrayList(Node), node_lists: ArrayList(Node.List), @@ -70,7 +73,7 @@ pub const Node = packed struct(u128) { container_declaration = 6, string_literal = 7, compiler_intrinsic_one = 8, - simple_variable_declaration = 9, + simple_symbol_declaration = 9, assign = 10, @"comptime" = 11, node_list = 12, @@ -96,6 +99,32 @@ pub const Node = packed struct(u128) { main_one = 32, main_two = 33, main_zero = 34, + call_two = 35, + slice_type = 36, + argument_declaration = 37, + compiler_intrinsic = 38, + ssize_type = 39, + usize_type = 40, + void_type = 41, + call = 42, + many_pointer_type = 43, + enum_literal = 44, + address_of = 45, + keyword_false = 46, + compare_equal = 47, + compare_not_equal = 48, + compare_less_than = 49, + compare_greater_than = 50, + compare_less_or_equal = 51, + compare_greater_or_equal = 52, + @"if" = 53, + if_else = 54, + @"switch" = 55, + switch_case = 56, + enum_type = 57, + enum_field = 58, + extern_qualifier = 59, + function_prototype = 60, }; }; @@ -109,52 +138,105 @@ const Analyzer = struct { tokens: []const Token, token_i: u32 = 0, nodes: ArrayList(Node) = .{}, - file: []const u8, + source_file: []const u8, + file_index: File.Index, allocator: Allocator, temporal_node_heap: ArrayList(Node.Index) = .{}, node_lists: ArrayList(Node.List) = .{}, fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 { - if (analyzer.tokens[analyzer.token_i].id == token_id) { - const result = analyzer.token_i; + const token_i = analyzer.token_i; + const token = analyzer.tokens[token_i]; + 
const is_expected_token = token.id == token_id; + if (is_expected_token) { analyzer.token_i += 1; + const result = token_i; return result; } else { + std.debug.print("Unexpected token {s} when expected {s}\n", .{ @tagName(token.id), @tagName(token_id) }); return error.unexpected_token; } } fn bytes(analyzer: *const Analyzer, token_index: Token.Index) []const u8 { const token = analyzer.tokens[token_index]; - return analyzer.file[token.start..][0..token.len]; + return analyzer.source_file[token.start..][0..token.len]; } - fn symbolDeclaration(analyzer: *Analyzer) !Node.Index { + fn symbolDeclaration(analyzer: *Analyzer) anyerror!Node.Index { const first = analyzer.token_i; assert(analyzer.tokens[first].id == .fixed_keyword_var or analyzer.tokens[first].id == .fixed_keyword_const); analyzer.token_i += 1; - _ = try analyzer.expectToken(.identifier); + const declaration_name_token = try analyzer.expectToken(.identifier); + const declaration_name = analyzer.bytes(declaration_name_token); + std.debug.print("Starting parsing declaration \"{s}\"\n", .{declaration_name}); - // TODO: type - _ = try analyzer.expectToken(.equal); + std.debug.print("Current token: {}\n", .{analyzer.tokens[analyzer.token_i].id}); - const init_node = try analyzer.expression(); - - _ = try analyzer.expectToken(.semicolon); - - // TODO: - const type_node = Node.Index.invalid; - const declaration = Node{ - .id = .simple_variable_declaration, - .token = first, - .left = type_node, - .right = init_node, + const type_node_index = switch (analyzer.tokens[analyzer.token_i].id) { + .colon => blk: { + analyzer.token_i += 1; + break :blk try analyzer.typeExpression(); + }, + else => Node.Index.invalid, }; - const declaration_init_node = analyzer.nodes.items[init_node.unwrap()]; - std.debug.print("Declaration init node: {}\n", .{declaration_init_node}); + _ = try analyzer.expectToken(.equal); - return analyzer.addNode(declaration); + const init_node_index = try analyzer.expression(); + + const init_node = 
analyzer.nodes.items[init_node_index.unwrap()]; + switch (init_node.id) { + .function_definition => {}, + else => _ = try analyzer.expectToken(.semicolon), + } + + // TODO: + const declaration = Node{ + .id = .simple_symbol_declaration, + .token = first, + .left = type_node_index, + .right = init_node_index, + }; + + std.debug.print("Adding declaration \"{s}\" with init node of type: {s}\n", .{ declaration_name, @tagName(init_node.id) }); + // if (analyzer.token_i < analyzer.tokens.len) { + // const first_token = analyzer.tokens[first]; + // const last_token = analyzer.tokens[analyzer.token_i]; + // const declaration_source_start = first_token.start; + // const declaration_source_end = last_token.start; + // + // std.debug.print("[ALL]\n", .{}); + // std.debug.print("Source file ({} bytes) :\n```\n{s}\n```\n", .{ analyzer.source_file.len, analyzer.source_file }); + // + // std.debug.print("[BEFORE]\n", .{}); + // + // std.debug.print("Tokens before the declaration: ", .{}); + // for (analyzer.tokens[0..first]) |t| { + // std.debug.print("{s} ", .{@tagName(t.id)}); + // } + // std.debug.print("\n", .{}); + // std.debug.print("Source before the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[0..analyzer.tokens[first].start]}); + // std.debug.print("[DECLARATION]\n", .{}); + // + // std.debug.print("First token: {}\n", .{first_token}); + // std.debug.print("Last token: {}\n", .{last_token}); + // + // std.debug.print("Tokens including declaration ([{}-{}])", .{ first, analyzer.token_i }); + // for (analyzer.tokens[first..][0 .. analyzer.token_i - first]) |t| { + // std.debug.print("{s} ", .{@tagName(t.id)}); + // } + // std.debug.print("\n", .{}); + // + // std.debug.print("Source for the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[declaration_source_start..declaration_source_end]}); + // std.debug.print("[AFTER]\n", .{}); + // + // // TODO + // // print("Tokens for file #{}\n", .{analyzer. 
+ // // for (analyzer.tokens[ + // } + + return try analyzer.addNode(declaration); } fn containerMembers(analyzer: *Analyzer) !Members { @@ -163,6 +245,7 @@ const Analyzer = struct { while (analyzer.token_i < analyzer.tokens.len) { const first = analyzer.token_i; + std.debug.print("First token for container member: {s}\n", .{@tagName(analyzer.tokens[first].id)}); const member_node_index: Node.Index = switch (analyzer.tokens[first].id) { .fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) { .left_brace => blk: { @@ -182,6 +265,8 @@ const Analyzer = struct { else => |t| @panic(@tagName(t)), }; + std.debug.print("Container member {s}\n", .{@tagName(analyzer.nodes.items[member_node_index.unwrap()].id)}); + try analyzer.temporal_node_heap.append(analyzer.allocator, member_node_index); } @@ -197,7 +282,11 @@ const Analyzer = struct { .left = members_array[0], .right = members_array[1], }, - else => |len| std.debug.panic("Len: {}", .{len}), + else => |len| .{ + .len = len, + .left = try analyzer.nodeList(members_array), + .right = Node.Index.invalid, + }, }; return members; @@ -210,13 +299,16 @@ const Analyzer = struct { const function_prototype = try analyzer.functionPrototype(); const is_comptime = false; _ = is_comptime; - const function_body = try analyzer.block(.{ .is_comptime = false }); - return analyzer.addNode(.{ - .id = .function_definition, - .token = token, - .left = function_prototype, - .right = function_body, - }); + return switch (analyzer.tokens[analyzer.token_i].id) { + .left_brace => try analyzer.addNode(.{ + .id = .function_definition, + .token = token, + .left = function_prototype, + .right = try analyzer.block(.{ .is_comptime = false }), + }), + .semicolon => function_prototype, + else => |t| @panic(@tagName(t)), + }; } fn functionPrototype(analyzer: *Analyzer) !Node.Index { @@ -225,12 +317,46 @@ const Analyzer = struct { const arguments = try analyzer.argumentList(.left_parenthesis, .right_parenthesis); const return_type = 
try analyzer.typeExpression(); - return analyzer.addNode(.{ + const simple_function_prototype = try analyzer.addNode(.{ .id = .simple_function_prototype, .token = token, .left = arguments, .right = return_type, }); + + return switch (analyzer.tokens[analyzer.token_i].id) { + .semicolon, .left_brace => simple_function_prototype, + else => blk: { + var list = Node.List{}; + while (true) { + const attribute = switch (analyzer.tokens[analyzer.token_i].id) { + .semicolon, .left_brace => break, + .fixed_keyword_extern => b: { + const result = try analyzer.addNode(.{ + .id = .extern_qualifier, + .token = analyzer.token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + analyzer.token_i += 1; + break :b result; + }, + else => b: { + if (true) unreachable; + break :b undefined; + }, + }; + try list.append(analyzer.allocator, attribute); + } + + break :blk try analyzer.addNode(.{ + .id = .function_prototype, + .token = token, + .left = simple_function_prototype, + .right = try analyzer.nodeList(list.items), + }); + }, + }; } fn argumentList(analyzer: *Analyzer, maybe_start_token: ?Token.Id, end_token: Token.Id) !Node.Index { @@ -240,14 +366,32 @@ const Analyzer = struct { var list = ArrayList(Node.Index){}; + var foo = false; while (analyzer.tokens[analyzer.token_i].id != end_token) { - @panic("TODO: argument list"); + const identifier = try analyzer.expectToken(.identifier); + _ = try analyzer.expectToken(.colon); + const type_expression = try analyzer.typeExpression(); + // const type_expression_node = analyzer.nodes.items[type_expression.unwrap()]; + // _ = type_expression_node; + // std.debug.print("Type expression node: {}\n", .{type_expression_node}); + foo = true; + + if (analyzer.tokens[analyzer.token_i].id == .comma) { + analyzer.token_i += 1; + } + + try list.append(analyzer.allocator, try analyzer.addNode(.{ + .id = .argument_declaration, + .token = identifier, + .left = type_expression, + .right = Node.Index.invalid, + })); } _ = try 
analyzer.expectToken(end_token); if (list.items.len != 0) { - @panic("TODO: arguments"); + return try analyzer.nodeList(list.items); } else { return Node.Index.invalid; } @@ -266,6 +410,7 @@ const Analyzer = struct { while (analyzer.tokens[analyzer.token_i].id != .right_brace) { const first_statement_token = analyzer.tokens[analyzer.token_i]; + std.debug.print("First statement token: {s}\n", .{@tagName(first_statement_token.id)}); const statement_index = switch (first_statement_token.id) { .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { .colon => { @@ -274,11 +419,17 @@ const Analyzer = struct { else => try analyzer.assignExpressionStatement(), }, .fixed_keyword_unreachable, .fixed_keyword_return => try analyzer.assignExpressionStatement(), - .fixed_keyword_while => try analyzer.whileStatement(options), + + .fixed_keyword_while => try analyzer.whileExpression(options), + .fixed_keyword_switch => try analyzer.switchExpression(), + .fixed_keyword_if => try analyzer.ifExpression(), .fixed_keyword_const, .fixed_keyword_var => try analyzer.symbolDeclaration(), else => |t| @panic(@tagName(t)), }; + const node = analyzer.nodes.items[statement_index.unwrap()]; + std.debug.print("Adding statement: {s}\n", .{@tagName(node.id)}); + try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index); } @@ -323,10 +474,11 @@ const Analyzer = struct { .right = Node.Index.invalid, }, }; + return analyzer.addNode(node); } - fn whileStatement(analyzer: *Analyzer, options: Options) error{ OutOfMemory, unexpected_token, not_implemented }!Node.Index { + fn whileExpression(analyzer: *Analyzer, options: Options) anyerror!Node.Index { const while_identifier_index = try analyzer.expectToken(.fixed_keyword_while); _ = try analyzer.expectToken(.left_parenthesis); @@ -344,10 +496,117 @@ const Analyzer = struct { }); } + fn switchExpression(analyzer: *Analyzer) anyerror!Node.Index { + std.debug.print("Parsing switch...\n", .{}); + const switch_token = 
analyzer.token_i; + analyzer.token_i += 1; + _ = try analyzer.expectToken(.left_parenthesis); + const switch_expression = try analyzer.expression(); + _ = try analyzer.expectToken(.right_parenthesis); + std.debug.print("Parsed switch expression...\n", .{}); + _ = try analyzer.expectToken(.left_brace); + + var list = Node.List{}; + + while (analyzer.tokens[analyzer.token_i].id != .right_brace) { + const case_token = analyzer.token_i; + std.debug.print("Parsing switch case...\n", .{}); + const case_node = switch (analyzer.tokens[case_token].id) { + .fixed_keyword_else => blk: { + analyzer.token_i += 1; + break :blk Node.Index.invalid; + }, + else => blk: { + var array_list = Node.List{}; + while (true) { + try array_list.append(analyzer.allocator, try analyzer.expression()); + switch (analyzer.tokens[analyzer.token_i].id) { + .comma => analyzer.token_i += 1, + .equal => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .greater => break, + else => {}, + }, + else => {}, + } + } + + break :blk switch (array_list.items.len) { + 0 => unreachable, + 1 => array_list.items[0], + else => try analyzer.nodeList(array_list.items), + }; + }, + }; + _ = try analyzer.expectToken(.equal); + _ = try analyzer.expectToken(.greater); + const is_left_brace = analyzer.tokens[analyzer.token_i].id == .left_brace; + const expr = switch (is_left_brace) { + true => try analyzer.block(.{ + .is_comptime = false, + }), + false => try analyzer.assignExpression(), + }; + + _ = try analyzer.expectToken(.comma); + + const node = try analyzer.addNode(.{ + .id = .switch_case, + .token = case_token, + .left = case_node, + .right = expr, + }); + + try list.append(analyzer.allocator, node); + } + + _ = try analyzer.expectToken(.right_brace); + + return try analyzer.addNode(.{ + .id = .@"switch", + .token = switch_token, + .left = switch_expression, + .right = try analyzer.nodeList(list.items), + }); + } + + fn ifExpression(analyzer: *Analyzer) anyerror!Node.Index { + const if_token = 
analyzer.token_i; + analyzer.token_i += 1; + + _ = try analyzer.expectToken(.left_parenthesis); + const if_expression = try analyzer.expression(); + _ = try analyzer.expectToken(.right_parenthesis); + + const if_block = try analyzer.block(.{ .is_comptime = false }); + + const if_node = try analyzer.addNode(.{ + .id = .@"if", + .token = if_token, + .left = if_expression, + .right = if_block, + }); + + const result = switch (analyzer.tokens[analyzer.token_i].id) { + .fixed_keyword_else => blk: { + analyzer.token_i += 1; + + break :blk try analyzer.addNode(.{ + .id = .if_else, + .token = if_token, + .left = if_node, + .right = try analyzer.expression(), + }); + }, + else => if_node, + }; + + return result; + } + fn assignExpression(analyzer: *Analyzer) !Node.Index { const expr = try analyzer.expression(); const expression_id: Node.Id = switch (analyzer.tokens[analyzer.token_i].id) { - .semicolon => return expr, + .semicolon, .comma => return expr, .equal => .assign, else => |t| @panic(@tagName(t)), }; @@ -363,7 +622,7 @@ const Analyzer = struct { .right = try analyzer.expression(), }; std.debug.print("assign:\nleft: {}.\nright: {}\n", .{ node.left, node.right }); - return analyzer.addNode(node); + return try analyzer.addNode(node); } fn compilerIntrinsic(analyzer: *Analyzer) !Node.Index { @@ -390,7 +649,7 @@ const Analyzer = struct { const parameters = analyzer.temporal_node_heap.items[temporal_heap_top..]; - return switch (parameters.len) { + return try switch (parameters.len) { 1 => analyzer.addNode(.{ .id = .compiler_intrinsic_one, .token = hash, @@ -403,59 +662,114 @@ const Analyzer = struct { .left = parameters[0], .right = parameters[1], }), - else => unreachable, + else => analyzer.addNode(.{ + .id = .compiler_intrinsic, + .token = hash, + .left = try analyzer.nodeList(parameters), + .right = Node.Index.invalid, + }), }; } - fn expression(analyzer: *Analyzer) error{ OutOfMemory, not_implemented, unexpected_token }!Node.Index { - return 
analyzer.expressionPrecedence(0); + fn expression(analyzer: *Analyzer) anyerror!Node.Index { + return try analyzer.expressionPrecedence(0); } fn expressionPrecedence(analyzer: *Analyzer, minimum_precedence: i32) !Node.Index { var result = try analyzer.prefixExpression(); + if (result.valid) { + const prefix_node = analyzer.nodes.items[result.unwrap()]; + std.debug.print("Prefix: {}\n", .{prefix_node.id}); + } var banned_precedence: i32 = -1; while (analyzer.token_i < analyzer.tokens.len) { - const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) { - .equal, .semicolon, .right_parenthesis, .right_brace, .comma, .period => -1, - else => |t| @panic(@tagName(t)), + const token = analyzer.tokens[analyzer.token_i]; + // std.debug.print("Looping in expression precedence with token {}\n", .{token}); + const precedence: i32 = switch (token.id) { + .equal, .semicolon, .right_parenthesis, .right_brace, .comma, .period, .fixed_keyword_const, .fixed_keyword_var => -1, + .bang => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .equal => 30, + else => unreachable, + }, + else => |t| { + const start = token.start; + std.debug.print("Source file:\n```\n{s}\n```\n", .{analyzer.source_file[start..]}); + @panic(@tagName(t)); + }, }; + std.debug.print("Precedence: {} ({s}) (file #{})\n", .{ precedence, @tagName(token.id), analyzer.file_index.uniqueInteger() }); if (precedence < minimum_precedence) { + std.debug.print("Breaking for minimum_precedence\n", .{}); break; } if (precedence == banned_precedence) { + std.debug.print("Breaking for banned precedence\n", .{}); break; } + const operator_token = analyzer.token_i; + const is_bang_equal = analyzer.tokens[operator_token].id == .bang and analyzer.tokens[operator_token + 1].id == .equal; + analyzer.token_i += @as(u32, 1) + @intFromBool(is_bang_equal); + // TODO: fix this - const node_index = try analyzer.expressionPrecedence(1); - _ = node_index; - unreachable; + const right = try 
analyzer.expressionPrecedence(precedence + 1); + + const operation_id: Node.Id = switch (is_bang_equal) { + true => .compare_not_equal, + false => switch (analyzer.tokens[operator_token].id) { + else => |t| @panic(@tagName(t)), + }, + }; + + result = try analyzer.addNode(.{ + .id = operation_id, + .token = operator_token, + .left = result, + .right = right, + }); + + const associativity: Associativity = switch (operation_id) { + .compare_equal, .compare_not_equal, .compare_less_than, .compare_greater_than, .compare_less_or_equal, .compare_greater_or_equal => .none, + else => .left, + }; + + if (associativity == .none) { + banned_precedence = precedence; + } } return result; } fn prefixExpression(analyzer: *Analyzer) !Node.Index { - switch (analyzer.tokens[analyzer.token_i].id) { - // .bang => .bool_not, - // .minus => .negation, - // .tilde => .bit_not, - // .minus_percent => .negation_wrap, - // .ampersand => .address_of, - // .keyword_try => .@"try", - // .keyword_await => .@"await", - + const token = analyzer.token_i; + // std.debug.print("Prefix...\n", .{}); + const node_id: Node.Id = switch (analyzer.tokens[token].id) { else => |pref| { _ = pref; - return analyzer.primaryExpression(); + return try analyzer.primaryExpression(); }, - } + .at => .address_of, + .bang => switch (analyzer.tokens[token + 1].id) { + .equal => return try analyzer.primaryExpression(), + else => unreachable, + }, + .minus, .tilde => |t| @panic(@tagName(t)), + }; - return error.not_implemented; + return try analyzer.addNode(.{ + .id = node_id, + .token = blk: { + analyzer.token_i += 1; + break :blk token; + }, + .left = try analyzer.prefixExpression(), + .right = Node.Index.invalid, + }); } fn primaryExpression(analyzer: *Analyzer) !Node.Index { @@ -464,8 +778,8 @@ const Analyzer = struct { .colon => unreachable, else => try analyzer.curlySuffixExpression(), }, - .string_literal, .number_literal, .fixed_keyword_true, .fixed_keyword_false, .hash, .fixed_keyword_unreachable => try 
analyzer.curlySuffixExpression(), - .fixed_keyword_fn => analyzer.function(), + .string_literal, .number_literal, .fixed_keyword_true, .fixed_keyword_false, .hash, .fixed_keyword_unreachable, .fixed_keyword_switch, .period, .fixed_keyword_enum, .keyword_signed_integer, .keyword_unsigned_integer => try analyzer.curlySuffixExpression(), + .fixed_keyword_fn => try analyzer.function(), .fixed_keyword_return => try analyzer.addNode(.{ .id = .@"return", .token = blk: { @@ -477,7 +791,7 @@ const Analyzer = struct { .right = Node.Index.invalid, }), // todo:? - // .left_brace => try analyzer.block(), + .left_brace => try analyzer.block(.{ .is_comptime = false }), else => |id| { log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)}); unreachable; @@ -500,7 +814,7 @@ const Analyzer = struct { const token_i = analyzer.token_i; assert(analyzer.tokens[token_i].id == .fixed_keyword_noreturn); analyzer.token_i += 1; - return analyzer.addNode(.{ + return try analyzer.addNode(.{ .id = .keyword_noreturn, .token = token_i, .left = Node.Index.invalid, @@ -508,12 +822,15 @@ const Analyzer = struct { }); } - fn boolTrue(analyzer: *Analyzer) !Node.Index { + fn boolLiteral(analyzer: *Analyzer) !Node.Index { const token_i = analyzer.token_i; - assert(analyzer.tokens[token_i].id == .fixed_keyword_true); analyzer.token_i += 1; - return analyzer.addNode(.{ - .id = .keyword_true, + return try analyzer.addNode(.{ + .id = switch (analyzer.tokens[token_i].id) { + .fixed_keyword_true => .keyword_true, + .fixed_keyword_false => .keyword_false, + else => unreachable, + }, .token = token_i, .left = Node.Index.invalid, .right = Node.Index.invalid, @@ -521,9 +838,54 @@ const Analyzer = struct { } fn typeExpression(analyzer: *Analyzer) !Node.Index { - return switch (analyzer.tokens[analyzer.token_i].id) { - .identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false, .hash, .string_literal, .number_literal, .fixed_keyword_unreachable, 
.keyword_unsigned_integer, .keyword_signed_integer => try analyzer.errorUnionExpression(), - else => |id| @panic(@tagName(id)), + const first = analyzer.token_i; + return switch (analyzer.tokens[first].id) { + else => try analyzer.errorUnionExpression(), + .at => unreachable, // pointer + .bang => unreachable, // error + .left_bracket => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .at => { + // many item pointer + analyzer.token_i += 2; + _ = try analyzer.expectToken(.right_bracket); + + const is_const = analyzer.tokens[analyzer.token_i].id == .fixed_keyword_const; + analyzer.token_i += @intFromBool(is_const); + + const pointer_element_type = try analyzer.typeExpression(); + + return try analyzer.addNode(.{ + .id = .many_pointer_type, + .token = first, + .left = pointer_element_type, + .right = Node.Index.invalid, + }); + }, + else => { + const left_bracket = analyzer.token_i; + analyzer.token_i += 1; + // TODO: compute length + const length_expression = false; + _ = try analyzer.expectToken(.right_bracket); + + // Slice + if (!length_expression) { + // TODO: modifiers + const is_const = analyzer.tokens[analyzer.token_i].id == .fixed_keyword_const; + analyzer.token_i += @intFromBool(is_const); + + const slice_type = try analyzer.typeExpression(); + return try analyzer.addNode(.{ + .id = .slice_type, + .token = left_bracket, + .left = Node.Index.invalid, + .right = slice_type, + }); + } else { + unreachable; + } + }, + }, }; } @@ -531,7 +893,10 @@ const Analyzer = struct { const suffix_expression = try analyzer.suffixExpression(); return switch (analyzer.tokens[analyzer.token_i].id) { - .bang => unreachable, + .bang => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .equal => suffix_expression, + else => unreachable, + }, else => suffix_expression, }; } @@ -550,26 +915,43 @@ const Analyzer = struct { var expression_list = ArrayList(Node.Index){}; while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { - std.debug.print("Loop\n", .{}); + 
const current_token = analyzer.tokens[analyzer.token_i]; + std.debug.print("Current token: {s}\n", .{@tagName(current_token.id)}); const parameter = try analyzer.expression(); try expression_list.append(analyzer.allocator, parameter); - analyzer.token_i += @intFromBool(switch (analyzer.tokens[analyzer.token_i].id) { - .comma, .right_parenthesis => true, + const parameter_node = analyzer.nodes.items[parameter.unwrap()]; + std.debug.print("Paremeter node: {s}\n", .{@tagName(parameter_node.id)}); + const next_token = analyzer.tokens[analyzer.token_i]; + std.debug.print("next token: {s}\n", .{@tagName(next_token.id)}); + analyzer.token_i += @intFromBool(switch (next_token.id) { + .comma => true, .colon, .right_brace, .right_bracket => unreachable, - else => unreachable, + .right_parenthesis => false, + else => |t| @panic(@tagName(t)), }); } _ = try analyzer.expectToken(.right_parenthesis); // const is_comma = analyzer.tokens[analyzer.token_i].id == .comma; - return analyzer.addNode(switch (expression_list.items.len) { + return try analyzer.addNode(switch (expression_list.items.len) { 0 => .{ .id = .call_one, .token = left_parenthesis, .left = result, .right = Node.Index.invalid, }, - else => |len| std.debug.panic("len: {}", .{len}), + 1 => .{ + .id = .call_two, + .token = left_parenthesis, + .left = result, + .right = expression_list.items[0], + }, + else => .{ + .id = .call, + .token = left_parenthesis, + .left = result, + .right = try analyzer.nodeList(expression_list.items), + }, }); } else { return result; @@ -583,7 +965,7 @@ const Analyzer = struct { fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index { const token_i = analyzer.token_i; const token = analyzer.tokens[token_i]; - return switch (token.id) { + return try switch (token.id) { .string_literal => blk: { analyzer.token_i += 1; break :blk analyzer.addNode(.{ @@ -618,9 +1000,9 @@ const Analyzer = struct { }); }, }, - .fixed_keyword_noreturn => try analyzer.noReturn(), - .fixed_keyword_true => try 
analyzer.boolTrue(), - .fixed_keyword_unreachable => try analyzer.addNode(.{ + .fixed_keyword_noreturn => analyzer.noReturn(), + .fixed_keyword_true, .fixed_keyword_false => analyzer.boolLiteral(), + .fixed_keyword_unreachable => analyzer.addNode(.{ .id = .@"unreachable", .token = blk: { analyzer.token_i += 1; @@ -630,7 +1012,7 @@ const Analyzer = struct { .right = Node.Index.invalid, }), .hash => analyzer.compilerIntrinsic(), - .keyword_unsigned_integer, .keyword_signed_integer => |signedness| try analyzer.addNode(.{ + .keyword_unsigned_integer, .keyword_signed_integer => |signedness| analyzer.addNode(.{ .id = switch (signedness) { .keyword_unsigned_integer => .unsigned_integer_type, .keyword_signed_integer => .signed_integer_type, @@ -640,9 +1022,78 @@ const Analyzer = struct { analyzer.token_i += 1; break :blk token_i; }, - .left = @bitCast(@as(u32, std.fmt.parseInt(u16, analyzer.bytes(token_i)[1..], 10) catch unreachable)), + .left = @bitCast(@as(u32, try std.fmt.parseInt(u16, analyzer.bytes(token_i)[1..], 10))), .right = Node.Index.invalid, }), + .fixed_keyword_usize, .fixed_keyword_ssize => |size_type| analyzer.addNode(.{ + .id = switch (size_type) { + .fixed_keyword_usize => .usize_type, + .fixed_keyword_ssize => .ssize_type, + else => unreachable, + }, + .token = blk: { + analyzer.token_i += 1; + break :blk token_i; + }, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }), + .fixed_keyword_void => analyzer.addNode(.{ + .id = .void_type, + .token = blk: { + analyzer.token_i += 1; + break :blk token_i; + }, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }), + .fixed_keyword_switch => try analyzer.switchExpression(), + .period => switch (analyzer.tokens[token_i + 1].id) { + .identifier => try analyzer.addNode(.{ + .id = .enum_literal, + .token = blk: { + analyzer.token_i += 2; + break :blk token_i; + }, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }), + else => |t| @panic(@tagName(t)), + }, + .fixed_keyword_enum 
=> blk: { + analyzer.token_i += 1; + _ = try analyzer.expectToken(.left_brace); + + var enum_field_list = Node.List{}; + while (analyzer.tokens[analyzer.token_i].id != .right_brace) { + const enum_name = try analyzer.expectToken(.identifier); + const value_associated = switch (analyzer.tokens[analyzer.token_i].id) { + .comma => comma: { + analyzer.token_i += 1; + break :comma Node.Index.invalid; + }, + else => |t| @panic(@tagName(t)), + }; + + const enum_field_node = try analyzer.addNode(.{ + .id = .enum_field, + .token = enum_name, + .left = value_associated, + .right = Node.Index.invalid, + }); + + try enum_field_list.append(analyzer.allocator, enum_field_node); + } + + analyzer.token_i += 1; + + break :blk try analyzer.addNode(.{ + .id = .enum_type, + .token = token_i, + .left = try analyzer.nodeList(enum_field_list.items), + .right = Node.Index.invalid, + }); + }, else => |foo| { switch (foo) { .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.bytes(token_i) }), @@ -658,7 +1109,7 @@ const Analyzer = struct { return switch (token.id) { .left_bracket => unreachable, .period => switch (analyzer.tokens[analyzer.token_i + 1].id) { - .identifier => analyzer.addNode(.{ + .identifier => try analyzer.addNode(.{ .id = .field_access, .token = blk: { const main_token = analyzer.token_i; @@ -684,7 +1135,13 @@ const Analyzer = struct { fn addNode(analyzer: *Analyzer, node: Node) !Node.Index { const index = analyzer.nodes.items.len; try analyzer.nodes.append(analyzer.allocator, node); - std.debug.print("Adding node #{} {s}\n", .{ index, @tagName(node.id) }); + std.debug.print("Adding node #{} (0x{x}) {s} to file #{}\n", .{ index, @intFromPtr(&analyzer.nodes.items[index]), @tagName(node.id), analyzer.file_index.uniqueInteger() }); + // if (node.id == .identifier) { + // std.debug.print("Node identifier: {s}\n", .{analyzer.bytes(node.token)}); + // } + if (node.id == .call) { + std.debug.print("Call two: {}\n", .{node}); + } return Node.Index{ .value = 
@intCast(index), }; @@ -695,10 +1152,12 @@ const Analyzer = struct { var new_node_list = try ArrayList(Node.Index).initCapacity(analyzer.allocator, input.len); try new_node_list.appendSlice(analyzer.allocator, input); try analyzer.node_lists.append(analyzer.allocator, new_node_list); - - return .{ - .value = @intCast(index), - }; + return try analyzer.addNode(.{ + .id = .node_list, + .token = 0, + .left = .{ .value = @intCast(index) }, + .right = Node.Index.invalid, + }); } }; @@ -708,11 +1167,12 @@ const Members = struct { right: Node.Index, }; -pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !Result { +pub fn analyze(allocator: Allocator, tokens: []const Token, source_file: []const u8, file_index: File.Index) !Result { const start = std.time.Instant.now() catch unreachable; var analyzer = Analyzer{ .tokens = tokens, - .file = file, + .source_file = source_file, + .file_index = file_index, .allocator = allocator, }; const node_index = try analyzer.addNode(.{ @@ -725,10 +1185,12 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R assert(node_index.value == 0); assert(node_index.valid); + std.debug.print("Start Parsing file root members\n", .{}); const members = try analyzer.containerMembers(); + std.debug.print("End Parsing file root members\n", .{}); switch (members.len) { - 0 => unreachable, + 0 => analyzer.nodes.items[0].id = .main_zero, 1 => { analyzer.nodes.items[0].id = .main_one; analyzer.nodes.items[0].left = members.left; @@ -738,7 +1200,10 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R analyzer.nodes.items[0].left = members.left; analyzer.nodes.items[0].right = members.right; }, - else => unreachable, + else => { + analyzer.nodes.items[0].id = .main; + analyzer.nodes.items[0].left = members.left; + }, } const end = std.time.Instant.now() catch unreachable; @@ -775,3 +1240,8 @@ pub const SymbolDeclaration = struct { initialization_node: Node.Index, 
mutability_token: Token.Index, }; + +const Associativity = enum { + none, + left, +}; diff --git a/src/main.zig b/src/main.zig index 583297d..2fabb73 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; +const equal = std.mem.eql; const Compilation = @import("Compilation.zig"); @@ -9,20 +10,64 @@ const default_src_file = "src/test/main.nat"; pub fn main() !void { const allocator = std.heap.page_allocator; - const arguments = try std.process.argsAlloc(allocator); - if (arguments.len == 2) { - try singleCompilation(allocator, arguments[1]); - } else { - @panic("Wrong arguments"); - } -} - -fn singleCompilation(allocator: Allocator, main_file_path: []const u8) !void { + const compilation_descriptor = try parseArguments(allocator); const compilation = try Compilation.init(allocator); - try compilation.compileModule(.{ - .main_package_path = main_file_path, - }); + try compilation.compileModule(compilation_descriptor); +} + +const ArgumentParsingError = error{ + main_package_path_not_specified, +}; + +fn parseArguments(allocator: Allocator) !Compilation.Module.Descriptor { + const arguments = (try std.process.argsAlloc(allocator))[1..]; + + var maybe_executable_path: ?[]const u8 = null; + var maybe_main_package_path: ?[]const u8 = null; + var target_triplet: []const u8 = "x86_64-linux-gnu"; + + var i: usize = 0; + while (i < arguments.len) : (i += 1) { + const current_argument = arguments[i]; + if (equal(u8, current_argument, "-o")) { + if (i <= arguments.len) { + maybe_executable_path = arguments[i + 1]; + assert(maybe_executable_path.?.len != 0); + i += 1; + } else { + unreachable; + } + } else if (equal(u8, current_argument, "-target")) { + if (i <= arguments.len) { + target_triplet = arguments[i + 1]; + i += 1; + } else { + unreachable; + } + } else { + maybe_main_package_path = current_argument; + } + } + + const main_package_path = maybe_main_package_path 
orelse return error.main_package_path_not_specified; + + const executable_path = maybe_executable_path orelse blk: { + const executable_name = std.fs.path.basename(main_package_path[0 .. main_package_path.len - "/main.nat".len]); + assert(executable_name.len > 0); + const result = try std.mem.concat(allocator, u8, &.{ "nat/", executable_name }); + break :blk result; + }; + + const cross_target = try std.zig.CrossTarget.parse(.{ .arch_os_abi = target_triplet }); + const target = cross_target.toTarget(); + std.debug.print("Target: {}\n", .{target}); + + return .{ + .main_package_path = main_package_path, + .executable_path = executable_path, + .target = target, + }; } test { diff --git a/test/first/main.nat b/test/first/main.nat index 45bfaac..34ec86b 100644 --- a/test/first/main.nat +++ b/test/first/main.nat @@ -1,3 +1,3 @@ const main = fn() s32 { return 0; -}; +} diff --git a/test/hello_world/main.nat b/test/hello_world/main.nat new file mode 100644 index 0000000..595a9f5 --- /dev/null +++ b/test/hello_world/main.nat @@ -0,0 +1,6 @@ +const std = #import("std"); + +const main = fn() s32 { + std.print("Hello world!\n", 13); + return 0; +} diff --git a/test/stack/main.nat b/test/stack/main.nat new file mode 100644 index 0000000..4574781 --- /dev/null +++ b/test/stack/main.nat @@ -0,0 +1,4 @@ +const main = fn() s32 { + var a : s32 = 0; + return a; +} From aefabd6108299620f90ae32127987bf847319d59 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Tue, 7 Nov 2023 15:00:48 -0600 Subject: [PATCH 2/6] almost working register allocator --- src/backend/intermediate_representation.zig | 58 +- src/backend/x86_64.zig | 755 ++++++++++++++++---- src/data_structures.zig | 8 +- src/frontend/semantic_analyzer.zig | 70 +- src/frontend/syntactic_analyzer.zig | 16 +- 5 files changed, 676 insertions(+), 231 deletions(-) diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index 2b4fd0a..bc91dbf 100644 --- 
a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -307,12 +307,12 @@ pub const Function = struct { .phi => {}, .ret => |ret_index| { const ret = function.ir.returns.get(ret_index); - switch (ret.instruction.valid) { - true => { + switch (ret.instruction.invalid) { + false => { const ret_value = function.ir.instructions.get(ret.instruction).*; try writer.print(" {s}", .{@tagName(ret_value)}); }, - false => try writer.writeAll(" void"), + true => try writer.writeAll(" void"), } }, // .load => |load_index| { @@ -518,13 +518,13 @@ pub const Builder = struct { const function_declaration_index = ir_call.function; const function_declaration = builder.ir.function_declarations.get(function_declaration_index); const function_definition_index = function_declaration.definition; - switch (function_definition_index.valid) { - true => { + switch (function_definition_index.invalid) { + false => { const function = builder.ir.function_definitions.get(function_definition_index); const first_block = function.blocks.items[0]; break :blk first_block; }, - false => continue, + true => continue, } }, .@"unreachable", .ret, .store => continue, @@ -625,10 +625,10 @@ pub const Builder = struct { .phi => blk: { var did_something = false; var head = &instruction.phi; - next: while (head.valid) { + next: while (!head.invalid) { const phi = builder.ir.phis.get(head.*); const phi_jump = builder.ir.jumps.get(phi.jump); - assert(phi_jump.source.valid); + assert(!phi_jump.source.invalid); for (reachable_blocks) |block_index| { if (phi_jump.source.eq(block_index)) { @@ -655,12 +655,12 @@ pub const Builder = struct { var only_value = Instruction.Index.invalid; var it = phi_index; - while (it.valid) { + while (!it.invalid) { const phi = builder.ir.phis.get(it); const phi_value = builder.ir.instructions.get(phi.instruction); if (phi_value.* == .phi) unreachable; // TODO: undefined - if (only_value.valid) { + if (!only_value.invalid) { if 
(!only_value.eq(phi.instruction)) { break :trivial_blk null; } @@ -675,7 +675,7 @@ pub const Builder = struct { }; if (trivial_phi) |trivial_value| { - if (trivial_value.valid) { + if (!trivial_value.invalid) { // Option to delete const delete = false; if (delete) { @@ -740,8 +740,8 @@ pub const Builder = struct { }; for (operands) |operand_instruction_index_pointer| { - switch (operand_instruction_index_pointer.valid) { - true => { + switch (operand_instruction_index_pointer.invalid) { + false => { const operand_value = builder.ir.instructions.get(operand_instruction_index_pointer.*); switch (operand_value.*) { .copy => |copy_value| { @@ -759,7 +759,7 @@ pub const Builder = struct { else => |t| @panic(@tagName(t)), } }, - false => {}, + true => {}, } } @@ -847,13 +847,13 @@ pub const Builder = struct { var arguments = try ArrayList(Instruction.Index).initCapacity(builder.allocator, sema_syscall.argument_count + 1); const sema_syscall_number = sema_syscall.number; - assert(sema_syscall_number.valid); + assert(!sema_syscall_number.invalid); const number_value_index = try builder.emitSyscallArgument(sema_syscall_number); arguments.appendAssumeCapacity(number_value_index); for (sema_syscall.getArguments()) |sema_syscall_argument| { - assert(sema_syscall_argument.valid); + assert(!sema_syscall_argument.invalid); const argument_value_index = try builder.emitSyscallArgument(sema_syscall_argument); arguments.appendAssumeCapacity(argument_value_index); } @@ -890,7 +890,7 @@ pub const Builder = struct { const loop_body_block = try builder.newBlock(); const loop_prologue_block = if (options.emit_exit_block) try builder.newBlock() else BasicBlock.Index.invalid; - const loop_head_block = switch (condition.valid) { + const loop_head_block = switch (!condition.invalid) { false => loop_body_block, true => unreachable, }; @@ -902,7 +902,7 @@ pub const Builder = struct { const sema_body_block = builder.module.blocks.get(sema_loop_body.block); 
builder.currentFunction().current_basic_block = try builder.blockInsideBasicBlock(sema_body_block, loop_body_block); - if (loop_prologue_block.valid) { + if (!loop_prologue_block.invalid) { builder.ir.blocks.get(loop_prologue_block).seal(); } @@ -921,7 +921,7 @@ pub const Builder = struct { unreachable; } - if (loop_prologue_block.valid) { + if (!loop_prologue_block.invalid) { builder.currentFunction().current_basic_block = loop_prologue_block; } }, @@ -933,13 +933,13 @@ pub const Builder = struct { const sema_ret = builder.module.returns.get(sema_ret_index); const return_value = try builder.emitReturnValue(sema_ret.value); const phi_instruction = builder.ir.instructions.get(builder.currentFunction().return_phi_node); - const phi = switch (phi_instruction.phi.valid) { - true => unreachable, - false => (try builder.ir.phis.append(builder.allocator, std.mem.zeroes(Phi))).ptr, + const phi = switch (phi_instruction.phi.invalid) { + false => unreachable, + true => (try builder.ir.phis.append(builder.allocator, std.mem.zeroes(Phi))).ptr, }; //builder.ir.phis.get(phi_instruction.phi); const exit_jump = try builder.jump(.{ .source = builder.currentFunction().current_basic_block, - .destination = switch (phi_instruction.phi.valid) { + .destination = switch (!phi_instruction.phi.invalid) { true => phi.block, false => builder.currentFunction().return_phi_block, }, @@ -1056,8 +1056,8 @@ pub const Builder = struct { fn processCall(builder: *Builder, sema_call_index: Compilation.Call.Index) anyerror!Instruction.Index { const sema_call = builder.module.calls.get(sema_call_index); const sema_argument_list_index = sema_call.arguments; - const argument_list: []const Instruction.Index = switch (sema_argument_list_index.valid) { - true => blk: { + const argument_list: []const Instruction.Index = switch (sema_argument_list_index.invalid) { + false => blk: { var argument_list = ArrayList(Instruction.Index){}; const sema_argument_list = 
builder.module.argument_lists.get(sema_argument_list_index); try argument_list.ensureTotalCapacity(builder.allocator, sema_argument_list.array.items.len); @@ -1067,7 +1067,7 @@ pub const Builder = struct { } break :blk argument_list.items; }, - false => &.{}, + true => &.{}, }; const call_index = try builder.call(.{ @@ -1172,15 +1172,15 @@ pub const Builder = struct { fn jump(builder: *Builder, descriptor: Jump) !Jump.Index { const destination_block = builder.ir.blocks.get(descriptor.destination); assert(!destination_block.sealed); - assert(descriptor.source.valid); + assert(!descriptor.source.invalid); const jump_allocation = try builder.ir.jumps.append(builder.allocator, descriptor); return jump_allocation.index; } fn append(builder: *Builder, instruction: Instruction) !Instruction.Index { - assert(builder.current_function_index.valid); + assert(!builder.current_function_index.invalid); const current_function = builder.currentFunction(); - assert(current_function.current_basic_block.valid); + assert(!current_function.current_basic_block.invalid); return builder.appendToBlock(current_function.current_basic_block, instruction); } diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index d15d8e4..a114761 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -3,7 +3,7 @@ const Allocator = std.mem.Allocator; const assert = std.debug.assert; const print = std.debug.print; const emit = @import("emit.zig"); -const ir = @import("./intermediate_representation.zig"); +const ir = @import("intermediate_representation.zig"); const Compilation = @import("../Compilation.zig"); @@ -18,6 +18,19 @@ const Register = struct { list: List = .{}, index: Index, + const invalid = Register{ + .index = .{ + .physical = .no_register, + }, + }; + + fn isValid(register: Register) bool { + return switch (register.index) { + .physical => |physical| physical != .no_register, + .virtual => true, + }; + } + const Index = union(enum) { physical: Register.Physical, virtual: 
Register.Virtual.Index, @@ -929,6 +942,7 @@ const StackObject = struct { const InstructionSelection = struct { local_value_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, Register) = .{}, + value_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, Register) = .{}, block_map: data_structures.AutoHashMap(ir.BasicBlock.Index, BasicBlock.Index) = .{}, liveins: data_structures.AutoArrayHashMap(Register.Physical, Register.Virtual.Index) = .{}, memory_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, Memory.Index) = .{}, @@ -994,6 +1008,7 @@ const InstructionSelection = struct { fn loadRegisterFromStackSlot(instruction_selection: *InstructionSelection, mir: *MIR, insert_before_instruction_index: usize, destination_register: Register.Physical, frame_index: u32, register_class: Register.Class, virtual_register: Register.Virtual.Index) !void { _ = virtual_register; const stack_object = instruction_selection.stack_objects.items[frame_index]; + print("Stack object size: {}\n", .{stack_object.size}); switch (@divExact(stack_object.size, 8)) { @sizeOf(u64) => { switch (register_class) { @@ -1034,31 +1049,74 @@ const InstructionSelection = struct { else => |t| @panic(@tagName(t)), } }, + @sizeOf(u32) => switch (register_class) { + .gp32 => { + const instruction_id = Instruction.Id.mov32rm; + const instruction_descriptor = comptime instruction_descriptors.get(instruction_id); + const source_operand_id = instruction_descriptor.operands[1].id; + const addressing_mode = AddressingMode{ + .base = .{ + .frame_index = frame_index, + }, + }; + const source_operand = Operand{ + .id = source_operand_id, + .u = .{ + .memory = .{ .addressing_mode = addressing_mode }, + }, + .flags = .{}, + }; + const destination_operand = Operand{ + .id = .gp32, + .u = .{ + .register = .{ + .index = .{ + .physical = destination_register, + }, + }, + }, + .flags = .{ .type = .def }, + }; + const instruction_index = try mir.buildInstruction(instruction_selection, instruction_id, 
&.{ + destination_operand, + source_operand, + }); + print("Inserting instruction at index {}\n", .{insert_before_instruction_index}); + try mir.blocks.get(instruction_selection.current_block).instructions.insert(mir.allocator, insert_before_instruction_index, instruction_index); + }, + else => |t| @panic(@tagName(t)), + }, else => std.debug.panic("Stack object size: {}\n", .{stack_object.size}), } } // TODO: add value map on top of local value map? - fn lookupRegisterForValue(instruction_selection: *InstructionSelection, ir_instruction_index: ir.Instruction.Index) ?Register { - if (instruction_selection.local_value_map.get(ir_instruction_index)) |register| { + fn lookupRegisterForValue(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) !Register { + if (instruction_selection.value_map.get(ir_instruction_index)) |register| { return register; } - return null; + const gop = try instruction_selection.local_value_map.getOrPutValue(mir.allocator, ir_instruction_index, Register.invalid); + return gop.value_ptr.*; } fn getRegisterForValue(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) !Register { - if (instruction_selection.lookupRegisterForValue(ir_instruction_index)) |register| { + const register = try instruction_selection.lookupRegisterForValue(mir, ir_instruction_index); + if (register.isValid()) { return register; } - const ir_type = getIrType(mir.ir, ir_instruction_index); - const value_type = resolveType(ir_type); + const instruction = mir.ir.instructions.get(ir_instruction_index); + if (instruction.* != .stack or !instruction_selection.stack_map.contains(ir_instruction_index)) { + const ir_type = getIrType(mir.ir, ir_instruction_index); + const value_type = resolveType(ir_type); + const register_class = register_classes.get(value_type); + const new_register = try mir.createVirtualRegister(register_class); + try 
instruction_selection.value_map.putNoClobber(mir.allocator, ir_instruction_index, new_register); + return new_register; + } - const register_class = register_classes.get(value_type); - const virtual_register = try mir.createVirtualRegister(register_class); - try instruction_selection.local_value_map.putNoClobber(mir.allocator, ir_instruction_index, virtual_register); - return virtual_register; + unreachable; } // Moving an immediate to a register @@ -1187,18 +1245,28 @@ const InstructionSelection = struct { } } - fn updateValueMap(instruction_selection: *InstructionSelection, allocator: Allocator, ir_instruction_index: ir.Instruction.Index, register: Register) !void { - const gop = try instruction_selection.local_value_map.getOrPut(allocator, ir_instruction_index); - if (gop.found_existing) { - const stored_register = gop.value_ptr.*; - if (std.meta.eql(stored_register, register)) { - unreachable; - } else { - std.debug.panic("Register mismatch: Stored: {} Got: {}\n", .{ stored_register, register }); - } + fn updateValueMap(instruction_selection: *InstructionSelection, allocator: Allocator, ir_instruction_index: ir.Instruction.Index, register: Register, local: bool) !void { + if (local) { + try instruction_selection.local_value_map.putNoClobber(allocator, ir_instruction_index, register); } else { - gop.value_ptr.* = register; + const gop = try instruction_selection.value_map.getOrPutValue(allocator, ir_instruction_index, Register.invalid); + if (!gop.value_ptr.isValid()) { + gop.value_ptr.* = register; + } else if (!std.meta.eql(gop.value_ptr.index, register.index)) { + unreachable; + } } + // const gop = try instruction_selection.local_value_map.getOrPut(allocator, ir_instruction_index); + // if (gop.found_existing) { + // const stored_register = gop.value_ptr.*; + // if (std.meta.eql(stored_register, register)) { + // unreachable; + // } else { + // std.debug.panic("Register mismatch: Stored: {} Got: {}\n", .{ stored_register, register }); + // } + // } else { + 
// gop.value_ptr.* = register; + // } } fn lowerArguments(instruction_selection: *InstructionSelection, mir: *MIR, ir_function: *ir.Function) !void { @@ -1253,18 +1321,16 @@ const InstructionSelection = struct { // const operand_register_class = register_class_operand_matcher.get(operand_reference.id); const virtual_register_index = try instruction_selection.createLiveIn(mir, physical_register, register_class); + const result_register = try mir.createVirtualRegister(register_class); try mir.append(instruction_selection, .copy, &.{ Operand{ .id = operand_id, .u = .{ - .register = .{ - .index = .{ - .virtual = virtual_register_index, - }, - }, + .register = result_register, }, .flags = .{ .dead_or_kill = true, + .type = .def, }, }, Operand{ @@ -1272,7 +1338,7 @@ const InstructionSelection = struct { .u = .{ .register = .{ .index = .{ - .physical = physical_register, + .virtual = virtual_register_index, }, }, }, @@ -1284,11 +1350,8 @@ const InstructionSelection = struct { mir.blocks.get(instruction_selection.current_block).current_stack_index += 1; - try instruction_selection.local_value_map.putNoClobber(mir.allocator, ir_argument_instruction_index, Register{ - .index = .{ - .virtual = virtual_register_index, - }, - }); + try instruction_selection.updateValueMap(mir.allocator, ir_argument_instruction_index, result_register, true); + try instruction_selection.value_map.putNoClobber(mir.allocator, ir_argument_instruction_index, result_register); } } @@ -1314,8 +1377,57 @@ const InstructionSelection = struct { return virtual_register_index; } + + fn emitLiveInCopies(instruction_selection: *InstructionSelection, mir: *MIR, entry_block_index: BasicBlock.Index) !void { + const entry_block = mir.blocks.get(entry_block_index); + for (instruction_selection.liveins.keys(), instruction_selection.liveins.values()) |livein_physical_register, livein_virtual_register| { + const vr = mir.virtual_registers.get(livein_virtual_register); + const destination_operand = Operand{ + .id = 
switch (vr.register_class) { + .gp32 => .gp32, + .gp64 => .gp64, + else => |t| @panic(@tagName(t)), + }, + .u = .{ + .register = .{ + .index = .{ + .virtual = livein_virtual_register, + }, + }, + }, + .flags = .{ + .type = .def, + }, + }; + const source_operand = Operand{ + .id = destination_operand.id, + .u = .{ + .register = .{ + .index = .{ + .physical = livein_physical_register, + }, + }, + }, + .flags = .{}, + }; + + const instruction_index = try mir.buildInstruction(instruction_selection, .copy, &.{ + destination_operand, + source_operand, + }); + + try entry_block.instructions.insert(mir.allocator, 0, instruction_index); + + // TODO: addLiveIn MachineBasicBlock ? unreachable; + } + print("After livein: {}\n", .{instruction_selection.function}); + } }; +fn getRegisterClass(register: Register.Physical) Register.Class { + _ = register; +} + const Instruction = struct { id: Id, operands: ArrayList(Operand.Index), @@ -1332,6 +1444,7 @@ const Instruction = struct { mov64mr, mov32ri, mov32ri64, + movsx64rm32, movsx64rr32, ret, syscall, @@ -1385,7 +1498,7 @@ const Instruction = struct { .mir = mir, }; - if (index.valid) { + if (!index.invalid) { const operand = mir.operands.get(index); if ((!arguments.use and operand.flags.type == .use) or (!arguments.def and operand.flags.type == .def)) { it.advance(); @@ -1395,39 +1508,54 @@ const Instruction = struct { return it; } - fn next(it: *I) switch (arguments.element) { - .instruction => ?*Instruction, - .operand => ?*Operand, - } { - if (it.index.valid) { - var operand = it.mir.operands.get(it.index); - switch (arguments.element) { + const ReturnValue = switch (arguments.element) { + .instruction => Instruction, + .operand => Operand, + }; + + fn next(it: *I) ?ReturnValue.Index { + const original_operand_index = it.index; + switch (it.index.invalid) { + false => switch (arguments.element) { .instruction => { - const instruction = operand.parent; - const i_desc = it.mir.instructions.get(instruction); - print("Instruction: 
{}\n", .{i_desc.id}); + const original_operand = it.mir.operands.get(original_operand_index); + const instruction = original_operand.parent; + // const i_desc = it.mir.instructions.get(instruction); + // print("Instruction: {}\n", .{i_desc.id}); while (true) { it.advance(); - if (!it.index.valid) return null; - operand = it.mir.operands.get(it.index); - if (!operand.parent.eq(instruction)) break; + if (it.index.invalid) break; + const it_operand = it.mir.operands.get(it.index); + if (!it_operand.parent.eq(instruction)) break; } - return it.mir.instructions.get(operand.parent); + return instruction; }, - .operand => return operand, - } - } else { - return null; + .operand => { + it.advance(); + return original_operand_index; + }, + }, + true => return null, } } + fn nextPointer(it: *I) ?*ReturnValue { + if (it.next()) |next_index| { + const result = switch (arguments.element) { + .instruction => it.mir.instructions.get(next_index), + .operand => it.mir.operands.get(next_index), + }; + return result; + } else return null; + } + fn advance(it: *I) void { - assert(it.index.valid); + assert(!it.index.invalid); it.advanceRaw(); if (!arguments.use) { - if (it.index.valid) { + if (!it.index.invalid) { const operand = it.mir.operands.get(it.index); if (operand.flags.type == .use) { it.index = Operand.Index.invalid; @@ -1436,7 +1564,7 @@ const Instruction = struct { } } } else { - while (it.index.valid) { + while (!it.index.invalid) { const operand = it.mir.operands.get(it.index); if (!arguments.def and operand.flags.type == .def) { it.advanceRaw(); @@ -1448,7 +1576,7 @@ const Instruction = struct { } fn advanceRaw(it: *I) void { - assert(it.index.valid); + assert(!it.index.invalid); const current_operand = it.mir.operands.get(it.index); assert(current_operand.u == .register); const next_index = current_operand.u.register.list.next; @@ -1480,7 +1608,7 @@ pub const Operand = struct { fn isOnRegisterUseList(operand: *const Operand) bool { assert(operand.u == .register); - 
return operand.u.register.list.previous.valid; + return !operand.u.register.list.previous.invalid; } const Id = enum { @@ -1764,6 +1892,22 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .implicit_def = false, }, }, + .movsx64rm32 = .{ + .format = .mrm_source_reg, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .i32mem, + .kind = .src, + }, + }, + .flags = .{ + .implicit_def = false, + }, + }, .movsx64rr32 = .{ .format = .mrm_source_reg, .operands = &.{ @@ -1935,12 +2079,16 @@ pub const MIR = struct { var instruction_i: usize = ir_block.instructions.items.len; print("Instruction count: {}\n", .{instruction_i}); + var folded_load = false; + while (instruction_i > 0) { instruction_i -= 1; const ir_instruction_index = ir_block.instructions.items[instruction_i]; const ir_instruction = mir.ir.instructions.get(ir_instruction_index); + instruction_selection.local_value_map.clearRetainingCapacity(); + print("Instruction #{}\n", .{instruction_i}); switch (ir_instruction.*) { @@ -1972,7 +2120,9 @@ pub const MIR = struct { .u = .{ .register = physical_register, }, - .flags = .{}, + .flags = .{ + .type = .def, + }, }, Operand{ .id = operand_id, @@ -2040,73 +2190,134 @@ pub const MIR = struct { const syscall = try mir.buildInstruction(instruction_selection, .syscall, &.{}); try instruction_selection.instruction_cache.append(mir.allocator, syscall); - const physical_return_register = Register{ - .index = .{ - .physical = .rax, + const produce_syscall_return_value = switch (instruction_i == ir_block.instructions.items.len - 2) { + true => blk: { + const last_block_instruction = mir.ir.instructions.get(ir_block.instructions.items[ir_block.instructions.items.len - 1]); + break :blk switch (last_block_instruction.*) { + .@"unreachable" => false, + else => |t| @panic(@tagName(t)), + }; }, - }; - const physical_return_operand = Operand{ - .id = .gp64, - .u = .{ - .register = physical_return_register, - }, - .flags = .{ .type 
= .def }, + false => true, }; - const virtual_return_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); - const virtual_return_operand = Operand{ - .id = .gp64, - .u = .{ - .register = virtual_return_register, - }, - .flags = .{ .type = .def }, - }; + if (produce_syscall_return_value) { + const physical_return_register = Register{ + .index = .{ + .physical = .rax, + }, + }; + const physical_return_operand = Operand{ + .id = .gp64, + .u = .{ + .register = physical_return_register, + }, + .flags = .{ .type = .def }, + }; - const syscall_result_copy = try mir.buildInstruction(instruction_selection, .copy, &.{ - virtual_return_operand, - physical_return_operand, - }); - try instruction_selection.instruction_cache.append(mir.allocator, syscall_result_copy); + const virtual_return_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const virtual_return_operand = Operand{ + .id = .gp64, + .u = .{ + .register = virtual_return_register, + }, + .flags = .{ .type = .def }, + }; + + const syscall_result_copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + virtual_return_operand, + physical_return_operand, + }); + try instruction_selection.instruction_cache.append(mir.allocator, syscall_result_copy); + } }, .sign_extend => |ir_cast_index| { const ir_sign_extend = mir.ir.casts.get(ir_cast_index); - const ir_source_instruction = ir_sign_extend.value; + assert(!folded_load); + const ir_source_instruction = blk: { + var source = ir_sign_extend.value; + const source_instruction = mir.ir.instructions.get(source); + const result = switch (source_instruction.*) { + .load => b: { + const load = mir.ir.loads.get(source_instruction.load); + folded_load = true; + break :b load.instruction; + }, + else => |t| @panic(@tagName(t)), + }; + break :blk result; + }; const destination_type = resolveType(ir_sign_extend.type); - const source_register = try instruction_selection.getRegisterForValue(mir, 
ir_source_instruction); const source_type = resolveType(getIrType(mir.ir, ir_source_instruction)); if (destination_type != source_type) { const instruction_id: Instruction.Id = switch (source_type) { .i32 => switch (destination_type) { - .i64 => .movsx64rr32, + .i64 => switch (folded_load) { + true => .movsx64rm32, + false => .movsx64rr32, + }, else => unreachable, }, else => |t| @panic(@tagName(t)), }; + const instruction_descriptor = instruction_descriptors.getPtrConst(instruction_id); assert(instruction_descriptor.operands.len == 2); const destination_operand_index = 0; - const source_operand_index = 1; - const source_operand = mir.constrainOperandRegisterClass(instruction_descriptor, source_register, source_operand_index, .{}); const destination_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); const destination_operand = mir.constrainOperandRegisterClass(instruction_descriptor, destination_register, destination_operand_index, .{ .type = .def }); + const source_operand_index = 1; + + const source_operand = switch (folded_load) { + true => blk: { + const addressing_mode = instruction_selection.getAddressingModeFromIr(mir, ir_source_instruction); + const memory_id: Operand.Id = switch (source_type) { + .i32 => .i32mem, + .i64 => .i64mem, + else => |t| @panic(@tagName(t)), + }; + const operand = Operand{ + .id = memory_id, + .u = .{ + .memory = .{ + .addressing_mode = addressing_mode, + }, + }, + .flags = .{}, + }; + break :blk operand; + }, + false => blk: { + const source_register = try instruction_selection.getRegisterForValue(mir, ir_source_instruction); + break :blk mir.constrainOperandRegisterClass(instruction_descriptor, source_register, source_operand_index, .{}); + }, + }; const sign_extend = try mir.buildInstruction(instruction_selection, instruction_id, &.{ destination_operand, source_operand, }); + try instruction_selection.instruction_cache.append(mir.allocator, sign_extend); + + try 
instruction_selection.updateValueMap(mir.allocator, ir_instruction_index, destination_register, false); } else { unreachable; } }, .load => |ir_load_index| { + if (folded_load) { + folded_load = false; + continue; + } + const ir_load = mir.ir.loads.get(ir_load_index); - const ir_destination = ir_load.instruction; - const addressing_mode = instruction_selection.getAddressingModeFromIr(mir, ir_destination); - const value_type = resolveType(getIrType(mir.ir, ir_destination)); + const ir_source = ir_load.instruction; + const addressing_mode = instruction_selection.getAddressingModeFromIr(mir, ir_source); + const value_type = resolveType(getIrType(mir.ir, ir_source)); switch (value_type) { inline .i32, @@ -2151,6 +2362,8 @@ pub const MIR = struct { source_operand, }); try instruction_selection.instruction_cache.append(mir.allocator, load); + + try instruction_selection.updateValueMap(mir.allocator, ir_instruction_index, destination_register, false); }, else => |t| @panic(@tagName(t)), } @@ -2338,35 +2551,9 @@ pub const MIR = struct { instruction_selection.instruction_cache.clearRetainingCapacity(); } - - instruction_selection.local_value_map.clearRetainingCapacity(); } - // for (ir_function.blocks.items, function.blocks.items) |ir_block_index, block_index| { - // const ir_block = mir.ir.blocks.get(ir_block_index); - // instruction_selection.current_block = block_index; - // - // for (ir_block.instructions.items) |ir_instruction_index| { - // const ir_instruction = mir.ir.instructions.get(ir_instruction_index); - // switch (ir_instruction.*) { - // .load_string_literal => |ir_load_string_literal_index| { - // // const ir_string_literal = mir.ir.string_literals.get(ir_load_string_literal_index); - // const virtual_register = try mir.createVirtualRegister(Register.Class.gp64); - // const virtual_operand = Operand.new(.gp64, virtual_register, .{ .type = .def }); - // try mir.append(instruction_selection, .lea64r, &.{ - // virtual_operand, - // 
Operand.Lea64Mem.stringLiteral(ir_load_string_literal_index), - // }); - // - // try instruction_selection.updateValueMap(allocator, ir_instruction_index, virtual_register); - // }, - // .@"unreachable" => try mir.append(instruction_selection, .ud2, &.{}), - // else => |t| @panic(@tagName(t)), - // } - // } - // - // instruction_selection.local_value_map.clearRetainingCapacity(); - // } + try instruction_selection.emitLiveInCopies(mir, function.blocks.items[0]); print("=========\n{}=========\n", .{function}); } @@ -2400,7 +2587,7 @@ pub const MIR = struct { const operand = mir.operands.get(operand_index); assert(operand.u == .register); assert(!std.meta.eql(operand.u.register.index, register)); - operand.flags.renamable = true; + operand.flags.renamable = false; mir.removeRegisterOperandFromUseList(instruction_selection, operand); operand.u.register.index = register; mir.addRegisterOperandFromUseList(instruction_selection, operand_index); @@ -2412,8 +2599,8 @@ pub const MIR = struct { const head_index_ptr = mir.getRegisterListHead(instruction_selection, operand.u.register); const head_index = head_index_ptr.*; - switch (head_index.valid) { - true => { + switch (head_index.invalid) { + false => { const head_operand = mir.operands.get(head_index); assert(std.meta.eql(head_operand.u.register.index, operand.u.register.index)); const last_operand_index = head_operand.u.register.list.previous; @@ -2433,18 +2620,19 @@ pub const MIR = struct { }, } }, - false => { + true => { operand.u.register.list.previous = operand_index; operand.u.register.list.next = Operand.Index.invalid; head_index_ptr.* = operand_index; }, } } + fn removeRegisterOperandFromUseList(mir: *MIR, instruction_selection: *InstructionSelection, operand: *Operand) void { assert(operand.isOnRegisterUseList()); const head_index_ptr = mir.getRegisterListHead(instruction_selection, operand.u.register); const head_index = head_index_ptr.*; - assert(head_index.valid); + assert(!head_index.invalid); const 
operand_previous = operand.u.register.list.previous; const operand_next = operand.u.register.list.next; @@ -2457,9 +2645,9 @@ pub const MIR = struct { previous.u.register.list.next = operand_next; } - const next = switch (operand_next.valid) { - true => mir.operands.get(operand_next), - false => head, + const next = switch (operand_next.invalid) { + false => mir.operands.get(operand_next), + true => head, }; next.u.register.list.previous = operand_previous; @@ -2632,7 +2820,7 @@ pub const MIR = struct { break :blk null; }; // TODO: handle allocation error here - register_allocator.allocateVirtualRegister(mir, instruction_selection, instruction, live_register, hint, false) catch unreachable; + register_allocator.allocateVirtualRegister(mir, instruction_selection, instruction_index, live_register, hint, false) catch unreachable; } live_register.last_use = instruction_index; @@ -2643,12 +2831,10 @@ pub const MIR = struct { fn isRegisterInClass(register: Register.Physical, register_class: Register.Class) bool { const result = std.mem.indexOfScalar(Register.Physical, registers_by_class.get(register_class), register) != null; - print("Is {s} in class {s}: {}\n", .{ @tagName(register), @tagName(register_class), result }); return result; } - fn allocateVirtualRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction: *Instruction, live_register: *LiveRegister, maybe_hint: ?Register, look_at_physical_register_uses: bool) !void { - _ = instruction; + fn allocateVirtualRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, live_register: *LiveRegister, maybe_hint: ?Register, look_at_physical_register_uses: bool) !void { assert(live_register.physical == .no_register); const virtual_register = live_register.virtual; const register_class = mir.virtual_registers.get(live_register.virtual).register_class; @@ -2656,11 +2842,7 @@ pub 
const MIR = struct { if (maybe_hint) |hint_register| { if (hint_register.index == .physical // TODO : and isAllocatable - and isRegisterInClass(hint_register.index.physical, register_class) - - // TODO and !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) { - - ) { + and isRegisterInClass(hint_register.index.physical, register_class) and !register_allocator.isRegisterUsedInInstruction(hint_register.index.physical, look_at_physical_register_uses)) { if (register_allocator.register_states.get(hint_register.index.physical) == .free) { register_allocator.assignVirtualToPhysicalRegister(live_register, hint_register.index.physical); return; @@ -2670,8 +2852,21 @@ pub const MIR = struct { const maybe_hint2 = register_allocator.traceCopies(mir, instruction_selection, virtual_register); if (maybe_hint2) |hint| { - _ = hint; - unreachable; + // TODO + const allocatable = true; + if (hint == .physical and allocatable and isRegisterInClass(hint.physical, register_class) and !register_allocator.isRegisterUsedInInstruction(hint.physical, look_at_physical_register_uses)) { + const physical_register = hint.physical; + if (register_allocator.register_states.get(physical_register) == .free) { + register_allocator.assignVirtualToPhysicalRegister(live_register, physical_register); + return; + } else { + print("Second hint {s} not free\n", .{@tagName(physical_register)}); + } + } else { + unreachable; + } + } else { + print("Can't take hint for VR{} for instruction #{}\n", .{ virtual_register.uniqueInteger(), instruction_index.uniqueInteger() }); } const register_class_members = registers_by_class.get(register_class); @@ -2679,16 +2874,14 @@ pub const MIR = struct { var best_cost: u32 = SpillCost.impossible; var best_register = Register.Physical.no_register; - print("Candidates for {s}: ", .{@tagName(register_class)}); - for (register_class_members) |candidate_register| { - print("{s}, ", .{@tagName(candidate_register)}); - } + // print("Candidates for {s}: ", .{@tagName(register_class)}); 
+ // for (register_class_members) |candidate_register| { + // print("{s}, ", .{@tagName(candidate_register)}); + // } print("\n", .{}); for (register_class_members) |candidate_register| { - print("Checking candidate register {s}\n", .{@tagName(candidate_register)}); if (register_allocator.isRegisterUsedInInstruction(candidate_register, look_at_physical_register_uses)) continue; const spill_cost = register_allocator.computeSpillCost(candidate_register); - print("Spill cost: {}\n", .{spill_cost}); if (spill_cost == 0) { register_allocator.assignVirtualToPhysicalRegister(live_register, candidate_register); @@ -2753,8 +2946,43 @@ pub const MIR = struct { } } - fn traceCopies(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register_index: Register.Virtual.Index) ?Register.Index { + fn traceCopyChain(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, register: Register) ?Register.Index { _ = register_allocator; + const chain_length_limit = 3; + _ = chain_length_limit; + var chain_try_count: u32 = 0; + _ = chain_try_count; + while (true) { + switch (register.index) { + .physical => return register.index, + .virtual => |vri| { + const virtual_head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ + .index = .{ + .virtual = vri, + }, + }); + + var vdef = Instruction.Iterator.Get(.{ + .use = false, + .def = true, + .element = .instruction, + }).new(mir, virtual_head_index_ptr.*); + + const vdef_instruction = vdef.nextPointer() orelse break; + if (vdef.nextPointer()) |_| break; + + switch (vdef_instruction.id) { + else => |t| @panic(@tagName(t)), + } + unreachable; + }, + } + } + + return null; + } + + fn traceCopies(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register_index: Register.Virtual.Index) ?Register.Index { const head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ .index = .{ 
.virtual = virtual_register_index, @@ -2766,8 +2994,27 @@ pub const MIR = struct { .element = .instruction, }).new(mir, head_index_ptr.*); - while (define_instructions.next()) |_| { - unreachable; + const definition_limit = 3; + var try_count: u32 = 0; + while (define_instructions.next()) |instruction_index| { + const instruction = mir.instructions.get(instruction_index); + switch (instruction.id) { + .mov32rm => unreachable, + .copy => { + const operand_index = instruction.operands.items[1]; + const operand = mir.operands.get(operand_index); + + if (register_allocator.traceCopyChain(mir, instruction_selection, operand.u.register)) |register| { + return register; + } + + print("Missed oportunity for register allocation tracing copy chain for VR{}\n", .{virtual_register_index.uniqueInteger()}); + }, + else => |t| @panic(@tagName(t)), + } + + try_count += 1; + if (try_count >= definition_limit) break; } return null; @@ -2786,9 +3033,16 @@ pub const MIR = struct { // TODO: debug info } + fn usePhysicalRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, physical_register: Register.Physical) !bool { + const displaced_any = try register_allocator.displacePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + register_allocator.register_states.set(physical_register, .preassigned); + register_allocator.markUsedRegisterInInstruction(physical_register); + return displaced_any; + } + fn displacePhysicalRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, physical_register: Register.Physical) !bool { const state = register_allocator.register_states.getPtr(physical_register); - print("Trying to displace register {s} with state {s}\n", .{ @tagName(physical_register), @tagName(state.*) }); + // print("Trying to displace register {s} with state {s}\n", .{ 
@tagName(physical_register), @tagName(state.*) }); return switch (state.*) { .free => false, .preassigned => blk: { @@ -2799,6 +3053,7 @@ pub const MIR = struct { const live_reg = register_allocator.live_virtual_registers.getPtr(virtual_register).?; const before = mir.getNextInstructionIndex(instruction_index); try register_allocator.reload(mir, instruction_selection, before, virtual_register, physical_register); + state.* = .free; live_reg.physical = .no_register; live_reg.reloaded = true; break :blk true; @@ -2861,16 +3116,24 @@ pub const MIR = struct { fn defineVirtualRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, operand_index: Operand.Index, virtual_register: Register.Virtual.Index, look_at_physical_register_uses: bool) !bool { const instruction = mir.instructions.get(instruction_index); + const operand = mir.operands.get(operand_index); const gop = try register_allocator.live_virtual_registers.getOrPut(mir.allocator, virtual_register); if (!gop.found_existing) { gop.value_ptr.* = .{ .virtual = virtual_register, }; - unreachable; + if (!operand.flags.dead_or_kill) { + var live_out = false; + if (live_out) { + // TODO + } else { + operand.flags.dead_or_kill = true; + } + } } const live_register = gop.value_ptr; if (live_register.physical == .no_register) { - try register_allocator.allocateVirtualRegister(mir, instruction_selection, instruction, live_register, null, look_at_physical_register_uses); + try register_allocator.allocateVirtualRegister(mir, instruction_selection, instruction_index, live_register, null, look_at_physical_register_uses); } else { assert(!register_allocator.isRegisterUsedInInstruction(live_register.physical, look_at_physical_register_uses)); } @@ -2881,7 +3144,7 @@ pub const MIR = struct { const instruction_descriptor = instruction_descriptors.get(instruction.id); if (!instruction_descriptor.flags.implicit_def) { const spill_before = 
mir.getNextInstructionIndex(instruction_index); - const kill = !live_register.last_use.valid; + const kill = live_register.last_use.invalid; try register_allocator.spill(mir, instruction_selection, spill_before, virtual_register, physical_register, kill, live_register.live_out); live_register.last_use = Instruction.Index.invalid; @@ -2950,7 +3213,7 @@ pub const MIR = struct { const limit = 8; var count: u32 = 0; - while (iterator.next()) |use_instruction| { + while (iterator.nextPointer()) |use_instruction| { if (!use_instruction.parent.eq(instruction_selection.current_block)) { register_allocator.may_live_across_blocks.set(virtual_register_index.uniqueInteger()); // TODO: return !basic_block.successorsEmpty() @@ -3041,14 +3304,177 @@ pub const MIR = struct { pub fn allocateRegisters(mir: *MIR) !void { print("\n[REGISTER ALLOCATION]\n\n", .{}); const function_count = mir.functions.len; - _ = function_count; var function_iterator = mir.functions.iterator(); - _ = function_iterator; const register_count = @typeInfo(Register.Physical).Enum.fields.len; _ = register_count; const register_unit_count = 173; _ = register_unit_count; + for (0..function_count) |function_index| { + const function = function_iterator.nextPointer().?; + const instruction_selection = &mir.instruction_selections.items[function_index]; + print("Allocating registers for {}\n", .{function}); + + var block_i: usize = function.blocks.items.len; + var register_allocator = try RegisterAllocator.init(mir, instruction_selection); + + while (block_i > 0) { + block_i -= 1; + + const block_index = function.blocks.items[block_i]; + const block = mir.blocks.get(block_index); + + var instruction_i: usize = block.instructions.items.len; + + while (instruction_i > 0) { + instruction_i -= 1; + + const instruction_index = block.instructions.items[instruction_i]; + const instruction = mir.instructions.get(instruction_index); + print("===============\nInstruction {} (#{})\n", .{ instruction_i, 
instruction_index.uniqueInteger() }); + print("{}\n", .{function}); + + register_allocator.used_in_instruction = RegisterBitset.initEmpty(); + + const max_operand_count = 32; + var define_bitset = std.StaticBitSet(max_operand_count).initEmpty(); + var physical_register_bitset = std.StaticBitSet(max_operand_count).initEmpty(); + var register_mask_bitset = std.StaticBitSet(max_operand_count).initEmpty(); + var virtual_register_define = false; + var assign_live_throughs = false; + + for (instruction.operands.items, 0..) |operand_index, operand_i| { + const operand = mir.operands.get(operand_index); + switch (operand.u) { + .register => |register| { + const is_define = operand.flags.type == .def; + const is_physical = register.index == .physical; + if (is_define and !is_physical) { + virtual_register_define = true; + } + define_bitset.setValue(operand_i, is_define); + physical_register_bitset.setValue(operand_i, is_physical); + if (is_physical and is_define) { + const physical_register = register.index.physical; + const displaced_any = try register_allocator.definePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + if (!displaced_any) { + operand.flags.dead_or_kill = true; + } + } + }, + else => {}, + } + } + + if (define_bitset.count() > 0) { + if (virtual_register_define) { + var rearranged_implicit_operands = true; + if (assign_live_throughs) { + unreachable; + } else { + while (rearranged_implicit_operands) { + rearranged_implicit_operands = false; + + for (instruction.operands.items) |operand_index| { + const operand = mir.operands.get(operand_index); + switch (operand.u) { + .register => |register| switch (operand.flags.type) { + .def => switch (register.index) { + .virtual => |virtual_register| { + rearranged_implicit_operands = try register_allocator.defineVirtualRegister(mir, instruction_selection, instruction_index, operand_index, virtual_register, false); + if (rearranged_implicit_operands) { + break; + } + }, + .physical => 
{}, + }, + else => {}, + }, + .lea64mem => |lea64mem| { + assert(lea64mem.gp64 == null); + assert(lea64mem.scale_reg == null); + }, + else => {}, + } + } + } + } + } + + var operand_i = instruction.operands.items.len; + while (operand_i > 0) { + operand_i -= 1; + + if (define_bitset.isSet(operand_i) and physical_register_bitset.isSet(operand_i)) { + const operand_index = instruction.operands.items[operand_i]; + const operand = mir.operands.get(operand_index); + const physical_register = operand.u.register.index.physical; + register_allocator.freePhysicalRegister(physical_register); + register_allocator.unmarkUsedRegisterInInstruction(physical_register); + } + } + } + + if (register_mask_bitset.count() > 0) { + unreachable; + } + + // Physical register use + if (physical_register_bitset.count() > 0) { + for (instruction.operands.items, 0..) |operand_index, operand_i| { + if (!define_bitset.isSet(operand_i) and physical_register_bitset.isSet(operand_i)) { + const operand = mir.operands.get(operand_index); + const physical_register = operand.u.register.index.physical; + if (!register_allocator.reserved.contains(physical_register)) { + const displaced_any = try register_allocator.usePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + if (!displaced_any) { + operand.flags.dead_or_kill = true; + } + } + } + } + } + + var rearranged_implicit_operands = true; + while (rearranged_implicit_operands) { + rearranged_implicit_operands = false; + for (instruction.operands.items, 0..) 
|operand_index, operand_i| { + if (!define_bitset.isSet(operand_i)) { + const operand = mir.operands.get(operand_index); + if (operand.u == .register and operand.u.register.index == .virtual) { + const virtual_register = operand.u.register.index.virtual; + rearranged_implicit_operands = try register_allocator.useVirtualRegister(mir, instruction_selection, instruction_index, virtual_register, @intCast(operand_i)); + if (rearranged_implicit_operands) break; + } + } + } + } + + if (instruction.id == .copy and instruction.operands.items.len == 2) { + const dst_register = mir.operands.get(instruction.operands.items[0]).u.register.index; + const src_register = mir.operands.get(instruction.operands.items[1]).u.register.index; + + if (std.meta.eql(dst_register, src_register)) { + try register_allocator.coalesced.append(mir.allocator, instruction_index); + print("Avoiding copy...\n", .{}); + } + } + } + + for (register_allocator.coalesced.items) |coalesced| { + for (block.instructions.items, 0..) 
|instruction_index, i| { + if (coalesced.eq(instruction_index)) { + const result = block.instructions.orderedRemove(i); + assert(result.eq(coalesced)); + break; + } + } else unreachable; + } + + print("{}\n============\n", .{function}); + } + } + // for (0..function_count) |function_index| { // const function = function_iterator.nextPointer().?; // const instruction_selection = &mir.instruction_selections.items[function_index]; @@ -3314,10 +3740,16 @@ pub const MIR = struct { } fn getRegisterListHead(mir: *MIR, instruction_selection: *InstructionSelection, register: Register) *Operand.Index { - return switch (register.index) { - .physical => |physical| instruction_selection.physical_register_use_or_definition_list.getPtr(physical), - .virtual => |virtual_register_index| &mir.virtual_registers.get(virtual_register_index).use_def_list_head, - }; + switch (register.index) { + .physical => |physical| { + const operand_index = instruction_selection.physical_register_use_or_definition_list.getPtr(physical); + return operand_index; + }, + .virtual => |virtual_register_index| { + const virtual_register = mir.virtual_registers.get(virtual_register_index); + return &virtual_register.use_def_list_head; + }, + } } const Function = struct { @@ -3338,7 +3770,7 @@ pub const MIR = struct { try writer.print("{s}", .{@tagName(instruction.id)}); for (instruction.operands.items, 0..) 
|operand_index, i| { const operand = function.mir.operands.get(operand_index); - try writer.writeByte(' '); + try writer.print(" O{} ", .{operand_index.uniqueInteger()}); switch (operand.u) { .register => |register| { switch (register.index) { @@ -3346,6 +3778,13 @@ pub const MIR = struct { .virtual => |virtual| try writer.print("VR{}", .{virtual.uniqueInteger()}), } }, + .memory => |memory| { + const base = memory.addressing_mode.base; + switch (base) { + .register_base => unreachable, + .frame_index => |frame_index| try writer.print("SF{}", .{frame_index}), + } + }, else => try writer.writeAll(@tagName(operand.u)), } // switch (operand.u) { @@ -3405,12 +3844,18 @@ pub const MIR = struct { => {}, } } + instruction_allocation.ptr.* = .{ .id = instruction, .operands = list, .parent = instruction_selection.current_block, }; + if (instruction == .copy) { + const i = instruction_allocation.ptr.*; + print("Built copy: DST: {}. SRC: {}\n", .{ mir.operands.get(i.operands.items[0]).u.register.index, mir.operands.get(i.operands.items[1]).u.register.index }); + } + return instruction_allocation.index; } diff --git a/src/data_structures.zig b/src/data_structures.zig index 315c21d..85ea8d0 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -40,10 +40,10 @@ pub fn BlockList(comptime T: type) type { index: u6, block: u24, _reserved: bool = false, - valid: bool = true, + invalid: bool = false, pub const invalid = Index{ - .valid = false, + .invalid = true, .index = 0, .block = 0, }; @@ -53,7 +53,7 @@ pub fn BlockList(comptime T: type) type { } pub fn uniqueInteger(index: Index) u32 { - assert(index.valid); + assert(!index.invalid); return @as(u30, @truncate(@as(u32, @bitCast(index)))); } @@ -114,7 +114,7 @@ pub fn BlockList(comptime T: type) type { } pub fn get(list: *List, index: Index) *T { - assert(index.valid); + assert(!index.invalid); return &list.blocks.items[index.block].items[index.index]; } diff --git a/src/frontend/semantic_analyzer.zig 
b/src/frontend/semantic_analyzer.zig index ce296d6..b839d0f 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -214,23 +214,23 @@ const Analyzer = struct { fn processCall(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Call.Index { const node = analyzer.getScopeNode(scope_index, node_index); print("Node index: {}. Left index: {}\n", .{ node_index.uniqueInteger(), node.left.uniqueInteger() }); - assert(node.left.valid); - const left_value_index = switch (node.left.valid) { + assert(!node.left.invalid); + const left_value_index = switch (!node.left.invalid) { true => blk: { const member_or_namespace_node_index = node.left; - assert(member_or_namespace_node_index.valid); + assert(!member_or_namespace_node_index.invalid); const this_value_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, member_or_namespace_node_index); break :blk this_value_allocation.index; }, false => unreachable, //Value.Index.invalid, }; - const left_type = switch (left_value_index.valid) { - true => switch (analyzer.module.values.get(left_value_index).*) { + const left_type = switch (left_value_index.invalid) { + false => switch (analyzer.module.values.get(left_value_index).*) { .function => |function_index| analyzer.module.function_prototypes.get(analyzer.module.types.get(analyzer.module.functions.get(function_index).prototype).function).return_type, else => |t| @panic(@tagName(t)), }, - false => Type.Index.invalid, + true => Type.Index.invalid, }; const arguments_index = switch (node.id) { .call, .call_two => |call_tag| (try analyzer.module.argument_lists.append(analyzer.allocator, .{ @@ -340,8 +340,8 @@ const Analyzer = struct { for (switch_case_node_list, 0..) 
|switch_case_node_index, index| { _ = index; const switch_case_node = analyzer.getScopeNode(scope_index, switch_case_node_index); - switch (switch_case_node.left.valid) { - true => { + switch (switch_case_node.left.invalid) { + false => { const switch_case_condition_node = analyzer.getScopeNode(scope_index, switch_case_node.left); var switch_case_group = ArrayList(u32){}; switch (switch_case_condition_node.id) { @@ -390,7 +390,7 @@ const Analyzer = struct { switch_case_groups.appendAssumeCapacity(switch_case_group); }, - false => { + true => { unreachable; // if (existing_enums.items.len == enum_type.fields.items.len) { // unreachable; @@ -433,9 +433,9 @@ const Analyzer = struct { fn processAssignment(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Value { const node = analyzer.getScopeNode(scope_index, node_index); assert(node.id == .assign); - const assignment = switch (node.left.valid) { + const assignment = switch (node.left.invalid) { // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` - false => { + true => { var result = Value{ .unresolved = .{ .node_index = node.right, @@ -446,7 +446,7 @@ const Analyzer = struct { return result; }, - true => { + false => { // const id = analyzer.tokenIdentifier(.token); // print("id: {s}\n", .{id}); // const left = try analyzer.expression(scope_index, ExpectType.none, statement_node.left); @@ -470,9 +470,9 @@ const Analyzer = struct { fn processReturn(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) !Value { const node = analyzer.getScopeNode(scope_index, node_index); - const return_expression: Value.Index = switch (node_index.valid) { + const return_expression: Value.Index = switch (node_index.invalid) { // TODO: expect type - true => ret: { + false => ret: { const return_value_allocation = try analyzer.module.values.addOne(analyzer.allocator); return_value_allocation.ptr.* = .{ .unresolved = .{ @@ 
-482,7 +482,7 @@ const Analyzer = struct { try analyzer.resolveNode(return_value_allocation.ptr, scope_index, expect_type, node.left); break :ret return_value_allocation.index; }, - false => @panic("TODO: ret void"), + true => @panic("TODO: ret void"), }; const return_value_allocation = try analyzer.module.returns.append(analyzer.allocator, .{ @@ -501,7 +501,7 @@ const Analyzer = struct { fn lookupDeclarationInCurrentAndParentScopes(analyzer: *Analyzer, scope_index: Scope.Index, identifier_hash: u32) ?DeclarationLookup { var scope_iterator = scope_index; - while (scope_iterator.valid) { + while (!scope_iterator.invalid) { const scope = analyzer.module.scopes.get(scope_iterator); if (scope.declarations.get(identifier_hash)) |declaration_index| { return .{ @@ -535,8 +535,8 @@ const Analyzer = struct { const declaration = analyzer.module.declarations.get(declaration_index); // Up until now, only arguments have no initialization value - const typecheck_result = switch (declaration.init_value.valid) { - true => blk: { + const typecheck_result = switch (declaration.init_value.invalid) { + false => blk: { const init_value = analyzer.module.values.get(declaration.init_value); print("Declaration found: {}\n", .{init_value}); const is_unresolved = init_value.* == .unresolved; @@ -560,14 +560,14 @@ const Analyzer = struct { const typecheck_result = try analyzer.typeCheck(expect_type, declaration.type); if (init_value.isComptime() and declaration.mutability == .@"const") { - assert(declaration.init_value.valid); + assert(!declaration.init_value.invalid); assert(typecheck_result == .success); return declaration.init_value; } break :blk typecheck_result; }, - false => try analyzer.typeCheck(expect_type, declaration.type), + true => try analyzer.typeCheck(expect_type, declaration.type), }; const ref_allocation = try analyzer.module.values.append(analyzer.allocator, .{ @@ -580,7 +580,7 @@ const Analyzer = struct { else => declaration.type, }, .flexible_integer => blk: { - 
assert(declaration.type.valid); + assert(!declaration.type.invalid); break :blk declaration.type; }, }, @@ -627,7 +627,7 @@ const Analyzer = struct { }, .compiler_intrinsic => { const argument_list_node_index = node.left; - assert(argument_list_node_index.valid); + assert(!argument_list_node_index.invalid); const node_list_node = analyzer.getScopeNode(scope_index, argument_list_node_index); const node_list = analyzer.getScopeNodeList(scope_index, node_list_node); @@ -692,7 +692,7 @@ const Analyzer = struct { }, false => false_block: { const file_type = import_file.file.ptr.type; - assert(file_type.valid); + assert(!file_type.invalid); break :false_block file_type; }, }, @@ -714,7 +714,7 @@ const Analyzer = struct { }; const number_allocation = try analyzer.unresolvedAllocate(scope_index, argument_expect_type, argument_nodes.items[0]); const number = number_allocation.index; - assert(number.valid); + assert(!number.invalid); var arguments = std.mem.zeroes([6]Value.Index); for (argument_nodes.items[1..], 0..) 
|argument_node_index, argument_index| { const argument_allocation = try analyzer.unresolvedAllocate(scope_index, argument_expect_type, argument_node_index); @@ -840,7 +840,7 @@ const Analyzer = struct { const left_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, node.left); switch (left_allocation.ptr.*) { .type => |type_index| { - if (type_index.valid) { + if (!type_index.invalid) { const left_type = analyzer.module.types.get(type_index); switch (left_type.*) { .@"struct" => |struct_index| { @@ -930,7 +930,7 @@ const Analyzer = struct { const field_node = analyzer.getScopeNode(scope_index, field_node_index); const identifier = analyzer.tokenIdentifier(scope_index, field_node.token); print("Enum field: {s}\n", .{identifier}); - assert(!field_node.left.valid); + assert(field_node.left.invalid); const enum_hash_name = try analyzer.processIdentifier(identifier); @@ -1049,9 +1049,9 @@ const Analyzer = struct { const arguments_node_index = simple_function_prototype_node.left; const return_type_node_index = simple_function_prototype_node.right; - const arguments: ?[]const Declaration.Index = switch (arguments_node_index.valid) { - false => null, - true => blk: { + const arguments: ?[]const Declaration.Index = switch (arguments_node_index.invalid) { + true => null, + false => blk: { const argument_list_node = analyzer.getScopeNode(scope_index, arguments_node_index); // print("Function prototype argument list node: {}\n", .{function_prototype_node.left.uniqueInteger()}); const argument_node_list = switch (argument_list_node.id) { @@ -1161,7 +1161,7 @@ const Analyzer = struct { const scope = new_scope.ptr; const scope_index = new_scope.index; - const is_file = !parent_scope_index.valid; + const is_file = parent_scope_index.invalid; assert(is_file); const struct_allocation = try analyzer.module.structs.append(analyzer.allocator, .{ @@ -1171,7 +1171,7 @@ const Analyzer = struct { .@"struct" = struct_allocation.index, }); - if 
(!parent_scope_index.valid) { + if (parent_scope_index.invalid) { file.type = type_allocation.index; } @@ -1270,14 +1270,14 @@ const Analyzer = struct { fn symbolDeclaration(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index, scope_type: ScopeType) !Declaration.Index { const declaration_node = analyzer.getScopeNode(scope_index, node_index); assert(declaration_node.id == .simple_symbol_declaration); - const expect_type = switch (declaration_node.left.valid) { - true => switch (scope_type) { + const expect_type = switch (declaration_node.left.invalid) { + false => switch (scope_type) { .local => ExpectType{ .type_index = try analyzer.resolveType(scope_index, declaration_node.left), }, .global => ExpectType.none, }, - false => ExpectType.none, + true => ExpectType.none, }; const mutability: Compilation.Mutability = switch (analyzer.getScopeToken(scope_index, declaration_node.token).id) { .fixed_keyword_const => .@"const", @@ -1292,7 +1292,7 @@ const Analyzer = struct { } // TODO: Check if it is a keyword - assert(declaration_node.right.valid); + assert(!declaration_node.right.invalid); const argument = null; assert(argument == null); diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig index bd97791..fa0ef7b 100644 --- a/src/frontend/syntactic_analyzer.zig +++ b/src/frontend/syntactic_analyzer.zig @@ -36,24 +36,24 @@ pub const Node = packed struct(u128) { pub const Index = packed struct(u32) { value: u31, - valid: bool = true, + invalid: bool = false, pub const invalid = Index{ .value = 0, - .valid = false, + .invalid = true, }; pub fn get(index: Index) ?u32 { - return if (index.valid) index.value else null; + return if (index.invalid) null else index.value; } pub fn unwrap(index: Index) u32 { - assert(index.valid); + assert(!index.invalid); return index.value; } pub fn uniqueInteger(index: Index) u32 { - assert(index.valid); + assert(!index.invalid); return index.value; } }; @@ -677,7 +677,7 @@ const Analyzer =
struct { fn expressionPrecedence(analyzer: *Analyzer, minimum_precedence: i32) !Node.Index { var result = try analyzer.prefixExpression(); - if (result.valid) { + if (!result.invalid) { const prefix_node = analyzer.nodes.items[result.unwrap()]; std.debug.print("Prefix: {}\n", .{prefix_node.id}); } @@ -906,7 +906,7 @@ const Analyzer = struct { while (true) { const suffix_operator = try analyzer.suffixOperator(result); - if (suffix_operator.valid) { + if (!suffix_operator.invalid) { result = suffix_operator; } else { if (analyzer.tokens[analyzer.token_i].id == .left_parenthesis) { @@ -1183,7 +1183,7 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, source_file: []const }); assert(node_index.value == 0); - assert(node_index.valid); + assert(!node_index.invalid); std.debug.print("Start Parsing file root members\n", .{}); const members = try analyzer.containerMembers(); From b24bd8221e39fdd3a81f4d7b126f7e1cdfebddd7 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Wed, 8 Nov 2023 09:27:45 -0600 Subject: [PATCH 3/6] Add custom logger --- src/Compilation.zig | 166 +++++++++++++++++++- src/backend/emit.zig | 11 +- src/backend/intermediate_representation.zig | 22 ++- src/backend/x86_64.zig | 88 +++++++---- src/frontend/lexical_analyzer.zig | 17 +- src/frontend/semantic_analyzer.zig | 104 ++++++------ src/frontend/syntactic_analyzer.zig | 115 ++++++++------ src/main.zig | 65 +------- 8 files changed, 365 insertions(+), 223 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 765f7f7..90792dc 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -3,7 +3,6 @@ const Compilation = @This(); const std = @import("std"); const assert = std.debug.assert; const equal = std.mem.eql; -const print = std.debug.print; const Allocator = std.mem.Allocator; @@ -40,7 +39,104 @@ build_directory: std.fs.Dir, const cache_dir_name = "cache"; const installation_dir_name = "installation"; -pub fn init(allocator: Allocator) !*Compilation { +const 
ArgumentParsingError = error{ + main_package_path_not_specified, +}; + +fn reportUnterminatedArgumentError(string: []const u8) noreturn { + std.debug.panic("Unterminated argument: {s}", .{string}); +} + +fn parseArguments(allocator: Allocator) !Compilation.Module.Descriptor { + const arguments = (try std.process.argsAlloc(allocator))[1..]; + + var maybe_executable_path: ?[]const u8 = null; + var maybe_main_package_path: ?[]const u8 = null; + var target_triplet: []const u8 = "x86_64-linux-gnu"; + + var i: usize = 0; + while (i < arguments.len) : (i += 1) { + const current_argument = arguments[i]; + if (equal(u8, current_argument, "-o")) { + if (i + 1 != arguments.len) { + maybe_executable_path = arguments[i + 1]; + assert(maybe_executable_path.?.len != 0); + i += 1; + } else { + reportUnterminatedArgumentError(current_argument); + } + } else if (equal(u8, current_argument, "-target")) { + if (i + 1 != arguments.len) { + target_triplet = arguments[i + 1]; + i += 1; + } else { + reportUnterminatedArgumentError(current_argument); + } + } else if (equal(u8, current_argument, "-log")) { + if (i + 1 != arguments.len) { + i += 1; + + var log_argument_iterator = std.mem.splitScalar(u8, arguments[i], ','); + + while (log_argument_iterator.next()) |log_argument| { + var log_argument_splitter = std.mem.splitScalar(u8, log_argument, '.'); + const log_scope_candidate = log_argument_splitter.next() orelse unreachable; + var recognized_scope = false; + + inline for (@typeInfo(LoggerScope).Enum.fields) |logger_scope_enum_field| { + const log_scope = @field(LoggerScope, logger_scope_enum_field.name); + + if (equal(u8, @tagName(log_scope), log_scope_candidate)) { + const LogScope = getLoggerScopeType(log_scope); + + if (log_argument_splitter.next()) |particular_log_candidate| { + var recognized_particular = false; + inline for (@typeInfo(LogScope.Logger).Enum.fields) |particular_log_field| { + const particular_log = @field(LogScope.Logger, particular_log_field.name); + + if 
(equal(u8, particular_log_candidate, @tagName(particular_log))) { + LogScope.Logger.bitset.setPresent(particular_log, true); + recognized_particular = true; + } + } else if (!recognized_particular) std.debug.panic("Unrecognized particular log \"{s}\" in scope {s}", .{ particular_log_candidate, @tagName(log_scope) }); + } else { + unreachable; + } + + logger_bitset.setPresent(log_scope, true); + + recognized_scope = true; + } + } else if (!recognized_scope) std.debug.panic("Unrecognized log scope: {s}", .{log_scope_candidate}); + } + } else { + reportUnterminatedArgumentError(current_argument); + } + } else { + maybe_main_package_path = current_argument; + } + } + + const main_package_path = maybe_main_package_path orelse return error.main_package_path_not_specified; + + const executable_path = maybe_executable_path orelse blk: { + const executable_name = std.fs.path.basename(main_package_path[0 .. main_package_path.len - "/main.nat".len]); + assert(executable_name.len > 0); + const result = try std.mem.concat(allocator, u8, &.{ "nat/", executable_name }); + break :blk result; + }; + + const cross_target = try std.zig.CrossTarget.parse(.{ .arch_os_abi = target_triplet }); + const target = cross_target.toTarget(); + + return .{ + .main_package_path = main_package_path, + .executable_path = executable_path, + .target = target, + }; +} + +pub fn init(allocator: Allocator) !void { const compilation: *Compilation = try allocator.create(Compilation); const self_exe_path = try std.fs.selfExePathAlloc(allocator); @@ -56,7 +152,9 @@ pub fn init(allocator: Allocator) !*Compilation { try compilation.build_directory.makePath(cache_dir_name); try compilation.build_directory.makePath(installation_dir_name); - return compilation; + const compilation_descriptor = try parseArguments(allocator); + + try compilation.compileModule(compilation_descriptor); } pub const Struct = struct { @@ -617,7 +715,7 @@ pub const Module = struct { }; pub fn importFile(module: *Module, allocator: 
Allocator, current_file_index: File.Index, import_name: []const u8) !ImportPackageResult { - print("import: '{s}'\n", .{import_name}); + logln(.compilation, .import, "import: '{s}'\n", .{import_name}); if (equal(u8, import_name, "std")) { return module.importPackage(allocator, module.main_package.dependencies.get("std").?); } @@ -672,7 +770,7 @@ pub const Module = struct { .relative_path = relative_path, .package = package, }); - std.debug.print("Adding file #{}: {s}\n", .{ file_allocation.index.uniqueInteger(), full_path }); + logln(.compilation, .new_file, "Adding file #{}: {s}\n", .{ file_allocation.index.uniqueInteger(), full_path }); path_lookup.value_ptr.* = file_allocation.ptr; // break :blk file; break :blk .{ @@ -691,7 +789,7 @@ pub const Module = struct { pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult { const full_path = try std.fs.path.resolve(allocator, &.{ package.directory.path, package.source_path }); - print("Import full path: {s}\n", .{full_path}); + logln(.compilation, .import, "Import full path: {s}\n", .{full_path}); const import_file = try module.getFile(allocator, full_path, package.source_path, package); try import_file.ptr.addPackageReference(allocator, package); @@ -719,9 +817,7 @@ pub const Module = struct { file.status = .loaded_into_memory; try file.lex(allocator, file_index); - print("Start of parsing file #{}\n", .{file_index.uniqueInteger()}); try file.parse(allocator, file_index); - print("End of parsing file #{}\n", .{file_index.uniqueInteger()}); } fn getString(map: *StringKeyMap([]const u8), key: u32) ?[]const u8 { @@ -1020,3 +1116,57 @@ pub const File = struct { file.status = .parsed; } }; + +pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace, return_address: ?usize) noreturn { + std.builtin.default_panic(message, stack_trace, return_address); +} + +const LoggerScope = enum { + compilation, + lexer, + parser, + sema, + ir, + codegen, +}; + +const Logger = 
enum { + import, + new_file, + arguments, + var bitset = std.EnumSet(Logger).initEmpty(); +}; + +fn getLoggerScopeType(comptime logger_scope: LoggerScope) type { + comptime { + return switch (logger_scope) { + .compilation => @This(), + .lexer => lexical_analyzer, + .parser => syntactic_analyzer, + .sema => semantic_analyzer, + .ir => intermediate_representation, + .codegen => emit, + }; + } +} + +var logger_bitset = std.EnumSet(LoggerScope).initEmpty(); + +var writer = std.io.getStdErr().writer(); + +fn shouldLog(comptime logger_scope: LoggerScope, logger: getLoggerScopeType(logger_scope).Logger) bool { + return logger_bitset.contains(logger_scope) and getLoggerScopeType(logger_scope).Logger.bitset.contains(logger); +} + +pub fn logln(comptime logger_scope: LoggerScope, logger: getLoggerScopeType(logger_scope).Logger, comptime format: []const u8, arguments: anytype) void { + if (shouldLog(logger_scope, logger)) { + log(logger_scope, logger, format, arguments); + writer.writeByte('\n') catch unreachable; + } +} + +pub fn log(comptime logger_scope: LoggerScope, logger: getLoggerScopeType(logger_scope).Logger, comptime format: []const u8, arguments: anytype) void { + if (shouldLog(logger_scope, logger)) { + std.fmt.format(writer, format, arguments) catch unreachable; + } +} diff --git a/src/backend/emit.zig b/src/backend/emit.zig index 87876c5..ce6aca2 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -105,10 +105,15 @@ pub fn InstructionSelector(comptime Instruction: type) type { }; } +const x86_64 = @import("x86_64.zig"); +const aarch64 = @import("aarch64.zig"); + +pub const Logger = x86_64.Logger; + pub fn get(comptime arch: std.Target.Cpu.Arch) type { const backend = switch (arch) { - .x86_64 => @import("x86_64.zig"), - .aarch64 => @import("aarch64.zig"), + .x86_64 => x86_64, + .aarch64 => aarch64, else => {}, }; @@ -138,10 +143,8 @@ pub fn get(comptime arch: std.Target.Cpu.Arch) type { // switch (@import("builtin").os.tag) { // .linux => switch 
(@import("builtin").cpu.arch == arch) { // true => { - // std.debug.print("Executing...\n", .{}); // const entryPoint = result.getEntryPoint(fn () callconv(.SysV) noreturn); // entryPoint(); - // std.debug.print("This should not print...\n", .{}); // }, // false => {}, // }, diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index bc91dbf..3494b20 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -1,9 +1,10 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; -const print = std.debug.print; const Compilation = @import("../Compilation.zig"); +const log = Compilation.log; +const logln = Compilation.logln; const Module = Compilation.Module; const Package = Compilation.Package; @@ -14,6 +15,13 @@ const AutoArrayHashMap = data_structures.AutoArrayHashMap; const AutoHashMap = data_structures.AutoHashMap; const StringKeyMap = data_structures.StringKeyMap; +pub const Logger = enum { + function, + phi_removal, + + pub var bitset = std.EnumSet(Logger).initEmpty(); +}; + pub const Result = struct { blocks: BlockList(BasicBlock) = .{}, calls: BlockList(Call) = .{}, @@ -398,6 +406,7 @@ pub const Builder = struct { }; const function_decl_name = builder.ir.getFunctionName(function_declaration_allocation.index); + _ = function_decl_name; if (sema_prototype.arguments) |sema_arguments| { try function_declaration.arguments.ensureTotalCapacity(builder.allocator, @intCast(sema_arguments.len)); @@ -429,9 +438,6 @@ pub const Builder = struct { const return_type = builder.module.types.get(sema_prototype.return_type); const is_noreturn = return_type.* == .noreturn; - if (std.mem.eql(u8, function_decl_name, "print")) { - print("WTDASDAS", .{}); - } if (!is_noreturn) { const exit_block = try builder.newBlock(); @@ -545,7 +551,7 @@ pub const Builder = struct { fn optimizeFunction(builder: *Builder, function: *Function) !void { // HACK 
- print("\n[BEFORE OPTIMIZE]:\n{}", .{function}); + logln(.ir, .function, "\n[BEFORE OPTIMIZE]:\n{}", .{function}); var reachable_blocks = try builder.findReachableBlocks(function.blocks.items[0]); var did_something = true; @@ -616,7 +622,7 @@ pub const Builder = struct { } } - print("[AFTER OPTIMIZE]:\n{}", .{function}); + logln(.ir, .function, "[AFTER OPTIMIZE]:\n{}", .{function}); } fn removeUnreachablePhis(builder: *Builder, reachable_blocks: []const BasicBlock.Index, instruction_index: Instruction.Index) !bool { @@ -686,7 +692,7 @@ pub const Builder = struct { }; } } else { - print("TODO: maybe this phi removal is wrong?", .{}); + logln(.ir, .phi_removal, "TODO: maybe this phi removal is wrong?", .{}); instruction.* = .{ .copy = trivial_value, }; @@ -957,7 +963,7 @@ pub const Builder = struct { }, .declaration => |sema_declaration_index| { const sema_declaration = builder.module.declarations.get(sema_declaration_index); - print("Name: {s}\n", .{builder.module.getName(sema_declaration.name).?}); + //logln("Name: {s}\n", .{builder.module.getName(sema_declaration.name).?}); assert(sema_declaration.scope_type == .local); const declaration_type = builder.module.types.get(sema_declaration.type); switch (declaration_type.*) { diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index a114761..17b6942 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -1,7 +1,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; -const print = std.debug.print; +const panic = std.debug.panic; const emit = @import("emit.zig"); const ir = @import("intermediate_representation.zig"); @@ -12,8 +12,32 @@ const ArrayList = data_structures.ArrayList; const AutoArrayHashMap = data_structures.AutoArrayHashMap; const BlockList = data_structures.BlockList; +const log = Compilation.log; +const logln = Compilation.logln; + const x86_64 = @This(); +pub const Logger = enum { + register_allocation_new_instructions, + 
instruction_selection_block, + instruction_selection_ir_function, + instruction_selection_new_instruction, + instruction_selection_cache_flush, + instruction_selection_mir_function, + register_allocation_block, + register_allocation_problematic_hint, + register_allocation_assignment, + register_allocation_reload, + register_allocation_function_before, + register_allocation_new_instruction, + register_allocation_new_instruction_function_before, + register_allocation_instruction_avoid_copy, + register_allocation_function_after, + register_allocation_operand_list_verification, + + pub var bitset = std.EnumSet(Logger).initEmpty(); +}; + const Register = struct { list: List = .{}, index: Index, @@ -1008,7 +1032,6 @@ const InstructionSelection = struct { fn loadRegisterFromStackSlot(instruction_selection: *InstructionSelection, mir: *MIR, insert_before_instruction_index: usize, destination_register: Register.Physical, frame_index: u32, register_class: Register.Class, virtual_register: Register.Virtual.Index) !void { _ = virtual_register; const stack_object = instruction_selection.stack_objects.items[frame_index]; - print("Stack object size: {}\n", .{stack_object.size}); switch (@divExact(stack_object.size, 8)) { @sizeOf(u64) => { switch (register_class) { @@ -1043,7 +1066,7 @@ const InstructionSelection = struct { destination_operand, source_operand, }); - print("Inserting instruction at index {}", .{insert_before_instruction_index}); + logln(.codegen, .register_allocation_new_instructions, "Inserting instruction at index {}", .{insert_before_instruction_index}); try mir.blocks.get(instruction_selection.current_block).instructions.insert(mir.allocator, insert_before_instruction_index, instruction_index); }, else => |t| @panic(@tagName(t)), @@ -1081,12 +1104,12 @@ const InstructionSelection = struct { destination_operand, source_operand, }); - print("Inserting instruction at index {}\n", .{insert_before_instruction_index}); + logln(.codegen, 
.register_allocation_new_instructions, "Inserting instruction at index {}\n", .{insert_before_instruction_index}); try mir.blocks.get(instruction_selection.current_block).instructions.insert(mir.allocator, insert_before_instruction_index, instruction_index); }, else => |t| @panic(@tagName(t)), }, - else => std.debug.panic("Stack object size: {}\n", .{stack_object.size}), + else => panic("Stack object size: {} bits\n", .{stack_object.size}), } } @@ -1420,7 +1443,6 @@ const InstructionSelection = struct { // TODO: addLiveIn MachineBasicBlock ? unreachable; } - print("After livein: {}\n", .{instruction_selection.function}); } }; @@ -2001,16 +2023,15 @@ pub const MIR = struct { instruction_selections: ArrayList(InstructionSelection) = .{}, virtual_registers: BlockList(Register.Virtual) = .{}, - pub fn selectInstructions(allocator: Allocator, intermediate: *ir.Result, target: std.Target) !MIR { - print("\n[INSTRUCTION SELECTION]\n\n", .{}); - var mir_stack = MIR{ + pub fn selectInstructions(allocator: Allocator, intermediate: *ir.Result, target: std.Target) !*MIR { + logln(.codegen, .instruction_selection_block, "\n[INSTRUCTION SELECTION]\n\n", .{}); + const mir = try allocator.create(MIR); + mir.* = .{ .allocator = allocator, .ir = intermediate, .target = target, }; - const mir = &mir_stack; - try mir.blocks.ensureCapacity(allocator, intermediate.blocks.len); try mir.functions.ensureCapacity(allocator, intermediate.function_definitions.len); try mir.instruction_selections.ensureUnusedCapacity(allocator, intermediate.function_definitions.len); @@ -2019,7 +2040,7 @@ pub const MIR = struct { while (function_definition_iterator.nextPointer()) |ir_function| { const fn_name = mir.ir.getFunctionName(ir_function.declaration); - print("=========\n{}=========\n", .{ir_function}); + logln(.codegen, .instruction_selection_ir_function, "=========\n{}=========\n", .{ir_function}); const instruction_selection = mir.instruction_selections.addOneAssumeCapacity(); const 
function_allocation = try mir.functions.addOne(mir.allocator); @@ -2065,7 +2086,6 @@ pub const MIR = struct { try instruction_selection.lowerArguments(mir, ir_function); - print("Block count: {}\n", .{function.blocks.items.len}); var block_i: usize = function.blocks.items.len; while (block_i > 0) { @@ -2077,7 +2097,6 @@ pub const MIR = struct { const ir_block = mir.ir.blocks.get(ir_block_index); var instruction_i: usize = ir_block.instructions.items.len; - print("Instruction count: {}\n", .{instruction_i}); var folded_load = false; @@ -2089,7 +2108,7 @@ pub const MIR = struct { instruction_selection.local_value_map.clearRetainingCapacity(); - print("Instruction #{}\n", .{instruction_i}); + logln(.codegen, .instruction_selection_new_instruction, "Instruction #{}\n", .{instruction_i}); switch (ir_instruction.*) { .ret => |ir_ret_index| { @@ -2545,7 +2564,7 @@ pub const MIR = struct { const instruction_index = instruction_selection.instruction_cache.items[i]; const instruction = mir.instructions.get(instruction_index); - print("Inserting instruction #{} ({s}) into index {} (instruction count: {})\n", .{ instruction_index.uniqueInteger(), @tagName(instruction.id), block.current_stack_index, block.instructions.items.len }); + logln(.codegen, .instruction_selection_cache_flush, "Inserting instruction #{} ({s}) into index {} (instruction count: {})\n", .{ instruction_index.uniqueInteger(), @tagName(instruction.id), block.current_stack_index, block.instructions.items.len }); try block.instructions.insert(mir.allocator, block.current_stack_index, instruction_index); } @@ -2555,10 +2574,10 @@ pub const MIR = struct { try instruction_selection.emitLiveInCopies(mir, function.blocks.items[0]); - print("=========\n{}=========\n", .{function}); + logln(.codegen, .instruction_selection_mir_function, "=========\n{}=========\n", .{function}); } - return mir_stack; + return mir; } fn getNextInstructionIndex(mir: *MIR, instruction_index: Instruction.Index) usize { @@ -2783,8 +2802,8 
@@ pub const MIR = struct { // TODO: asserts const assert_result = !operand.flags.isKill() or live_register.last_use.eq(instruction_index); if (assert_result) { - print("Existing live register at instruction #{}: {}\n", .{ instruction_index.uniqueInteger(), live_register }); - print("Function until now: {}\n", .{instruction_selection.function}); + // logln("Existing live register at instruction #{}: {}\n", .{ instruction_index.uniqueInteger(), live_register }); + // logln("Function until now: {}\n", .{instruction_selection.function}); assert(assert_result); } }, @@ -2860,13 +2879,13 @@ pub const MIR = struct { register_allocator.assignVirtualToPhysicalRegister(live_register, physical_register); return; } else { - print("Second hint {s} not free\n", .{@tagName(physical_register)}); + logln(.codegen, .register_allocation_problematic_hint, "Second hint {s} not free\n", .{@tagName(physical_register)}); } } else { unreachable; } } else { - print("Can't take hint for VR{} for instruction #{}\n", .{ virtual_register.uniqueInteger(), instruction_index.uniqueInteger() }); + logln(.codegen, .register_allocation_problematic_hint, "Can't take hint for VR{} for instruction #{}\n", .{ virtual_register.uniqueInteger(), instruction_index.uniqueInteger() }); } const register_class_members = registers_by_class.get(register_class); @@ -2878,7 +2897,7 @@ pub const MIR = struct { // for (register_class_members) |candidate_register| { // print("{s}, ", .{@tagName(candidate_register)}); // } - print("\n", .{}); + // print("\n", .{}); for (register_class_members) |candidate_register| { if (register_allocator.isRegisterUsedInInstruction(candidate_register, look_at_physical_register_uses)) continue; const spill_cost = register_allocator.computeSpillCost(candidate_register); @@ -3008,7 +3027,7 @@ pub const MIR = struct { return register; } - print("Missed oportunity for register allocation tracing copy chain for VR{}\n", .{virtual_register_index.uniqueInteger()}); + logln(.codegen, 
.register_allocation_problematic_hint, "Missed oportunity for register allocation tracing copy chain for VR{}\n", .{virtual_register_index.uniqueInteger()}); }, else => |t| @panic(@tagName(t)), } @@ -3029,7 +3048,7 @@ pub const MIR = struct { .virtual = virtual_register, }); - print("Assigning V{} to {s}\n", .{ virtual_register.uniqueInteger(), @tagName(register) }); + logln(.codegen, .register_allocation_assignment, "Assigning V{} to {s}\n", .{ virtual_register.uniqueInteger(), @tagName(register) }); // TODO: debug info } @@ -3065,7 +3084,7 @@ pub const MIR = struct { fn reload(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, before_index: usize, virtual_register: Register.Virtual.Index, physical_register: Register.Physical) !void { const frame_index = try register_allocator.getStackSpaceFor(mir, instruction_selection, virtual_register); const register_class = mir.virtual_registers.get(virtual_register).register_class; - print("Frame index: {}\n", .{frame_index}); + logln(.codegen, .register_allocation_reload, "Frame index: {}\n", .{frame_index}); try instruction_selection.loadRegisterFromStackSlot(mir, before_index, physical_register, frame_index, register_class, virtual_register); } @@ -3302,7 +3321,7 @@ pub const MIR = struct { } pub fn allocateRegisters(mir: *MIR) !void { - print("\n[REGISTER ALLOCATION]\n\n", .{}); + logln(.codegen, .register_allocation_block, "[REGISTER ALLOCATION]\n\n", .{}); const function_count = mir.functions.len; var function_iterator = mir.functions.iterator(); const register_count = @typeInfo(Register.Physical).Enum.fields.len; @@ -3313,7 +3332,7 @@ pub const MIR = struct { for (0..function_count) |function_index| { const function = function_iterator.nextPointer().?; const instruction_selection = &mir.instruction_selections.items[function_index]; - print("Allocating registers for {}\n", .{function}); + logln(.codegen, .register_allocation_function_before, "Allocating registers for {}\n", 
.{function}); var block_i: usize = function.blocks.items.len; var register_allocator = try RegisterAllocator.init(mir, instruction_selection); @@ -3331,8 +3350,8 @@ pub const MIR = struct { const instruction_index = block.instructions.items[instruction_i]; const instruction = mir.instructions.get(instruction_index); - print("===============\nInstruction {} (#{})\n", .{ instruction_i, instruction_index.uniqueInteger() }); - print("{}\n", .{function}); + logln(.codegen, .register_allocation_new_instruction, "===============\nInstruction {} (#{})\n", .{ instruction_i, instruction_index.uniqueInteger() }); + logln(.codegen, .register_allocation_new_instruction_function_before, "{}\n", .{function}); register_allocator.used_in_instruction = RegisterBitset.initEmpty(); @@ -3456,7 +3475,7 @@ pub const MIR = struct { if (std.meta.eql(dst_register, src_register)) { try register_allocator.coalesced.append(mir.allocator, instruction_index); - print("Avoiding copy...\n", .{}); + logln(.codegen, .register_allocation_instruction_avoid_copy, "Avoiding copy...\n", .{}); } } } @@ -3471,7 +3490,7 @@ pub const MIR = struct { } else unreachable; } - print("{}\n============\n", .{function}); + logln(.codegen, .register_allocation_function_after, "{}\n============\n", .{function}); } } @@ -3726,7 +3745,7 @@ pub const MIR = struct { const instruction_index = operand.parent; assert(instruction_index.valid); const instruction = mir.instructions.get(instruction_index); - print("Verifying instruction #{}, operand #{}\n", .{ instruction_index.uniqueInteger(), mir.operands.indexOf(operand).uniqueInteger() }); + logln(.codegen, .register_allocation_operand_list_verification, "Verifying instruction #{}, operand #{}\n", .{ instruction_index.uniqueInteger(), mir.operands.indexOf(operand).uniqueInteger() }); _ = instruction; assert(operand.u == .register); assert(operand.u.register.index == .virtual and operand.u.register.index.virtual.eq(register)); @@ -3853,7 +3872,8 @@ pub const MIR = struct { if 
(instruction == .copy) { const i = instruction_allocation.ptr.*; - print("Built copy: DST: {}. SRC: {}\n", .{ mir.operands.get(i.operands.items[0]).u.register.index, mir.operands.get(i.operands.items[1]).u.register.index }); + _ = i; + // print("Built copy: DST: {}. SRC: {}\n", .{ mir.operands.get(i.operands.items[0]).u.register.index, mir.operands.get(i.operands.items[1]).u.register.index }); } return instruction_allocation.index; diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig index f1334ac..45fbc47 100644 --- a/src/frontend/lexical_analyzer.zig +++ b/src/frontend/lexical_analyzer.zig @@ -11,6 +11,7 @@ const enumFromString = data_structures.enumFromString; const Compilation = @import("../Compilation.zig"); const File = Compilation.File; +const logln = Compilation.logln; const fs = @import("../fs.zig"); pub const Token = packed struct(u64) { @@ -112,6 +113,12 @@ pub const Result = struct { time: u64, }; +pub const Logger = enum { + main, + + pub var bitset = std.EnumSet(Logger).initEmpty(); +}; + pub fn analyze(allocator: Allocator, text: []const u8, file_index: File.Index) !Result { _ = file_index; const time_start = std.time.Instant.now() catch unreachable; @@ -134,8 +141,7 @@ pub fn analyze(allocator: Allocator, text: []const u8, file_index: File.Index) ! } // const identifier = text[start_index..][0 .. index - start_index]; - // _ = identifier; - // std.debug.print("Identifier: {s}\n", .{identifier}); + // logln("Identifier: {s}", .{identifier}); if (start_character == 'u' or start_character == 's') { var index_integer = start_index + 1; @@ -205,11 +211,8 @@ pub fn analyze(allocator: Allocator, text: []const u8, file_index: File.Index) ! }); } - const should_log = true; - if (should_log) { - for (tokens.items, 0..) |token, i| { - std.debug.print("#{} {s}\n", .{ i, @tagName(token.id) }); - } + for (tokens.items, 0..) 
|token, i| { + logln(.lexer, .main, "#{} {s}\n", .{ i, @tagName(token.id) }); } const time_end = std.time.Instant.now() catch unreachable; diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index b839d0f..415fc4f 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -2,6 +2,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; const equal = std.mem.eql; +const panic = std.debug.panic; const Compilation = @import("../Compilation.zig"); const File = Compilation.File; const Module = Compilation.Module; @@ -23,6 +24,23 @@ const Struct = Compilation.Struct; const Type = Compilation.Type; const Value = Compilation.Value; +const log = Compilation.log; +const logln = Compilation.logln; + +pub const Logger = enum { + type, + identifier, + symbol_declaration, + scope_node, + node, + typecheck, + @"switch", + block, + call, + + pub var bitset = std.EnumSet(Logger).initEmpty(); +}; + const lexical_analyzer = @import("lexical_analyzer.zig"); const Token = lexical_analyzer.Token; @@ -35,8 +53,6 @@ const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const HashMap = data_structures.AutoHashMap; -const print = std.debug.print; - const Analyzer = struct { allocator: Allocator, module: *Module, @@ -52,7 +68,7 @@ const Analyzer = struct { const scope = analyzer.module.scopes.get(scope_index); const file = analyzer.module.files.get(scope.file); const result = &file.syntactic_analyzer_result.nodes.items[node_index.unwrap()]; - print("Fetching node #{} (0x{x}) from scope #{} from file #{} with id: {s}\n", .{ node_index.uniqueInteger(), @intFromPtr(result), scope_index.uniqueInteger(), scope.file.uniqueInteger(), @tagName(result.id) }); + logln(.sema, .scope_node, "Fetching node #{} (0x{x}) from scope #{} from file #{} with id: {s}\n", .{ node_index.uniqueInteger(), @intFromPtr(result), scope_index.uniqueInteger(), 
scope.file.uniqueInteger(), @tagName(result.id) }); return result.*; } @@ -116,7 +132,7 @@ const Analyzer = struct { } fn block(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index { - print("Resolving block from scope #{} in file #{}\n", .{ scope_index.uniqueInteger(), analyzer.module.scopes.get(scope_index).file.uniqueInteger() }); + logln(.sema, .block, "Resolving block from scope #{} in file #{}\n", .{ scope_index.uniqueInteger(), analyzer.module.scopes.get(scope_index).file.uniqueInteger() }); var reaches_end = true; const block_node = analyzer.getScopeNode(scope_index, node_index); var statement_nodes = ArrayList(Node.Index){}; @@ -138,7 +154,8 @@ const Analyzer = struct { .block, .block_zero, .block_one, .block_two => false, else => |t| @panic(@tagName(t)), }; - print("Is comptime: {}\n", .{is_comptime}); + + logln(.sema, .block, "Is comptime: {}\n", .{is_comptime}); var statements = ArrayList(Value.Index){}; @@ -213,7 +230,7 @@ const Analyzer = struct { fn processCall(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Call.Index { const node = analyzer.getScopeNode(scope_index, node_index); - print("Node index: {}. Left index: {}\n", .{ node_index.uniqueInteger(), node.left.uniqueInteger() }); + logln(.sema, .call, "Node index: {}. Left index: {}\n", .{ node_index.uniqueInteger(), node.left.uniqueInteger() }); assert(!node.left.invalid); const left_value_index = switch (!node.left.invalid) { true => blk: { @@ -247,7 +264,7 @@ const Analyzer = struct { const function = analyzer.module.functions.get(function_index); const function_prototype = analyzer.module.function_prototypes.get(analyzer.module.types.get(function.prototype).function); const argument_declarations = function_prototype.arguments.?; - print("Argument declaration count: {}. 
Argument node list count: {}\n", .{ argument_declarations.len, call_argument_node_list.len }); + logln(.sema, .call, "Argument declaration count: {}. Argument node list count: {}\n", .{ argument_declarations.len, call_argument_node_list.len }); var argument_array = ArrayList(Value.Index){}; if (argument_declarations.len == call_argument_node_list.len) { for (argument_declarations, call_argument_node_list) |argument_declaration_index, argument_node_index| { @@ -276,7 +293,7 @@ const Analyzer = struct { break :b argument_array; } else { - std.debug.panic("Function call has argument count mismatch: call has {}, function declaration has {}\n", .{ call_argument_node_list.len, argument_declarations.len }); + panic("Function call has argument count mismatch: call has {}, function declaration has {}\n", .{ call_argument_node_list.len, argument_declarations.len }); } }, else => |t| @panic(@tagName(t)), @@ -306,7 +323,8 @@ const Analyzer = struct { if (enum_field.name == enum_name_hash) { return enum_name_hash; } - print("Existing \"{s}\" != current \"{s}\"\n", .{ existing, enum_name }); + + logln(.sema, .typecheck, "Existing enum field \"{s}\" != enum literal \"{s}\"\n", .{ existing, enum_name }); } else { return null; } @@ -411,7 +429,7 @@ const Analyzer = struct { unreachable; }; - print("Index: {}\n", .{group_index}); + logln(.sema, .@"switch", "Index: {}\n", .{group_index}); const true_switch_case_node = analyzer.getScopeNode(scope_index, switch_case_node_list[group_index]); var result = Value{ @@ -448,7 +466,7 @@ const Analyzer = struct { }, false => { // const id = analyzer.tokenIdentifier(.token); - // print("id: {s}\n", .{id}); + // logln("id: {s}\n", .{id}); // const left = try analyzer.expression(scope_index, ExpectType.none, statement_node.left); // if (analyzer.module.values.get(left).isComptime() and analyzer.module.values.get(right).isComptime()) { @@ -518,18 +536,9 @@ const Analyzer = struct { fn doIdentifier(analyzer: *Analyzer, scope_index: Scope.Index, 
expect_type: ExpectType, node_token: Token.Index, node_scope_index: Scope.Index) !Value.Index { const identifier = analyzer.tokenIdentifier(node_scope_index, node_token); - print("Referencing identifier: \"{s}\"\n", .{identifier}); + logln(.sema, .identifier, "Referencing identifier: \"{s}\"\n", .{identifier}); const identifier_hash = try analyzer.processIdentifier(identifier); - if (equal(u8, identifier, "print")) { - print("WTF\n", .{}); - } - - if (equal(u8, identifier, "windows")) { - print("WTF\n", .{}); - unreachable; - } - if (analyzer.lookupDeclarationInCurrentAndParentScopes(scope_index, identifier_hash)) |lookup| { const declaration_index = lookup.declaration; const declaration = analyzer.module.declarations.get(declaration_index); @@ -538,7 +547,7 @@ const Analyzer = struct { const typecheck_result = switch (declaration.init_value.invalid) { false => blk: { const init_value = analyzer.module.values.get(declaration.init_value); - print("Declaration found: {}\n", .{init_value}); + logln(.sema, .identifier, "Declaration found: {}\n", .{init_value}); const is_unresolved = init_value.* == .unresolved; switch (is_unresolved) { true => { @@ -554,8 +563,8 @@ const Analyzer = struct { false => {}, } - print("Declaration resolved as: {}\n", .{init_value}); - print("Declaration mutability: {s}. Is comptime: {}\n", .{ @tagName(declaration.mutability), init_value.isComptime() }); + logln(.sema, .identifier, "Declaration resolved as: {}\n", .{init_value}); + logln(.sema, .identifier, "Declaration mutability: {s}. 
Is comptime: {}\n", .{ @tagName(declaration.mutability), init_value.isComptime() }); const typecheck_result = try analyzer.typeCheck(expect_type, declaration.type); @@ -613,7 +622,7 @@ const Analyzer = struct { }; } else { const scope = analyzer.module.scopes.get(scope_index); - std.debug.panic("Identifier \"{s}\" not found in scope #{} of file #{} referenced by scope #{} of file #{}: {s}", .{ identifier, scope_index.uniqueInteger(), scope.file.uniqueInteger(), node_scope_index.uniqueInteger(), analyzer.module.scopes.get(node_scope_index).file.uniqueInteger(), tokenBytes(analyzer.getScopeToken(scope_index, node_token), analyzer.getScopeSourceFile(scope_index)) }); + panic("Identifier \"{s}\" not found in scope #{} of file #{} referenced by scope #{} of file #{}: {s}", .{ identifier, scope_index.uniqueInteger(), scope.file.uniqueInteger(), node_scope_index.uniqueInteger(), analyzer.module.scopes.get(node_scope_index).file.uniqueInteger(), tokenBytes(analyzer.getScopeToken(scope_index, node_token), analyzer.getScopeSourceFile(scope_index)) }); } } @@ -641,7 +650,7 @@ const Analyzer = struct { fn resolveNode(analyzer: *Analyzer, value: *Value, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!void { const node = analyzer.getScopeNode(scope_index, node_index); - print("Resolving node #{} in scope #{} from file #{}: {}\n", .{ node_index.uniqueInteger(), scope_index.uniqueInteger(), analyzer.module.scopes.get(scope_index).file.uniqueInteger(), node }); + logln(.sema, .node, "Resolving node #{} in scope #{} from file #{}: {}\n", .{ node_index.uniqueInteger(), scope_index.uniqueInteger(), analyzer.module.scopes.get(scope_index).file.uniqueInteger(), node }); assert(value.* == .unresolved); @@ -669,8 +678,8 @@ const Analyzer = struct { }, .compiler_intrinsic_one, .compiler_intrinsic_two, .compiler_intrinsic => blk: { const intrinsic_name = analyzer.tokenIdentifier(scope_index, node.token + 1); - print("Intrinsic: {s}\n", .{intrinsic_name}); 
- const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse std.debug.panic("Unknown intrinsic: {s}\n", .{intrinsic_name}); + logln(.sema, .node, "Intrinsic: {s}\n", .{intrinsic_name}); + const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse panic("Unknown intrinsic: {s}\n", .{intrinsic_name}); switch (intrinsic) { .import => { assert(node.id == .compiler_intrinsic_one); @@ -679,7 +688,7 @@ const Analyzer = struct { .string_literal => { const import_name = analyzer.tokenStringLiteral(scope_index, import_argument.token); const import_file = try analyzer.module.importFile(analyzer.allocator, analyzer.current_file, import_name); - print("Importing \"{s}\"...\n", .{import_name}); + logln(.sema, .node, "Importing \"{s}\"...\n", .{import_name}); const result = .{ .type = switch (import_file.file.is_new) { @@ -687,7 +696,7 @@ const Analyzer = struct { const new_file_index = import_file.file.index; try analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, new_file_index); const analyze_result = try analyzeFile(value, analyzer.allocator, analyzer.module, new_file_index); - print("Done analyzing {s}!\n", .{import_name}); + logln(.sema, .node, "Done analyzing {s}!\n", .{import_name}); break :true_block analyze_result; }, false => false_block: { @@ -705,7 +714,7 @@ const Analyzer = struct { }, .syscall => { var argument_nodes = try analyzer.getArguments(scope_index, node_index); - print("Argument count: {}\n", .{argument_nodes.items.len}); + logln(.sema, .node, "Argument count: {}\n", .{argument_nodes.items.len}); if (argument_nodes.items.len > 0 and argument_nodes.items.len <= 6 + 1) { const argument_expect_type = .{ .flexible_integer = .{ @@ -741,7 +750,7 @@ const Analyzer = struct { assert(node.id == .compiler_intrinsic_one); const message_node = analyzer.getScopeNode(scope_index, node.left); switch (message_node.id) { - .string_literal => std.debug.panic("error: {s}", 
.{analyzer.tokenStringLiteral(scope_index, message_node.token)}), + .string_literal => panic("error: {s}", .{analyzer.tokenStringLiteral(scope_index, message_node.token)}), else => |t| @panic(@tagName(t)), } unreachable; @@ -833,11 +842,12 @@ const Analyzer = struct { .call = try analyzer.processCall(scope_index, node_index), }, .field_access => blk: { - print("left alocation...\n", .{}); + logln(.sema, .node, "left alocation...\n", .{}); const identifier = analyzer.tokenIdentifier(scope_index, node.right.value); - print("Field access identifier for RHS: \"{s}\"\n", .{identifier}); + logln(.sema, .node, "Field access identifier for RHS: \"{s}\"\n", .{identifier}); analyzer.debugNode(scope_index, node_index); const left_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, node.left); + switch (left_allocation.ptr.*) { .type => |type_index| { if (!type_index.invalid) { @@ -852,13 +862,13 @@ const Analyzer = struct { .declaration_reference => |declaration_reference| { const declaration = analyzer.module.declarations.get(declaration_reference.value); const declaration_name = analyzer.module.getName(declaration.name).?; - print("Decl ref: {s}\n", .{declaration_name}); - print("TODO: maybe this should not be runtime", .{}); + logln(.sema, .node, "Decl ref: {s}\n", .{declaration_name}); + logln(.sema, .node, "TODO: maybe this should not be runtime", .{}); unreachable; }, else => |t| @panic(@tagName(t)), } - print("Right: {}\n", .{right_value}); + logln(.sema, .node, "Right: {}\n", .{right_value}); // struct_scope.declarations.get(identifier); unreachable; @@ -877,7 +887,7 @@ const Analyzer = struct { }; const enum_field = analyzer.module.enum_fields.get(result); const enum_field_name = analyzer.module.getName(enum_field.name).?; - print("Enum field name resolution: {s}\n", .{enum_field_name}); + logln(.sema, .node, "Enum field name resolution: {s}\n", .{enum_field_name}); break :blk .{ .enum_field = result, }; @@ -886,7 +896,7 @@ const Analyzer = 
struct { } unreachable; } else { - std.debug.panic("Identifier \"{s}\" not found. Type empty", .{identifier}); + panic("Identifier \"{s}\" not found. Type empty", .{identifier}); } }, .declaration_reference => |declaration_reference| { @@ -929,7 +939,7 @@ const Analyzer = struct { for (field_node_list.items) |field_node_index| { const field_node = analyzer.getScopeNode(scope_index, field_node_index); const identifier = analyzer.tokenIdentifier(scope_index, field_node.token); - print("Enum field: {s}\n", .{identifier}); + logln(.sema, .node, "Enum field: {s}\n", .{identifier}); assert(field_node.left.invalid); const enum_hash_name = try analyzer.processIdentifier(identifier); @@ -966,7 +976,7 @@ const Analyzer = struct { const node = analyzer.getScopeNode(scope_index, node_index); const source_file = analyzer.getScopeSourceFile(scope_index); const token = analyzer.getScopeToken(scope_index, node.token); - print("Debugging node {s}:\n\n```\n{s}\n```\n", .{ @tagName(node.id), source_file[token.start..] }); + logln(.sema, .node, "Debugging node {s}:\n\n```\n{s}\n```\n", .{ @tagName(node.id), source_file[token.start..] 
}); } fn processStringLiteral(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !u32 { @@ -989,7 +999,7 @@ const Analyzer = struct { const token = analyzer.getScopeToken(scope_index, type_node.token); const source_file = analyzer.getScopeSourceFile(scope_index); const identifier = tokenBytes(token, source_file); - print("Identifier: \"{s}\"\n", .{identifier}); + logln(.sema, .type, "Identifier: \"{s}\"", .{identifier}); const resolved_value_index = try analyzer.doIdentifier(scope_index, ExpectType.type, type_node.token, scope_index); const resolved_value = analyzer.module.values.get(resolved_value_index); break :blk switch (resolved_value.*) { @@ -1000,7 +1010,7 @@ const Analyzer = struct { .keyword_noreturn => Type.noreturn, inline .signed_integer_type, .unsigned_integer_type => |int_type_signedness| blk: { const bit_count: u16 = @intCast(type_node.left.value); - print("Bit count: {}\n", .{bit_count}); + logln(.sema, .type, "Bit count: {}", .{bit_count}); break :blk switch (bit_count) { inline 8, 16, 32, 64 => |hardware_bit_count| Type.Integer.getIndex(.{ .bit_count = hardware_bit_count, @@ -1053,7 +1063,7 @@ const Analyzer = struct { true => null, false => blk: { const argument_list_node = analyzer.getScopeNode(scope_index, arguments_node_index); - // print("Function prototype argument list node: {}\n", .{function_prototype_node.left.uniqueInteger()}); + // logln("Function prototype argument list node: {}\n", .{function_prototype_node.left.uniqueInteger()}); const argument_node_list = switch (argument_list_node.id) { .node_list => analyzer.getScopeNodeList(scope_index, argument_list_node), else => |t| @panic(@tagName(t)), @@ -1248,7 +1258,7 @@ const Analyzer = struct { if (analyzer.lookupDeclarationInCurrentAndParentScopes(scope_index, identifier_index)) |lookup| { const declaration_name = analyzer.tokenIdentifier(lookup.scope, identifier_token); - std.debug.panic("Existing name in lookup: {s}", .{declaration_name}); + panic("Existing name in 
lookup: {s}", .{declaration_name}); } // Check if the symbol name is already occupied in the same scope @@ -1287,7 +1297,7 @@ const Analyzer = struct { const expected_identifier_token_index = declaration_node.token + 1; const expected_identifier_token = analyzer.getScopeToken(scope_index, expected_identifier_token_index); if (expected_identifier_token.id != .identifier) { - print("Error: found: {}", .{expected_identifier_token.id}); + logln(.sema, .symbol_declaration, "Error: found: {}", .{expected_identifier_token.id}); @panic("Expected identifier"); } // TODO: Check if it is a keyword @@ -1483,7 +1493,7 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, const value = module.values.get(decl.init_value); module.entry_point = switch (value.*) { .function => |function_index| function_index.uniqueInteger(), - .unresolved => std.debug.panic("Unresolved declaration: {s}\n", .{declaration_name}), + .unresolved => panic("Unresolved declaration: {s}\n", .{declaration_name}), else => |t| @panic(@tagName(t)), }; break; diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig index fa0ef7b..1d16353 100644 --- a/src/frontend/syntactic_analyzer.zig +++ b/src/frontend/syntactic_analyzer.zig @@ -2,7 +2,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; const equal = std.mem.eql; -const log = std.log; const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; @@ -14,6 +13,8 @@ const Token = lexical_analyzer.Token; const Compilation = @import("../Compilation.zig"); const File = Compilation.File; +const log = Compilation.log; +const logln = Compilation.logln; pub const Result = struct { nodes: ArrayList(Node), @@ -25,6 +26,21 @@ pub const Options = packed struct { is_comptime: bool, }; +pub const Logger = enum { + token_errors, + symbol_declaration, + node_creation, + main_node, + container_members, + block, + assign, + 
suffix, + precedence, + @"switch", + + pub var bitset = std.EnumSet(Logger).initEmpty(); +}; + // TODO: pack it to be more efficient pub const Node = packed struct(u128) { token: u32, @@ -153,7 +169,7 @@ const Analyzer = struct { const result = token_i; return result; } else { - std.debug.print("Unexpected token {s} when expected {s}\n", .{ @tagName(token.id), @tagName(token_id) }); + logln(.parser, .token_errors, "Unexpected token {s} when expected {s}\n", .{ @tagName(token.id), @tagName(token_id) }); return error.unexpected_token; } } @@ -169,9 +185,9 @@ const Analyzer = struct { analyzer.token_i += 1; const declaration_name_token = try analyzer.expectToken(.identifier); const declaration_name = analyzer.bytes(declaration_name_token); - std.debug.print("Starting parsing declaration \"{s}\"\n", .{declaration_name}); + logln(.parser, .symbol_declaration, "Starting parsing declaration \"{s}\"", .{declaration_name}); - std.debug.print("Current token: {}\n", .{analyzer.tokens[analyzer.token_i].id}); + logln(.parser, .symbol_declaration, "Current token: {}", .{analyzer.tokens[analyzer.token_i].id}); const type_node_index = switch (analyzer.tokens[analyzer.token_i].id) { .colon => blk: { @@ -199,37 +215,37 @@ const Analyzer = struct { .right = init_node_index, }; - std.debug.print("Adding declaration \"{s}\" with init node of type: {s}\n", .{ declaration_name, @tagName(init_node.id) }); + logln(.parser, .symbol_declaration, "Adding declaration \"{s}\" with init node of type: {s}", .{ declaration_name, @tagName(init_node.id) }); // if (analyzer.token_i < analyzer.tokens.len) { // const first_token = analyzer.tokens[first]; // const last_token = analyzer.tokens[analyzer.token_i]; // const declaration_source_start = first_token.start; // const declaration_source_end = last_token.start; // - // std.debug.print("[ALL]\n", .{}); - // std.debug.print("Source file ({} bytes) :\n```\n{s}\n```\n", .{ analyzer.source_file.len, analyzer.source_file }); + // logln("[ALL]\n", .{}); + 
// logln("Source file ({} bytes) :\n```\n{s}\n```\n", .{ analyzer.source_file.len, analyzer.source_file }); // - // std.debug.print("[BEFORE]\n", .{}); + // logln("[BEFORE]\n", .{}); // - // std.debug.print("Tokens before the declaration: ", .{}); + // logln("Tokens before the declaration: ", .{}); // for (analyzer.tokens[0..first]) |t| { - // std.debug.print("{s} ", .{@tagName(t.id)}); + // logln("{s} ", .{@tagName(t.id)}); // } - // std.debug.print("\n", .{}); - // std.debug.print("Source before the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[0..analyzer.tokens[first].start]}); - // std.debug.print("[DECLARATION]\n", .{}); + // logln("\n", .{}); + // logln("Source before the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[0..analyzer.tokens[first].start]}); + // logln("[DECLARATION]\n", .{}); // - // std.debug.print("First token: {}\n", .{first_token}); - // std.debug.print("Last token: {}\n", .{last_token}); + // logln("First token: {}\n", .{first_token}); + // logln("Last token: {}\n", .{last_token}); // - // std.debug.print("Tokens including declaration ([{}-{}])", .{ first, analyzer.token_i }); + // logln("Tokens including declaration ([{}-{}])", .{ first, analyzer.token_i }); // for (analyzer.tokens[first..][0 .. analyzer.token_i - first]) |t| { - // std.debug.print("{s} ", .{@tagName(t.id)}); + // logln("{s} ", .{@tagName(t.id)}); // } - // std.debug.print("\n", .{}); + // logln("\n", .{}); // - // std.debug.print("Source for the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[declaration_source_start..declaration_source_end]}); - // std.debug.print("[AFTER]\n", .{}); + // logln("Source for the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[declaration_source_start..declaration_source_end]}); + // logln("[AFTER]\n", .{}); // // // TODO // // print("Tokens for file #{}\n", .{analyzer. 
@@ -245,7 +261,7 @@ const Analyzer = struct { while (analyzer.token_i < analyzer.tokens.len) { const first = analyzer.token_i; - std.debug.print("First token for container member: {s}\n", .{@tagName(analyzer.tokens[first].id)}); + logln(.parser, .container_members, "First token for container member: {s}", .{@tagName(analyzer.tokens[first].id)}); const member_node_index: Node.Index = switch (analyzer.tokens[first].id) { .fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) { .left_brace => blk: { @@ -265,7 +281,7 @@ const Analyzer = struct { else => |t| @panic(@tagName(t)), }; - std.debug.print("Container member {s}\n", .{@tagName(analyzer.nodes.items[member_node_index.unwrap()].id)}); + logln(.parser, .container_members, "Container member {s}", .{@tagName(analyzer.nodes.items[member_node_index.unwrap()].id)}); try analyzer.temporal_node_heap.append(analyzer.allocator, member_node_index); } @@ -373,7 +389,7 @@ const Analyzer = struct { const type_expression = try analyzer.typeExpression(); // const type_expression_node = analyzer.nodes.items[type_expression.unwrap()]; // _ = type_expression_node; - // std.debug.print("Type expression node: {}\n", .{type_expression_node}); + // logln("Type expression node: {}\n", .{type_expression_node}); foo = true; if (analyzer.tokens[analyzer.token_i].id == .comma) { @@ -410,7 +426,7 @@ const Analyzer = struct { while (analyzer.tokens[analyzer.token_i].id != .right_brace) { const first_statement_token = analyzer.tokens[analyzer.token_i]; - std.debug.print("First statement token: {s}\n", .{@tagName(first_statement_token.id)}); + logln(.parser, .block, "First statement token: {s}\n", .{@tagName(first_statement_token.id)}); const statement_index = switch (first_statement_token.id) { .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { .colon => { @@ -428,7 +444,7 @@ const Analyzer = struct { }; const node = analyzer.nodes.items[statement_index.unwrap()]; - std.debug.print("Adding statement: 
{s}\n", .{@tagName(node.id)}); + logln(.parser, .block, "Adding statement: {s}\n", .{@tagName(node.id)}); try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index); } @@ -497,20 +513,20 @@ const Analyzer = struct { } fn switchExpression(analyzer: *Analyzer) anyerror!Node.Index { - std.debug.print("Parsing switch...\n", .{}); + logln(.parser, .@"switch", "Parsing switch...\n", .{}); const switch_token = analyzer.token_i; analyzer.token_i += 1; _ = try analyzer.expectToken(.left_parenthesis); const switch_expression = try analyzer.expression(); _ = try analyzer.expectToken(.right_parenthesis); - std.debug.print("Parsed switch expression...\n", .{}); + logln(.parser, .@"switch", "Parsed switch expression...\n", .{}); _ = try analyzer.expectToken(.left_brace); var list = Node.List{}; while (analyzer.tokens[analyzer.token_i].id != .right_brace) { const case_token = analyzer.token_i; - std.debug.print("Parsing switch case...\n", .{}); + logln(.parser, .@"switch", "Parsing switch case...\n", .{}); const case_node = switch (analyzer.tokens[case_token].id) { .fixed_keyword_else => blk: { analyzer.token_i += 1; @@ -621,7 +637,8 @@ const Analyzer = struct { .left = expr, .right = try analyzer.expression(), }; - std.debug.print("assign:\nleft: {}.\nright: {}\n", .{ node.left, node.right }); + + logln(.parser, .assign, "assign:\nleft: {}.\nright: {}\n", .{ node.left, node.right }); return try analyzer.addNode(node); } @@ -679,14 +696,14 @@ const Analyzer = struct { var result = try analyzer.prefixExpression(); if (!result.invalid) { const prefix_node = analyzer.nodes.items[result.unwrap()]; - std.debug.print("Prefix: {}\n", .{prefix_node.id}); + logln(.parser, .precedence, "Prefix: {}\n", .{prefix_node.id}); } var banned_precedence: i32 = -1; while (analyzer.token_i < analyzer.tokens.len) { const token = analyzer.tokens[analyzer.token_i]; - // std.debug.print("Looping in expression precedence with token {}\n", .{token}); + // logln("Looping in expression 
precedence with token {}\n", .{token}); const precedence: i32 = switch (token.id) { .equal, .semicolon, .right_parenthesis, .right_brace, .comma, .period, .fixed_keyword_const, .fixed_keyword_var => -1, .bang => switch (analyzer.tokens[analyzer.token_i + 1].id) { @@ -695,19 +712,19 @@ const Analyzer = struct { }, else => |t| { const start = token.start; - std.debug.print("Source file:\n```\n{s}\n```\n", .{analyzer.source_file[start..]}); + logln(.parser, .precedence, "Source file:\n```\n{s}\n```\n", .{analyzer.source_file[start..]}); @panic(@tagName(t)); }, }; - std.debug.print("Precedence: {} ({s}) (file #{})\n", .{ precedence, @tagName(token.id), analyzer.file_index.uniqueInteger() }); + logln(.parser, .precedence, "Precedence: {} ({s}) (file #{})\n", .{ precedence, @tagName(token.id), analyzer.file_index.uniqueInteger() }); if (precedence < minimum_precedence) { - std.debug.print("Breaking for minimum_precedence\n", .{}); + logln(.parser, .precedence, "Breaking for minimum_precedence\n", .{}); break; } if (precedence == banned_precedence) { - std.debug.print("Breaking for banned precedence\n", .{}); + logln(.parser, .precedence, "Breaking for banned precedence\n", .{}); break; } @@ -747,7 +764,7 @@ const Analyzer = struct { fn prefixExpression(analyzer: *Analyzer) !Node.Index { const token = analyzer.token_i; - // std.debug.print("Prefix...\n", .{}); + // logln("Prefix...\n", .{}); const node_id: Node.Id = switch (analyzer.tokens[token].id) { else => |pref| { _ = pref; @@ -792,10 +809,7 @@ const Analyzer = struct { }), // todo:? 
.left_brace => try analyzer.block(.{ .is_comptime = false }), - else => |id| { - log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)}); - unreachable; - }, + else => |id| std.debug.panic("WARN: By default, calling curlySuffixExpression with {s}", .{@tagName(id)}), }; return result; @@ -916,13 +930,13 @@ const Analyzer = struct { var expression_list = ArrayList(Node.Index){}; while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { const current_token = analyzer.tokens[analyzer.token_i]; - std.debug.print("Current token: {s}\n", .{@tagName(current_token.id)}); + logln(.parser, .suffix, "Current token: {s}\n", .{@tagName(current_token.id)}); const parameter = try analyzer.expression(); try expression_list.append(analyzer.allocator, parameter); const parameter_node = analyzer.nodes.items[parameter.unwrap()]; - std.debug.print("Paremeter node: {s}\n", .{@tagName(parameter_node.id)}); + logln(.parser, .suffix, "Paremeter node: {s}\n", .{@tagName(parameter_node.id)}); const next_token = analyzer.tokens[analyzer.token_i]; - std.debug.print("next token: {s}\n", .{@tagName(next_token.id)}); + logln(.parser, .suffix, "next token: {s}\n", .{@tagName(next_token.id)}); analyzer.token_i += @intFromBool(switch (next_token.id) { .comma => true, .colon, .right_brace, .right_bracket => unreachable, @@ -988,7 +1002,7 @@ const Analyzer = struct { .colon => unreachable, else => blk: { const identifier = analyzer.bytes(token_i); - // std.debug.print("identifier: {s}\n", .{identifier}); + // logln("identifier: {s}\n", .{identifier}); analyzer.token_i += 1; if (equal(u8, identifier, "_")) { break :blk Node.Index.invalid; @@ -1122,7 +1136,7 @@ const Analyzer = struct { const right_token = analyzer.token_i; analyzer.token_i += 1; const result: Node.Index = @bitCast(right_token); - std.debug.print("WARNING: rhs has node index {} but it's token #{}\n", .{ result, right_token }); + logln(.parser, .suffix, "WARNING: rhs has node index {} but it's token 
#{}\n", .{ result, right_token }); break :blk result; }, }), @@ -1135,13 +1149,10 @@ const Analyzer = struct { fn addNode(analyzer: *Analyzer, node: Node) !Node.Index { const index = analyzer.nodes.items.len; try analyzer.nodes.append(analyzer.allocator, node); - std.debug.print("Adding node #{} (0x{x}) {s} to file #{}\n", .{ index, @intFromPtr(&analyzer.nodes.items[index]), @tagName(node.id), analyzer.file_index.uniqueInteger() }); + logln(.parser, .node_creation, "Adding node #{} (0x{x}) {s} to file #{}\n", .{ index, @intFromPtr(&analyzer.nodes.items[index]), @tagName(node.id), analyzer.file_index.uniqueInteger() }); // if (node.id == .identifier) { - // std.debug.print("Node identifier: {s}\n", .{analyzer.bytes(node.token)}); + // logln("Node identifier: {s}\n", .{analyzer.bytes(node.token)}); // } - if (node.id == .call) { - std.debug.print("Call two: {}\n", .{node}); - } return Node.Index{ .value = @intCast(index), }; @@ -1185,9 +1196,9 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, source_file: []const assert(node_index.value == 0); assert(!node_index.invalid); - std.debug.print("Start Parsing file root members\n", .{}); + logln(.parser, .main_node, "Start Parsing file root members\n", .{}); const members = try analyzer.containerMembers(); - std.debug.print("End Parsing file root members\n", .{}); + logln(.parser, .main_node, "End Parsing file root members\n", .{}); switch (members.len) { 0 => analyzer.nodes.items[0].id = .main_zero, diff --git a/src/main.zig b/src/main.zig index 2fabb73..89c7c0e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,73 +1,12 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const equal = std.mem.eql; const Compilation = @import("Compilation.zig"); - -pub const seed = std.math.maxInt(u64); -const default_src_file = "src/test/main.nat"; +pub const panic = Compilation.panic; pub fn main() !void { const allocator = std.heap.page_allocator; - const 
compilation_descriptor = try parseArguments(allocator); - const compilation = try Compilation.init(allocator); - - try compilation.compileModule(compilation_descriptor); -} - -const ArgumentParsingError = error{ - main_package_path_not_specified, -}; - -fn parseArguments(allocator: Allocator) !Compilation.Module.Descriptor { - const arguments = (try std.process.argsAlloc(allocator))[1..]; - - var maybe_executable_path: ?[]const u8 = null; - var maybe_main_package_path: ?[]const u8 = null; - var target_triplet: []const u8 = "x86_64-linux-gnu"; - - var i: usize = 0; - while (i < arguments.len) : (i += 1) { - const current_argument = arguments[i]; - if (equal(u8, current_argument, "-o")) { - if (i <= arguments.len) { - maybe_executable_path = arguments[i + 1]; - assert(maybe_executable_path.?.len != 0); - i += 1; - } else { - unreachable; - } - } else if (equal(u8, current_argument, "-target")) { - if (i <= arguments.len) { - target_triplet = arguments[i + 1]; - i += 1; - } else { - unreachable; - } - } else { - maybe_main_package_path = current_argument; - } - } - - const main_package_path = maybe_main_package_path orelse return error.main_package_path_not_specified; - - const executable_path = maybe_executable_path orelse blk: { - const executable_name = std.fs.path.basename(main_package_path[0 .. 
main_package_path.len - "/main.nat".len]); - assert(executable_name.len > 0); - const result = try std.mem.concat(allocator, u8, &.{ "nat/", executable_name }); - break :blk result; - }; - - const cross_target = try std.zig.CrossTarget.parse(.{ .arch_os_abi = target_triplet }); - const target = cross_target.toTarget(); - std.debug.print("Target: {}\n", .{target}); - - return .{ - .main_package_path = main_package_path, - .executable_path = executable_path, - .target = target, - }; + try Compilation.init(allocator); } test { From 30baa0b53d79495dcec612e478a2b1aafd221bca Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Wed, 8 Nov 2023 22:26:22 -0600 Subject: [PATCH 4/6] Fix register allocator for barebones use --- lib/std/start.nat | 2 +- src/Compilation.zig | 19 +- src/backend/intermediate_representation.zig | 4 +- src/backend/macho.zig | 40 +- src/backend/pe.zig | 12 +- src/backend/x86_64.zig | 611 +++++++++----------- 6 files changed, 305 insertions(+), 383 deletions(-) diff --git a/lib/std/start.nat b/lib/std/start.nat index 1799dcf..3a2c6c3 100644 --- a/lib/std/start.nat +++ b/lib/std/start.nat @@ -5,5 +5,5 @@ comptime { const _start = fn () noreturn { const result = #import("main").main(); - std.os.exit(0); + std.os.exit(result); } diff --git a/src/Compilation.zig b/src/Compilation.zig index 90792dc..6604dbd 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -100,7 +100,7 @@ fn parseArguments(allocator: Allocator) !Compilation.Module.Descriptor { } } else if (!recognized_particular) std.debug.panic("Unrecognized particular log \"{s}\" in scope {s}", .{ particular_log_candidate, @tagName(log_scope) }); } else { - unreachable; + // LogScope.Logger.bitset = @TypeOf(LogScope.Logger.bitset).initFull(); } logger_bitset.setPresent(log_scope, true); @@ -1117,10 +1117,6 @@ pub const File = struct { } }; -pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace, return_address: ?usize) noreturn { - std.builtin.default_panic(message, 
stack_trace, return_address); -} - const LoggerScope = enum { compilation, lexer, @@ -1170,3 +1166,16 @@ pub fn log(comptime logger_scope: LoggerScope, logger: getLoggerScopeType(logger std.fmt.format(writer, format, arguments) catch unreachable; } } + +pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace, return_address: ?usize) noreturn { + const print_stack_trace = true; + switch (print_stack_trace) { + true => std.builtin.default_panic(message, stack_trace, return_address), + false => { + writer.writeAll("\nPANIC: ") catch {}; + writer.writeAll(message) catch {}; + writer.writeByte('\n') catch {}; + std.os.abort(); + }, + } +} diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index 3494b20..839e029 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -19,7 +19,9 @@ pub const Logger = enum { function, phi_removal, - pub var bitset = std.EnumSet(Logger).initEmpty(); + pub var bitset = std.EnumSet(Logger).initMany(&.{ + .function, + }); }; pub const Result = struct { diff --git a/src/backend/macho.zig b/src/backend/macho.zig index e5f9bf9..fa84c7b 100644 --- a/src/backend/macho.zig +++ b/src/backend/macho.zig @@ -487,7 +487,7 @@ pub fn interpretFile(allocator: Allocator, descriptor: Compilation.Module.Descri _ = allocator; _ = descriptor; const header: *const Header = @ptrCast(@alignCast(file.ptr)); - print("Header : {}\n", .{header}); + print("Header : {}", .{header}); assert(header.magic == Header.magic); var text_segment: LoadCommand.Segment64 = undefined; @@ -503,69 +503,69 @@ pub fn interpretFile(allocator: Allocator, descriptor: Compilation.Module.Descri if (equal(u8, segment_load_command.name[0..text_segment_name.len], text_segment_name)) { text_segment = segment_load_command.*; } - print("SLC: {}\n", .{segment_load_command}); - print("segment name: {s}\n", .{segment_load_command.name}); + print("SLC: {}", 
.{segment_load_command}); + print("segment name: {s}", .{segment_load_command.name}); const section_ptr: [*]const LoadCommand.Segment64.Section = @ptrFromInt(@intFromPtr(segment_load_command) + @sizeOf(LoadCommand.Segment64)); const sections = section_ptr[0..segment_load_command.section_count]; for (sections) |section| { - print("{}\n", .{section}); - print("Section name: {s}. Segment name: {s}\n", .{ section.name, section.segment_name }); + print("{}", .{section}); + print("Section name: {s}. Segment name: {s}", .{ section.name, section.segment_name }); } }, .dyld_chained_fixups => { const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, .dyld_exports_trie => { const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, .symbol_table => { const command: *const LoadCommand.SymbolTable = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, .symbol_table_information => { const command: *const LoadCommand.SymbolTableInformation = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, .load_dylinker => { const command: *const LoadCommand.Dylinker = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); const name: [*:0]const u8 = @ptrFromInt(@intFromPtr(command) + command.name_offset); - print("Name: {s}\n", .{name}); + print("Name: {s}", .{name}); }, .uuid_number => { const command: *const LoadCommand.Uuid = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, .minimum_os_version => { const command: *const LoadCommand.MinimumVersion = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); 
+ print("command: {}", .{command}); }, .source_version => { const command: *const LoadCommand.SourceVersion = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, .dyld_main_entry_point => { const command: *const LoadCommand.EntryPoint = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, .load_dylib => { const command: *const LoadCommand.Dylib = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); - print("Dylib: {s}\n", .{@as([*:0]const u8, @ptrFromInt(@intFromPtr(load_command_ptr) + @sizeOf(LoadCommand.Dylib)))}); + print("command: {}", .{command}); + print("Dylib: {s}", .{@as([*:0]const u8, @ptrFromInt(@intFromPtr(load_command_ptr) + @sizeOf(LoadCommand.Dylib)))}); }, .function_starts => { const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, .data_in_code => { const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, .code_signature => { const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); - print("command: {}\n", .{command}); + print("command: {}", .{command}); }, else => |t| @panic(@tagName(t)), } diff --git a/src/backend/pe.zig b/src/backend/pe.zig index aca8210..dc5b0d3 100644 --- a/src/backend/pe.zig +++ b/src/backend/pe.zig @@ -22,21 +22,21 @@ pub const Writer = struct { } pub fn writeToMemory(writer: *Writer, image: *const emit.Result) !void { - print("File len: {}\n", .{writer.in_file.len}); + print("File len: {}", .{writer.in_file.len}); const dos_header: *const ImageDosHeader = @ptrCast(@alignCast(writer.in_file.ptr)); - print("File address: {}\n", .{dos_header.file_address_of_new_exe_header}); - print("File: {s}\n", .{writer.in_file[0x40..]}); + 
print("File address: {}", .{dos_header.file_address_of_new_exe_header}); + print("File: {s}", .{writer.in_file[0x40..]}); for (writer.in_file[0x40..], 0..) |byte, index| { if (byte == 'T') { - print("Index: {}\n", .{index}); + print("Index: {}", .{index}); break; } } assert(dos_header.magic_number == ImageDosHeader.magic); // assert(dos_header.file_address_of_new_exe_header == @sizeOf(ImageDosHeader)); - print("{}\n", .{dos_header}); + print("{}", .{dos_header}); const file_header: *const ImageFileHeader = @ptrCast(@alignCast(writer.in_file[dos_header.file_address_of_new_exe_header + 4 ..].ptr)); - print("File header: {}\n", .{file_header}); + print("File header: {}", .{file_header}); writer.append(std.mem.asBytes(&ImageDosHeader{ .file_address_of_new_exe_header = 208, diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index 17b6942..6bf50c4 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -24,6 +24,7 @@ pub const Logger = enum { instruction_selection_new_instruction, instruction_selection_cache_flush, instruction_selection_mir_function, + instruction_selection_register_operand_list, register_allocation_block, register_allocation_problematic_hint, register_allocation_assignment, @@ -35,7 +36,20 @@ pub const Logger = enum { register_allocation_function_after, register_allocation_operand_list_verification, - pub var bitset = std.EnumSet(Logger).initEmpty(); + pub var bitset = std.EnumSet(Logger).initMany(&.{ + .instruction_selection_ir_function, + // .instruction_selection_register_operand_list, + .register_allocation_block, + // .register_allocation_problematic_hint, + // .register_allocation_assignment, + // .register_allocation_reload, + .register_allocation_function_before, + // .register_allocation_new_instruction, + // .register_allocation_new_instruction_function_before, + // .register_allocation_instruction_avoid_copy, + .register_allocation_function_after, + .register_allocation_operand_list_verification, + }); }; const Register 
= struct { @@ -1025,7 +1039,7 @@ const InstructionSelection = struct { else => |t| @panic(@tagName(t)), } }, - else => std.debug.panic("Stack object size: {}\n", .{stack_object.size}), + else => std.debug.panic("Stack object size: {}", .{stack_object.size}), } } @@ -1104,12 +1118,12 @@ const InstructionSelection = struct { destination_operand, source_operand, }); - logln(.codegen, .register_allocation_new_instructions, "Inserting instruction at index {}\n", .{insert_before_instruction_index}); + logln(.codegen, .register_allocation_new_instructions, "Inserting instruction at index {}", .{insert_before_instruction_index}); try mir.blocks.get(instruction_selection.current_block).instructions.insert(mir.allocator, insert_before_instruction_index, instruction_index); }, else => |t| @panic(@tagName(t)), }, - else => panic("Stack object size: {} bits\n", .{stack_object.size}), + else => panic("Stack object size: {} bits", .{stack_object.size}), } } @@ -1285,7 +1299,7 @@ const InstructionSelection = struct { // if (std.meta.eql(stored_register, register)) { // unreachable; // } else { - // std.debug.panic("Register mismatch: Stored: {} Got: {}\n", .{ stored_register, register }); + // std.debug.panic("Register mismatch: Stored: {} Got: {}", .{ stored_register, register }); // } // } else { // gop.value_ptr.* = register; @@ -1365,9 +1379,7 @@ const InstructionSelection = struct { }, }, }, - .flags = .{ - .type = .def, - }, + .flags = .{}, }, }); @@ -1537,18 +1549,25 @@ const Instruction = struct { fn next(it: *I) ?ReturnValue.Index { const original_operand_index = it.index; + switch (it.index.invalid) { false => switch (arguments.element) { .instruction => { const original_operand = it.mir.operands.get(original_operand_index); const instruction = original_operand.parent; // const i_desc = it.mir.instructions.get(instruction); - // print("Instruction: {}\n", .{i_desc.id}); + // print("Instruction: {}", .{i_desc.id}); while (true) { it.advance(); - if (it.index.invalid) 
break; + + if (it.index.invalid) { + break; + } + const it_operand = it.mir.operands.get(it.index); - if (!it_operand.parent.eq(instruction)) break; + if (!it_operand.parent.eq(instruction)) { + break; + } } return instruction; @@ -1576,33 +1595,44 @@ const Instruction = struct { assert(!it.index.invalid); it.advanceRaw(); - if (!arguments.use) { - if (!it.index.invalid) { - const operand = it.mir.operands.get(it.index); - if (operand.flags.type == .use) { - it.index = Operand.Index.invalid; - } else { - //TODO: assert that is not debug + switch (arguments.use) { + true => { + while (!it.index.invalid) { + const operand = it.mir.operands.get(it.index); + if (!arguments.def and operand.flags.type == .def) { + it.advanceRaw(); + } else { + break; + } } - } - } else { - while (!it.index.invalid) { - const operand = it.mir.operands.get(it.index); - if (!arguments.def and operand.flags.type == .def) { - it.advanceRaw(); - } else { - break; + }, + false => { + if (!it.index.invalid) { + const operand = it.mir.operands.get(it.index); + if (operand.flags.type == .use) { + it.index = Operand.Index.invalid; + } else { + //TODO: assert that is not debug + } } - } + }, } } fn advanceRaw(it: *I) void { assert(!it.index.invalid); - const current_operand = it.mir.operands.get(it.index); + const old_index = it.index; + const current_operand = it.mir.operands.get(old_index); assert(current_operand.u == .register); const next_index = current_operand.u.register.list.next; it.index = next_index; + + if (it.index.invalid) { + logln(.codegen, .register_allocation_problematic_hint, "[ITERATOR] O{} -> NULL operand index", .{old_index.uniqueInteger()}); + } else { + const operand = it.mir.operands.get(it.index); + logln(.codegen, .register_allocation_problematic_hint, "[ITERATOR] O{} -> O{}: {}", .{ old_index.uniqueInteger(), it.index.uniqueInteger(), operand.flags }); + } } }; } @@ -2024,7 +2054,7 @@ pub const MIR = struct { virtual_registers: BlockList(Register.Virtual) = .{}, pub fn 
selectInstructions(allocator: Allocator, intermediate: *ir.Result, target: std.Target) !*MIR { - logln(.codegen, .instruction_selection_block, "\n[INSTRUCTION SELECTION]\n\n", .{}); + logln(.codegen, .instruction_selection_block, "\n[INSTRUCTION SELECTION]\n", .{}); const mir = try allocator.create(MIR); mir.* = .{ .allocator = allocator, @@ -2040,7 +2070,7 @@ pub const MIR = struct { while (function_definition_iterator.nextPointer()) |ir_function| { const fn_name = mir.ir.getFunctionName(ir_function.declaration); - logln(.codegen, .instruction_selection_ir_function, "=========\n{}=========\n", .{ir_function}); + logln(.codegen, .instruction_selection_ir_function, "Selecting instructions for {}", .{ir_function}); const instruction_selection = mir.instruction_selections.addOneAssumeCapacity(); const function_allocation = try mir.functions.addOne(mir.allocator); @@ -2108,7 +2138,7 @@ pub const MIR = struct { instruction_selection.local_value_map.clearRetainingCapacity(); - logln(.codegen, .instruction_selection_new_instruction, "Instruction #{}\n", .{instruction_i}); + logln(.codegen, .instruction_selection_new_instruction, "Instruction #{}", .{instruction_i}); switch (ir_instruction.*) { .ret => |ir_ret_index| { @@ -2174,7 +2204,7 @@ pub const MIR = struct { const syscall_register_list = calling_convention.syscall_registers[0..ir_syscall.arguments.items.len]; for (ir_syscall.arguments.items, syscall_register_list) |ir_argument_index, syscall_register| { - //print("index: {}\n", .{index}); + //print("index: {}", .{index}); const source_register = try instruction_selection.getRegisterForValue(mir, ir_argument_index); const destination_register = Register{ .index = .{ @@ -2445,12 +2475,12 @@ pub const MIR = struct { .call => |ir_call_index| { const ir_call = mir.ir.calls.get(ir_call_index); for (ir_call.arguments, 0..) 
|ir_argument_index, index| { - // print("index: {}\n", .{index}); + // print("index: {}", .{index}); const source_register = try instruction_selection.getRegisterForValue(mir, ir_argument_index); const source_value_type = resolveType(getIrType(mir.ir, ir_argument_index)); const source_register_class = register_classes.get(source_value_type); const argument_register = calling_convention.argument_registers.get(source_register_class)[index]; - // print("Argument register: {}\n", .{argument_register}); + // print("Argument register: {}", .{argument_register}); const destination_register = Register{ .index = .{ @@ -2564,7 +2594,7 @@ pub const MIR = struct { const instruction_index = instruction_selection.instruction_cache.items[i]; const instruction = mir.instructions.get(instruction_index); - logln(.codegen, .instruction_selection_cache_flush, "Inserting instruction #{} ({s}) into index {} (instruction count: {})\n", .{ instruction_index.uniqueInteger(), @tagName(instruction.id), block.current_stack_index, block.instructions.items.len }); + logln(.codegen, .instruction_selection_cache_flush, "Inserting instruction #{} ({s}) into index {} (instruction count: {})", .{ instruction_index.uniqueInteger(), @tagName(instruction.id), block.current_stack_index, block.instructions.items.len }); try block.instructions.insert(mir.allocator, block.current_stack_index, instruction_index); } @@ -2574,7 +2604,7 @@ pub const MIR = struct { try instruction_selection.emitLiveInCopies(mir, function.blocks.items[0]); - logln(.codegen, .instruction_selection_mir_function, "=========\n{}=========\n", .{function}); + logln(.codegen, .instruction_selection_ir_function, "Selected instructions for {}", .{function}); } return mir; @@ -2618,13 +2648,17 @@ pub const MIR = struct { const head_index_ptr = mir.getRegisterListHead(instruction_selection, operand.u.register); const head_index = head_index_ptr.*; + logln(.codegen, .instruction_selection_register_operand_list, "Adding register list 
metadata to operand #{}", .{operand_index.uniqueInteger()}); + switch (head_index.invalid) { false => { const head_operand = mir.operands.get(head_index); assert(std.meta.eql(head_operand.u.register.index, operand.u.register.index)); + const last_operand_index = head_operand.u.register.list.previous; const last_operand = mir.operands.get(last_operand_index); assert(std.meta.eql(last_operand.u.register.index, operand.u.register.index)); + head_operand.u.register.list.previous = operand_index; operand.u.register.list.previous = last_operand_index; @@ -2640,6 +2674,8 @@ pub const MIR = struct { } }, true => { + logln(.codegen, .instruction_selection_register_operand_list, "List is empty, adding it to the top of the list", .{}); + operand.u.register.list.previous = operand_index; operand.u.register.list.next = Operand.Index.invalid; head_index_ptr.* = operand_index; @@ -2668,6 +2704,7 @@ pub const MIR = struct { false => mir.operands.get(operand_next), true => head, }; + next.u.register.list.previous = operand_previous; operand.u.register.list.previous = Operand.Index.invalid; @@ -2680,7 +2717,7 @@ pub const MIR = struct { else => unreachable, }; - // print("Old: {}. New: {}\n", .{ old_register_class, new_register_class }); + // print("Old: {}. 
New: {}", .{ old_register_class, new_register_class }); switch (old_register_class == new_register_class) { true => return new_register_class, false => unreachable, @@ -2692,7 +2729,7 @@ pub const MIR = struct { assert(register.index == .virtual); const operand_reference = instruction_descriptor.operands[operand_index]; const operand_register_class = register_class_operand_matcher.get(operand_reference.id); - // print("Constraint operand #{} with {} (out of {})\n", .{ operand_index, operand_register_class, operand_reference.id }); + // print("Constraint operand #{} with {} (out of {})", .{ operand_index, operand_register_class, operand_reference.id }); // const register_class = op if (mir.constrainRegisterClass(register, operand_register_class) == null) { @@ -2802,8 +2839,8 @@ pub const MIR = struct { // TODO: asserts const assert_result = !operand.flags.isKill() or live_register.last_use.eq(instruction_index); if (assert_result) { - // logln("Existing live register at instruction #{}: {}\n", .{ instruction_index.uniqueInteger(), live_register }); - // logln("Function until now: {}\n", .{instruction_selection.function}); + // logln("Existing live register at instruction #{}: {}", .{ instruction_index.uniqueInteger(), live_register }); + // logln("Function until now: {}", .{instruction_selection.function}); assert(assert_result); } }, @@ -2869,23 +2906,28 @@ pub const MIR = struct { } } + logln(.codegen, .register_allocation_problematic_hint, "Tracing copies for VR{} in instruction #{}", .{ virtual_register.uniqueInteger(), instruction_index.uniqueInteger() }); + const maybe_hint2 = register_allocator.traceCopies(mir, instruction_selection, virtual_register); if (maybe_hint2) |hint| { // TODO const allocatable = true; + logln(.codegen, .register_allocation_problematic_hint, "Hint: {}. 
Register class: {s}", .{ hint, @tagName(register_class) }); + if (hint == .physical and allocatable and isRegisterInClass(hint.physical, register_class) and !register_allocator.isRegisterUsedInInstruction(hint.physical, look_at_physical_register_uses)) { const physical_register = hint.physical; + if (register_allocator.register_states.get(physical_register) == .free) { register_allocator.assignVirtualToPhysicalRegister(live_register, physical_register); return; } else { - logln(.codegen, .register_allocation_problematic_hint, "Second hint {s} not free\n", .{@tagName(physical_register)}); + logln(.codegen, .register_allocation_problematic_hint, "Second hint {s} not free", .{@tagName(physical_register)}); } } else { unreachable; } } else { - logln(.codegen, .register_allocation_problematic_hint, "Can't take hint for VR{} for instruction #{}\n", .{ virtual_register.uniqueInteger(), instruction_index.uniqueInteger() }); + logln(.codegen, .register_allocation_problematic_hint, "Can't take hint for VR{} for instruction #{}", .{ virtual_register.uniqueInteger(), instruction_index.uniqueInteger() }); } const register_class_members = registers_by_class.get(register_class); @@ -2897,7 +2939,7 @@ pub const MIR = struct { // for (register_class_members) |candidate_register| { // print("{s}, ", .{@tagName(candidate_register)}); // } - // print("\n", .{}); + // print("", .{}); for (register_class_members) |candidate_register| { if (register_allocator.isRegisterUsedInInstruction(candidate_register, look_at_physical_register_uses)) continue; const spill_cost = register_allocator.computeSpillCost(candidate_register); @@ -2954,48 +2996,72 @@ pub const MIR = struct { _ = look_at_physical_register_uses; // TODO: register masks + // if (register_allocator.used_in_instruction.contains(physical_register)) { + // return true; + // } + // // TODO + // else { + // return false; + // } - if (register_allocator.used_in_instruction.contains(physical_register)) { - return true; - } - // TODO - 
//else if (look_at_physical_register_uses and register_classes.ph - else { - return false; - } + const result = register_allocator.used_in_instruction.contains(physical_register); + logln(.codegen, .register_allocation_problematic_hint, "Register {s} used in instruction: {}", .{ @tagName(physical_register), result }); + return result; } - fn traceCopyChain(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, register: Register) ?Register.Index { + fn traceCopyChain(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, given_register: Register) ?Register.Index { _ = register_allocator; const chain_length_limit = 3; - _ = chain_length_limit; var chain_try_count: u32 = 0; - _ = chain_try_count; + + var register = given_register; + while (true) { switch (register.index) { .physical => return register.index, .virtual => |vri| { + logln(.codegen, .register_allocation_problematic_hint, "[traceCopyChain] Operand: VR{}", .{vri.uniqueInteger()}); + const virtual_head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ .index = .{ .virtual = vri, }, }); + logln(.codegen, .register_allocation_problematic_hint, "[traceCopyChain] Head operand for VR{}: O{}", .{ vri.uniqueInteger(), virtual_head_index_ptr.uniqueInteger() }); + var vdef = Instruction.Iterator.Get(.{ .use = false, .def = true, .element = .instruction, }).new(mir, virtual_head_index_ptr.*); - const vdef_instruction = vdef.nextPointer() orelse break; - if (vdef.nextPointer()) |_| break; + const operand_index = vdef.index; - switch (vdef_instruction.id) { + const vdef_instruction = vdef.next() orelse break; + logln(.codegen, .register_allocation_problematic_hint, "[traceCopyChain] VR{} defined in operand #{} of instruction #{}", .{ vri.uniqueInteger(), operand_index.uniqueInteger(), vdef_instruction.uniqueInteger() }); + + const next_operand = vdef.index; + + if (vdef.next()) |unexpected_next_instruction| { + 
logln(.codegen, .register_allocation_problematic_hint, "[traceCopyChain] VR{} also defined in operand #{} unexpected next instruction #{}. Breaking...", .{ vri.uniqueInteger(), next_operand.uniqueInteger(), unexpected_next_instruction.uniqueInteger() }); + break; + } + + const instruction = mir.instructions.get(vdef_instruction); + switch (instruction.id) { + .copy => { + const copy_source_operand_index = instruction.operands.items[1]; + const copy_source_operand = mir.operands.get(copy_source_operand_index); + register = copy_source_operand.u.register; + }, else => |t| @panic(@tagName(t)), } - unreachable; }, } + + chain_try_count += 1; + if (chain_try_count >= chain_length_limit) break; } return null; @@ -3007,18 +3073,33 @@ pub const MIR = struct { .virtual = virtual_register_index, }, }); + + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Tracing copies for VR{}. Head operand: #{}", .{ virtual_register_index.uniqueInteger(), head_index_ptr.uniqueInteger() }); + var define_instructions = Instruction.Iterator.Get(.{ .use = false, .def = true, .element = .instruction, }).new(mir, head_index_ptr.*); + if (!define_instructions.index.invalid) { + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Next operand before loop: #{}", .{define_instructions.index.uniqueInteger()}); + } + const definition_limit = 3; var try_count: u32 = 0; while (define_instructions.next()) |instruction_index| { + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Current instruction: #{}", .{instruction_index.uniqueInteger()}); + if (!define_instructions.index.invalid) { + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Next operand: #{}", .{define_instructions.index.uniqueInteger()}); + } else { + // logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Next operand: invalid", .{}); + } + const instruction = mir.instructions.get(instruction_index); switch (instruction.id) { - .mov32rm => 
unreachable, + .mov32rm => {}, + .mov32r0 => {}, .copy => { const operand_index = instruction.operands.items[1]; const operand = mir.operands.get(operand_index); @@ -3027,7 +3108,7 @@ pub const MIR = struct { return register; } - logln(.codegen, .register_allocation_problematic_hint, "Missed oportunity for register allocation tracing copy chain for VR{}\n", .{virtual_register_index.uniqueInteger()}); + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Missed oportunity for register allocation tracing copy chain for VR{}", .{virtual_register_index.uniqueInteger()}); }, else => |t| @panic(@tagName(t)), } @@ -3048,7 +3129,7 @@ pub const MIR = struct { .virtual = virtual_register, }); - logln(.codegen, .register_allocation_assignment, "Assigning V{} to {s}\n", .{ virtual_register.uniqueInteger(), @tagName(register) }); + logln(.codegen, .register_allocation_assignment, "Assigning V{} to {s}", .{ virtual_register.uniqueInteger(), @tagName(register) }); // TODO: debug info } @@ -3061,7 +3142,7 @@ pub const MIR = struct { fn displacePhysicalRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, physical_register: Register.Physical) !bool { const state = register_allocator.register_states.getPtr(physical_register); - // print("Trying to displace register {s} with state {s}\n", .{ @tagName(physical_register), @tagName(state.*) }); + // print("Trying to displace register {s} with state {s}", .{ @tagName(physical_register), @tagName(state.*) }); return switch (state.*) { .free => false, .preassigned => blk: { @@ -3084,7 +3165,7 @@ pub const MIR = struct { fn reload(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, before_index: usize, virtual_register: Register.Virtual.Index, physical_register: Register.Physical) !void { const frame_index = try register_allocator.getStackSpaceFor(mir, instruction_selection, 
virtual_register); const register_class = mir.virtual_registers.get(virtual_register).register_class; - logln(.codegen, .register_allocation_reload, "Frame index: {}\n", .{frame_index}); + logln(.codegen, .register_allocation_reload, "Frame index: {}", .{frame_index}); try instruction_selection.loadRegisterFromStackSlot(mir, before_index, physical_register, frame_index, register_class, virtual_register); } @@ -3265,7 +3346,7 @@ pub const MIR = struct { // // TODO: basic block liveins (regmasks?) // // const live_registers = register_allocator.live_virtual_registers.values(); - // print("Live register count: {}\n", .{live_registers.len}); + // print("Live register count: {}", .{live_registers.len}); // // for (live_registers) |live_register| { // const physical_register = live_register.physical; @@ -3321,7 +3402,7 @@ pub const MIR = struct { } pub fn allocateRegisters(mir: *MIR) !void { - logln(.codegen, .register_allocation_block, "[REGISTER ALLOCATION]\n\n", .{}); + logln(.codegen, .register_allocation_block, "\n[REGISTER ALLOCATION]\n", .{}); const function_count = mir.functions.len; var function_iterator = mir.functions.iterator(); const register_count = @typeInfo(Register.Physical).Enum.fields.len; @@ -3332,7 +3413,7 @@ pub const MIR = struct { for (0..function_count) |function_index| { const function = function_iterator.nextPointer().?; const instruction_selection = &mir.instruction_selections.items[function_index]; - logln(.codegen, .register_allocation_function_before, "Allocating registers for {}\n", .{function}); + logln(.codegen, .register_allocation_function_before, "Allocating registers for {}", .{function}); var block_i: usize = function.blocks.items.len; var register_allocator = try RegisterAllocator.init(mir, instruction_selection); @@ -3350,43 +3431,61 @@ pub const MIR = struct { const instruction_index = block.instructions.items[instruction_i]; const instruction = mir.instructions.get(instruction_index); - logln(.codegen, 
.register_allocation_new_instruction, "===============\nInstruction {} (#{})\n", .{ instruction_i, instruction_index.uniqueInteger() }); - logln(.codegen, .register_allocation_new_instruction_function_before, "{}\n", .{function}); + logln(.codegen, .register_allocation_new_instruction, "===============\nInstruction {} (#{})", .{ instruction_i, instruction_index.uniqueInteger() }); + logln(.codegen, .register_allocation_new_instruction_function_before, "{}", .{function}); register_allocator.used_in_instruction = RegisterBitset.initEmpty(); - const max_operand_count = 32; - var define_bitset = std.StaticBitSet(max_operand_count).initEmpty(); - var physical_register_bitset = std.StaticBitSet(max_operand_count).initEmpty(); - var register_mask_bitset = std.StaticBitSet(max_operand_count).initEmpty(); - var virtual_register_define = false; + var physical_register_use = false; + var register_mask = false; + var virtual_register_definition = false; + var register_definition = false; + var early_clobber = false; var assign_live_throughs = false; for (instruction.operands.items, 0..) 
|operand_index, operand_i| { + _ = operand_i; const operand = mir.operands.get(operand_index); switch (operand.u) { - .register => |register| { - const is_define = operand.flags.type == .def; - const is_physical = register.index == .physical; - if (is_define and !is_physical) { - virtual_register_define = true; - } - define_bitset.setValue(operand_i, is_define); - physical_register_bitset.setValue(operand_i, is_physical); - if (is_physical and is_define) { - const physical_register = register.index.physical; - const displaced_any = try register_allocator.definePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); - if (!displaced_any) { - operand.flags.dead_or_kill = true; + .register => |register| switch (register.index) { + .virtual => { + if (operand.flags.type == .def) { + register_definition = true; + virtual_register_definition = true; + if (operand.flags.early_clobber) { + early_clobber = true; + assign_live_throughs = true; + } + + // TODO } - } + }, + .physical => |physical_register| { + if (!register_allocator.reserved.contains(physical_register)) { + if (operand.flags.type == .def) { + register_definition = true; + + const displaced_any = try register_allocator.definePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + if (operand.flags.early_clobber) { + early_clobber = true; + } + if (!displaced_any) { + operand.flags.dead_or_kill = true; + } + } + + if (operand.readsRegister()) { + physical_register_use = true; + } + } + }, }, else => {}, } } - if (define_bitset.count() > 0) { - if (virtual_register_define) { + if (register_definition) { + if (virtual_register_definition) { var rearranged_implicit_operands = true; if (assign_live_throughs) { unreachable; @@ -3424,47 +3523,75 @@ pub const MIR = struct { while (operand_i > 0) { operand_i -= 1; - if (define_bitset.isSet(operand_i) and physical_register_bitset.isSet(operand_i)) { - const operand_index = instruction.operands.items[operand_i]; - 
const operand = mir.operands.get(operand_index); - const physical_register = operand.u.register.index.physical; - register_allocator.freePhysicalRegister(physical_register); - register_allocator.unmarkUsedRegisterInInstruction(physical_register); + const operand_index = instruction.operands.items[operand_i]; + const operand = mir.operands.get(operand_index); + switch (operand.u) { + .register => |register| switch (operand.flags.type) { + .def => switch (register.index) { + .physical => |physical_register| { + register_allocator.freePhysicalRegister(physical_register); + register_allocator.unmarkUsedRegisterInInstruction(physical_register); + }, + .virtual => {}, + }, + .use => {}, + }, + else => {}, } } } - if (register_mask_bitset.count() > 0) { + if (register_mask) { unreachable; } // Physical register use - if (physical_register_bitset.count() > 0) { - for (instruction.operands.items, 0..) |operand_index, operand_i| { - if (!define_bitset.isSet(operand_i) and physical_register_bitset.isSet(operand_i)) { - const operand = mir.operands.get(operand_index); - const physical_register = operand.u.register.index.physical; - if (!register_allocator.reserved.contains(physical_register)) { - const displaced_any = try register_allocator.usePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); - if (!displaced_any) { - operand.flags.dead_or_kill = true; - } - } + if (physical_register_use) { + for (instruction.operands.items) |operand_index| { + const operand = mir.operands.get(operand_index); + + switch (operand.flags.type) { + .def => {}, + .use => switch (operand.u) { + .register => |register| switch (register.index) { + .physical => |physical_register| { + if (!register_allocator.reserved.contains(physical_register)) { + const displaced_any = try register_allocator.usePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + if (!displaced_any) { + operand.flags.dead_or_kill = true; + } + } + }, + .virtual => {}, 
+ }, + else => {}, + }, } } } + var undef_use = false; + _ = undef_use; var rearranged_implicit_operands = true; while (rearranged_implicit_operands) { rearranged_implicit_operands = false; for (instruction.operands.items, 0..) |operand_index, operand_i| { - if (!define_bitset.isSet(operand_i)) { - const operand = mir.operands.get(operand_index); - if (operand.u == .register and operand.u.register.index == .virtual) { - const virtual_register = operand.u.register.index.virtual; - rearranged_implicit_operands = try register_allocator.useVirtualRegister(mir, instruction_selection, instruction_index, virtual_register, @intCast(operand_i)); - if (rearranged_implicit_operands) break; - } + const operand = mir.operands.get(operand_index); + switch (operand.u) { + .register => |register| switch (operand.flags.type) { + .def => {}, + .use => switch (register.index) { + .physical => {}, + .virtual => |virtual_register_index| { + if (operand.flags.undef) { + unreachable; + } + rearranged_implicit_operands = try register_allocator.useVirtualRegister(mir, instruction_selection, instruction_index, virtual_register_index, @intCast(operand_i)); + if (rearranged_implicit_operands) break; + }, + }, + }, + else => {}, } } } @@ -3475,7 +3602,7 @@ pub const MIR = struct { if (std.meta.eql(dst_register, src_register)) { try register_allocator.coalesced.append(mir.allocator, instruction_index); - logln(.codegen, .register_allocation_instruction_avoid_copy, "Avoiding copy...\n", .{}); + logln(.codegen, .register_allocation_instruction_avoid_copy, "Avoiding copy...", .{}); } } } @@ -3490,248 +3617,33 @@ pub const MIR = struct { } else unreachable; } - logln(.codegen, .register_allocation_function_after, "{}\n============\n", .{function}); + logln(.codegen, .register_allocation_function_after, "Allocated registers for {}\n============", .{function}); } } - // for (0..function_count) |function_index| { - // const function = function_iterator.nextPointer().?; - // const instruction_selection 
= &mir.instruction_selections.items[function_index]; - // print("FN {s}\n", .{function.name}); - // - // var register_allocator = try RegisterAllocator.init(mir, instruction_selection); - // - // for (function.blocks.items) |block_index| { - // instruction_selection.current_block = block_index; - // register_allocator.coalesced.clearRetainingCapacity(); - // - // const block = mir.blocks.get(block_index); - // const instruction_count = block.instructions.items.len; - // var instruction_i = instruction_count; - // - // while (instruction_i > 0) { - // instruction_i -= 1; - // print("Instruction #{}\n", .{instruction_i}); - // - // register_allocator.used_in_instruction = RegisterBitset.initEmpty(); - // - // const instruction_index = block.instructions.items[instruction_i]; - // const instruction = mir.instructions.get(instruction_index); - // - // var register_define = false; - // var virtual_register_define = false; - // var early_clobber = false; - // var assign_live_throughs = false; - // var physical_register_use = false; - // var register_mask = false; - // - // for (instruction.operands.items) |operand_index| { - // const operand = mir.operands.get(operand_index); - // var register_buffer: [2]Register = undefined; - // const registers = getRegisters(operand, ®ister_buffer); - // - // for (registers) |register| switch (register.index) { - // .virtual => { - // switch (operand.flags.type) { - // .def => { - // register_define = true; - // virtual_register_define = true; - // // TODO early clobber, livethroughs - // if (operand.flags.early_clobber) { - // early_clobber = true; - // assign_live_throughs = true; - // } - // - // // TODO (tied and tied op undef) or (subreg and !undef) - // }, - // .use => {}, - // } - // }, - // .physical => |physical_register| { - // if (!register_allocator.reserved.contains(physical_register)) { - // switch (operand.flags.type) { - // .def => { - // register_define = true; - // const displaced_any = try 
register_allocator.definePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); - // if (operand.flags.early_clobber) { - // early_clobber = true; - // } - // if (!displaced_any) { - // operand.flags.dead_or_kill = true; - // } - // }, - // .use => {}, - // } - // - // if (operand.readsRegister()) { - // physical_register_use = true; - // } - // } - // }, - // }; - // } - // - // if (register_define) { - // if (virtual_register_define) { - // var rearranged_implicit_operands = true; - // if (assign_live_throughs) { - // unreachable; - // } else { - // while (rearranged_implicit_operands) { - // rearranged_implicit_operands = false; - // - // for (instruction.operands.items) |operand_index| { - // const operand = mir.operands.get(operand_index); - // switch (operand.u) { - // .register => |register| switch (operand.flags.type) { - // .def => switch (register.index) { - // .virtual => |virtual_register| { - // rearranged_implicit_operands = try register_allocator.defineVirtualRegister(mir, instruction_selection, instruction_index, operand_index, virtual_register, false); - // if (rearranged_implicit_operands) { - // break; - // } - // }, - // .physical => {}, - // }, - // else => {}, - // }, - // .lea64mem => |lea64mem| { - // assert(lea64mem.gp64 == null); - // assert(lea64mem.scale_reg == null); - // }, - // else => {}, - // } - // } - // } - // } - // } - // - // var operand_i = instruction.operands.items.len; - // while (operand_i > 0) { - // operand_i -= 1; - // const operand_index = instruction.operands.items[operand_i]; - // const operand = mir.operands.get(operand_index); - // var register_buffer: [2]Register = undefined; - // const registers = getRegisters(operand, ®ister_buffer); - // for (registers) |register| { - // switch (operand.flags.type) { - // .def => { - // if (operand.id == .lea64mem) unreachable; - // // TODO: missing checks - // switch (register.index) { - // .virtual => unreachable, - // .physical => 
|physical_register| switch (register_allocator.reserved.contains(physical_register)) { - // true => {}, - // false => { - // register_allocator.freePhysicalRegister(physical_register); - // register_allocator.unmarkUsedRegisterInInstruction(physical_register); - // }, - // }, - // } - // }, - // .use => {}, - // } - // } - // } - // } - // - // if (register_mask) { - // unreachable; - // } - // - // if (physical_register_use) { - // unreachable; - // } - // - // var undef_use = false; - // - // while (true) { - // var rearrange_implicit_operands = false; - // operand_loop: for (instruction.operands.items, 0..) |operand_index, operand_i| { - // const operand = mir.operands.get(operand_index); - // var register_buffer: [2]Register = undefined; - // const registers = getRegisters(operand, ®ister_buffer); - // - // for (registers) |register| { - // switch (operand.flags.type) { - // .use => switch (register.index) { - // .virtual => |virtual_register_index| switch (operand.flags.undef) { - // true => undef_use = true, - // false => { - // _ = register_allocator.mayLiveIn(mir, instruction_selection, virtual_register_index); - // assert(!operand.flags.internal_read); - // assert(operand.readsRegister()); - // - // if (try register_allocator.useVirtualRegister(mir, instruction_selection, instruction_index, virtual_register_index, @intCast(operand_i))) { - // break :operand_loop; - // } - // }, - // }, - // .physical => {}, - // }, - // .def => {}, - // } - // } - // } - // - // if (!rearrange_implicit_operands) break; - // } - // - // if (undef_use) { - // unreachable; - // } - // - // if (early_clobber) { - // unreachable; - // } - // - // if (instruction.id == .copy and instruction.operands.items.len == 2) { - // const dst_register = mir.operands.get(instruction.operands.items[0]).u.register; - // const src_register = mir.operands.get(instruction.operands.items[1]).u.register; - // - // if (std.meta.eql(dst_register, src_register)) { - // try 
register_allocator.coalesced.append(mir.allocator, instruction_index); - // } - // } - // } - // - // // TODO: - // // try register_allocator.reloadAtBegin(instruction_selection.current_block); - // - // // Remove coalesced instructions - // for (register_allocator.coalesced.items) |instruction_index| { - // _ = instruction_index; - // unreachable; - // } - // - // // TODO: fix debug values - // - // } - // - // print("After register allocation before clearing virtual registers:\n{}\n", .{function}); - // - // const clear_virtual_registers = true; - // if (clear_virtual_registers) { - // mir.clearVirtualRegisters(); - // } - // - // unreachable; - // } - - unreachable; + const clear_virtual_registers = true; + if (clear_virtual_registers) { + mir.clearVirtualRegisters(); + } } fn clearVirtualRegisters(mir: *MIR) void { var vr_it = mir.virtual_registers.iterator(); var vr_index = vr_it.getCurrentIndex(); + var verified_virtual_register_count: usize = 0; + var skipped: usize = 0; while (vr_it.nextPointer()) |vr| { - if (!vr.use_def_list_head.valid) { + verified_virtual_register_count += 1; + if (vr.use_def_list_head.invalid) { + skipped += 1; continue; } mir.verifyUseList(vr.use_def_list_head, vr_index); vr_index = vr_it.getCurrentIndex(); } + + logln(.codegen, .register_allocation_operand_list_verification, "Verified {} virtual registers ({} skipped)", .{ verified_virtual_register_count, skipped }); } fn verifyUseList(mir: *MIR, start_operand_index: Operand.Index, register: Register.Virtual.Index) void { @@ -3741,15 +3653,17 @@ pub const MIR = struct { .element = .operand, }).new(mir, start_operand_index); - while (iterator.next()) |operand| { + while (iterator.nextPointer()) |operand| { const instruction_index = operand.parent; - assert(instruction_index.valid); + assert(!instruction_index.invalid); const instruction = mir.instructions.get(instruction_index); - logln(.codegen, .register_allocation_operand_list_verification, "Verifying instruction #{}, operand 
#{}\n", .{ instruction_index.uniqueInteger(), mir.operands.indexOf(operand).uniqueInteger() }); + logln(.codegen, .register_allocation_operand_list_verification, "Verifying instruction #{}, operand #{}", .{ instruction_index.uniqueInteger(), mir.operands.indexOf(operand).uniqueInteger() }); _ = instruction; assert(operand.u == .register); assert(operand.u.register.index == .virtual and operand.u.register.index.virtual.eq(register)); } + + unreachable; } pub fn encode(mir: *MIR) !emit.Result { @@ -3806,10 +3720,7 @@ pub const MIR = struct { }, else => try writer.writeAll(@tagName(operand.u)), } - // switch (operand.u) { - // .memory => - // else => |t| @panic(@tagName(t)), - // } + if (i < instruction.operands.items.len - 1) { try writer.writeByte(','); } @@ -3873,7 +3784,7 @@ pub const MIR = struct { if (instruction == .copy) { const i = instruction_allocation.ptr.*; _ = i; - // print("Built copy: DST: {}. SRC: {}\n", .{ mir.operands.get(i.operands.items[0]).u.register.index, mir.operands.get(i.operands.items[1]).u.register.index }); + // print("Built copy: DST: {}. 
SRC: {}", .{ mir.operands.get(i.operands.items[0]).u.register.index, mir.operands.get(i.operands.items[1]).u.register.index }); } return instruction_allocation.index; From 22243d296c986075d76f678ac144964570f608be Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Thu, 9 Nov 2023 23:20:34 -0600 Subject: [PATCH 5/6] add encoding and elf linking --- src/Compilation.zig | 2 +- src/backend/elf.zig | 336 +++++++---- src/backend/emit.zig | 173 ++++-- src/backend/intermediate_representation.zig | 19 +- src/backend/x86_64.zig | 631 ++++++++++++++++---- src/frontend/semantic_analyzer.zig | 2 +- 6 files changed, 880 insertions(+), 283 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 6604dbd..ef81403 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -695,7 +695,7 @@ pub const Module = struct { casts: BlockList(Cast) = .{}, string_literal_types: data_structures.AutoArrayHashMap(u32, Type.Index) = .{}, array_types: data_structures.AutoArrayHashMap(Array, Type.Index) = .{}, - entry_point: ?u32 = null, + entry_point: Function.Index = Function.Index.invalid, pub const Descriptor = struct { main_package_path: []const u8, diff --git a/src/backend/elf.zig b/src/backend/elf.zig index 14fab53..7d5d188 100644 --- a/src/backend/elf.zig +++ b/src/backend/elf.zig @@ -7,58 +7,96 @@ const Allocator = data_structures.Allocator; const ArrayList = data_structures.ArrayList; const emit = @import("emit.zig"); +const page_size = 0x1000; -pub const Writer = struct { - bytes: ArrayList(u8), - allocator: Allocator, +pub fn writeToMemory(image: *emit.Result) !std.ArrayListAlignedUnmanaged(u8, page_size) { + var file = try std.ArrayListAlignedUnmanaged(u8, 0x1000).initCapacity(image.allocator, 0x100000); + _ = try image.insertSection(0, .{ + .name = "", + .size = page_size, + .alignment = page_size, + .flags = .{ + .read = true, + .write = false, + .execute = false, + }, + .type = .loadable_program, + }); - pub fn init(allocator: Allocator) !Writer { - return 
.{ - .bytes = try ArrayList(u8).initCapacity(allocator, 0x10000), - .allocator = allocator, - }; + const symbol_table_index = try image.addSection(.{ + .name = ".symtab", + .size = page_size, + .alignment = @alignOf(SymbolTable.Entry), + .flags = .{ + .read = false, + .write = false, + .execute = false, + }, + .type = .symbol_table, + }); + const string_table_index = try image.addSection(.{ + .name = ".strtab", + .size = page_size, + .alignment = 1, + .flags = .{ + .read = false, + .write = false, + .execute = false, + }, + .type = .string_table, + }); + const section_header_string_table_index = try image.addSection(.{ + .name = ".shstrtab", + .size = page_size, + .alignment = 1, + .flags = .{ + .read = false, + .write = false, + .execute = false, + }, + .type = .string_table, + }); + + const base_virtual_address = 0x400000; + const text_section_index = 1; + + const program_header_count = blk: { + var result: usize = 0; + for (image.sections.items) |section| { + result += @intFromBool(switch (section.type) { + .null => false, + .loadable_program => true, + .string_table => false, + .symbol_table => false, + }); + } + break :blk result; + }; + + var symbol_name_offset: u32 = 0; + + image.writeToSection(symbol_table_index, std.mem.asBytes(&SymbolTable.Entry{ + .name_offset = symbol_name_offset, + .information = 0, + .other = 0, + .section_header_index = 0, + .value = 0, + .size = 0, + })); + + image.writeToSection(string_table_index, ""); + image.writeByteToSection(string_table_index, 0); + symbol_name_offset += 1; + + for (image.sections.items) |section| { + image.writeToSection(section_header_string_table_index, section.name); + image.writeByteToSection(section_header_string_table_index, 0); } - pub fn getHeader(writer: *Writer) *Header { - return @ptrCast(@alignCast(writer.bytes.items.ptr)); - } + { + var program_segment_offset: usize = 0; - pub fn writeToMemory(writer: *Writer, image: *const emit.Result) !void { - const section_fields = 
@typeInfo(@TypeOf(image.sections)).Struct.fields; - const section_count = blk: { - var result: u16 = 0; - inline for (section_fields) |section_field| { - const section_size = @field(image.sections, section_field.name).index; - result += @intFromBool(section_size > 0); - } - break :blk result; - }; - - const program_header_count = section_count; - const program_start_offset = @sizeOf(Header) + program_header_count * @sizeOf(ProgramHeader); - - var section_offsets: [section_fields.len]u32 = undefined; - - const program_end_offset = blk: { - var result: u32 = program_start_offset; - inline for (section_fields, 0..) |section_field, section_index| { - const section = &@field(image.sections, section_field.name); - if (section.index > 0) { - const section_offset = std.mem.alignForward(u32, result, section.alignment); - section_offsets[section_index] = section_offset; - result = std.mem.alignForward(u32, section_offset + @as(u32, @intCast(section.index)), section.alignment); - } - } - - break :blk result; - }; - - const elf_file_end_offset = program_end_offset + @sizeOf(SectionHeader) * section_count; - try writer.bytes.resize(writer.allocator, elf_file_end_offset); - - const base_address = 0x200000; - - writer.getHeader().* = Header{ + image.writeToSection(0, std.mem.asBytes(&Header{ .endianness = .little, .machine = switch (image.target.cpu.arch) { .x86_64 => .AMD64, @@ -68,79 +106,134 @@ pub const Writer = struct { .linux => .systemv, else => unreachable, }, - .entry = base_address + section_offsets[0] + image.entry_point, - .section_header_offset = program_end_offset, - .program_header_count = program_header_count, - .section_header_count = section_count, - .name_section_header_index = 0, - }; + .entry = 0, + .section_header_offset = 0, + .program_header_count = @intCast(program_header_count), + .section_header_count = @intCast(image.sections.items.len), + .section_header_string_table_index = @intCast(section_header_string_table_index), + })); - var 
program_header_offset: usize = @sizeOf(Header); - var section_header_offset = program_end_offset; - inline for (section_fields, section_offsets) |section_field, section_offset| { - const section_name = section_field.name; - const section = &@field(image.sections, section_name); - if (section.index > 0) { - const program_header: *ProgramHeader = @ptrCast(@alignCast(writer.bytes.items[program_header_offset..].ptr)); - program_header.* = .{ - .type = .load, - .flags = .{ - .executable = equal(u8, section_name, "text"), - .writable = equal(u8, section_name, "data"), - .readable = true, - }, - .offset = 0, - .virtual_address = base_address, - .physical_address = base_address, - .size_in_file = section.index, - .size_in_memory = section.index, - .alignment = 0, - }; + for (image.sections.items, 0..) |section, section_index| { + switch (section.type) { + .loadable_program => { + program_segment_offset = std.mem.alignForward(usize, program_segment_offset, section.alignment); + const virtual_address = base_virtual_address + program_segment_offset; + const program_segment_size = switch (section_index) { + 0 => @sizeOf(Header) + @sizeOf(ProgramHeader) * program_header_count, + else => section.index, + }; + image.writeToSection(0, std.mem.asBytes(&ProgramHeader{ + .type = .load, + .flags = ProgramHeader.Flags{ + .executable = section.flags.execute, + .writable = section.flags.write, + .readable = section.flags.read, + }, + .offset = program_segment_offset, + .virtual_address = virtual_address, + .physical_address = virtual_address, + .size_in_file = program_segment_size, + .size_in_memory = program_segment_size, + .alignment = section.alignment, + })); - const source = section.content[0..section.index]; - const destination = writer.bytes.items[section_offset..][0..source.len]; - @memcpy(destination, source); - - const section_header: *SectionHeader = @ptrCast(@alignCast(writer.bytes.items[section_header_offset..].ptr)); - section_header.* = .{ - .name_offset = 0, - .type = 
.program_data, - .flags = .{ - .alloc = equal(u8, section_name, "text"), - .executable = equal(u8, section_name, "text"), - .writable = equal(u8, section_name, "data"), - }, - .address = base_address + section_offset, - .offset = section_offset, - .size = section.index, - .link = 0, - .info = 0, - .alignment = 0, - .entry_size = 0, - }; + program_segment_offset += program_segment_size; + }, + .null, + .string_table, + .symbol_table, + => {}, } } } - pub fn writeToFile(writer: *const Writer, file_path: []const u8) !void { - std.debug.print("Writing file to {s}\n", .{file_path}); - const flags = switch (@import("builtin").os.tag) { - .windows => .{}, - else => .{ - .mode = 0o777, - }, + { + var section_offset: usize = 0; + var section_headers = try ArrayList(SectionHeader).initCapacity(image.allocator, image.sections.items.len); + var section_name_offset: u32 = 0; + + for (image.sections.items, 0..) |section, section_i| { + section_offset = std.mem.alignForward(usize, section_offset, section.alignment); + const virtual_address = base_virtual_address + section_offset; + + for (section.symbol_table.keys(), section.symbol_table.values()) |symbol_name, symbol_offset| { + const symbol_address = virtual_address + symbol_offset; + image.writeToSection(symbol_table_index, std.mem.asBytes(&SymbolTable.Entry{ + .name_offset = symbol_name_offset, + .information = 0x10, + .other = 0, + .section_header_index = @intCast(section_i), + .value = symbol_address, + .size = 0, + })); + + image.writeToSection(string_table_index, symbol_name); + image.writeByteToSection(string_table_index, 0); + + symbol_name_offset += @intCast(symbol_name.len + 1); + } + + const source = section.content[0..section.index]; + file.items.len = section_offset + source.len; + try file.replaceRange(image.allocator, section_offset, source.len, source); + + section_headers.appendAssumeCapacity(SectionHeader{ + .name_offset = section_name_offset, + .type = switch (section_i) { + 0 => .null, + else => switch 
(section.type) { + .loadable_program => .program_data, + .string_table => .string_table, + .symbol_table => .symbol_table, + .null => .null, + }, + }, + .flags = .{ + .alloc = true, + .executable = section.flags.execute, + .writable = section.flags.write, + }, + .virtual_address = virtual_address, + .file_offset = section_offset, + .size = section.index, + .link = switch (section.type) { + .symbol_table => @intCast(string_table_index), + else => 0, + }, + .info = switch (section.type) { + .symbol_table => 1, + else => 0, + }, + .alignment = 0, + .entry_size = switch (section.type) { + .symbol_table => @sizeOf(SymbolTable.Entry), + else => 0, + }, + }); + + section_offset += section.index; + section_name_offset += @intCast(section.name.len + 1); + } + + const section_header_offset = std.mem.alignForward(usize, section_offset, @alignOf(SectionHeader)); + const section_header_bytes = std.mem.sliceAsBytes(section_headers.items); + try file.ensureTotalCapacity(image.allocator, section_header_offset + section_header_bytes.len); + file.items.len = section_header_offset + section_header_bytes.len; + try file.replaceRange(image.allocator, section_header_offset, section_header_bytes.len, section_header_bytes); + + const _start_offset = blk: { + const entry_offset = image.sections.items[text_section_index].symbol_table.values()[image.entry_point]; + const text_section_virtual_address = section_headers.items[text_section_index].virtual_address; + break :blk text_section_virtual_address + entry_offset; }; - const file_descriptor = try std.fs.cwd().createFile(file_path, flags); - try file_descriptor.writeAll(writer.bytes.items); - file_descriptor.close(); + + const header: *Header = @ptrCast(file.items.ptr); + header.section_header_offset = section_header_offset; + header.entry = _start_offset; } - pub fn writeToFileAbsolute(writer: *const Writer, absolute_file_path: []const u8) !void { - const file = try std.fs.createFileAbsolute(absolute_file_path, .{}); - defer file.close(); 
- try file.writeAll(writer.bytes.items); - } -}; + return file; +} const Header = extern struct { magic: u8 = 0x7f, @@ -163,7 +256,7 @@ const Header = extern struct { program_header_count: u16 = 1, section_header_size: u16 = @sizeOf(SectionHeader), section_header_count: u16, - name_section_header_index: u16, + section_header_string_table_index: u16, const BitCount = enum(u8) { @"32" = 1, @@ -197,14 +290,14 @@ const Header = extern struct { }; const ProgramHeader = extern struct { - type: Type = .load, + type: Type, flags: Flags, offset: u64, virtual_address: u64, physical_address: u64, size_in_file: u64, size_in_memory: u64, - alignment: u64 = 0, + alignment: u64, const Type = enum(u32) { null = 0, @@ -232,8 +325,8 @@ const SectionHeader = extern struct { name_offset: u32, type: Type, flags: Flags, - address: u64, - offset: u64, + virtual_address: u64, + file_offset: u64, size: u64, // section index link: u32, @@ -279,3 +372,14 @@ const SectionHeader = extern struct { _reserved: u53 = 0, }; }; + +const SymbolTable = extern struct { + const Entry = extern struct { + name_offset: u32, + information: u8, + other: u8, + section_header_index: u16, + value: u64, + size: u64, + }; +}; diff --git a/src/backend/emit.zig b/src/backend/emit.zig index ce6aca2..e1395fa 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -24,38 +24,130 @@ const jit_callconv = .SysV; const Section = struct { content: []align(page_size) u8, index: usize = 0, - alignment: u32 = 0x10, + alignment: u32, + name: []const u8, + flags: Flags, + type: Type, + symbol_table: std.StringArrayHashMapUnmanaged(u32) = .{}, + + const Type = enum { + null, + loadable_program, + string_table, + symbol_table, + }; + + const Flags = packed struct { + read: bool, + write: bool, + execute: bool, + }; }; pub const Result = struct { - sections: struct { - text: Section, - rodata: Section, - data: Section, - }, - entry_point: u32 = 0, + sections: ArrayList(Section) = .{}, + // sections: struct { + // text: 
Section, + // rodata: Section, + // data: Section, + // }, + entry_point: u32, target: std.Target, + allocator: Allocator, - pub fn create(target: std.Target) !Result { - return Result{ - .sections = .{ - .text = .{ .content = try mmap(page_size, .{ .executable = true }) }, - .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) }, - .data = .{ .content = try mmap(page_size, .{ .executable = false }) }, - }, + const text_section_index = 0; + + pub fn create(allocator: Allocator, target: std.Target, entry_point_index: u32) !Result { + var result = Result{ + // .sections = .{ + // .text = .{ .content = try mmap(page_size, .{ .executable = true }) }, + // .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) }, + // .data = .{ .content = try mmap(page_size, .{ .executable = false }) }, + // }, .target = target, + .allocator = allocator, + .entry_point = entry_point_index, }; + + _ = try result.addSection(.{ + .name = ".text", + .size = 0x1000, + .alignment = 0x1000, + .flags = .{ + .execute = true, + .read = true, + .write = false, + }, + .type = .loadable_program, + }); + + return result; + } + + const SectionCreation = struct { + name: []const u8, + size: usize, + alignment: u32, + flags: Section.Flags, + type: Section.Type, + }; + + pub fn addSection(result: *Result, arguments: SectionCreation) !usize { + const index = result.sections.items.len; + assert(std.mem.isAligned(arguments.size, page_size)); + + try result.sections.append(result.allocator, .{ + .content = try mmap(arguments.size, .{ .executable = arguments.flags.execute }), + .alignment = arguments.alignment, + .name = arguments.name, + .flags = arguments.flags, + .type = arguments.type, + }); + + return index; + } + + pub fn insertSection(result: *Result, index: usize, arguments: SectionCreation) !usize { + assert(std.mem.isAligned(arguments.size, page_size)); + try result.sections.insert(result.allocator, index, .{ + .content = try mmap(arguments.size, .{ .executable = 
arguments.flags.execute }), + .alignment = arguments.alignment, + .name = arguments.name, + .flags = arguments.flags, + .type = arguments.type, + }); + + return index; + } + + pub fn alignSection(result: *Result, index: usize, alignment: usize) void { + const index_ptr = &result.sections.items[index].index; + index_ptr.* = std.mem.alignForward(usize, index_ptr.*, alignment); + } + + pub fn writeToSection(image: *Result, section_index: usize, bytes: []const u8) void { + const section = &image.sections.items[section_index]; + const destination = section.content[section.index..][0..bytes.len]; + @memcpy(destination, bytes); + section.index += bytes.len; + } + + pub fn writeByteToSection(image: *Result, section_index: usize, byte: u8) void { + const section = &image.sections.items[section_index]; + section.content[section.index] = byte; + section.index += 1; + } + + pub fn getTextSection(result: *Result) *Section { + return &result.sections.items[0]; } pub fn appendCode(image: *Result, code: []const u8) void { - const destination = image.sections.text.content[image.sections.text.index..][0..code.len]; - @memcpy(destination, code); - image.sections.text.index += code.len; + image.writeToSection(text_section_index, code); } pub fn appendCodeByte(image: *Result, code_byte: u8) void { - image.sections.text.content[image.sections.text.index] = code_byte; - image.sections.text.index += 1; + image.writeByteToSection(text_section_index, code_byte); } fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType { @@ -70,16 +162,31 @@ pub const Result = struct { return @as(*const FunctionType, @ptrCast(&image.sections.text.content[image.entry_point])); } - fn writeElf(image: *const Result, allocator: Allocator, executable_relative_path: []const u8) !void { - var writer = try elf.Writer.init(allocator); - try writer.writeToMemory(image); - try writer.writeToFile(executable_relative_path); + fn writeElf(image: *Result, executable_relative_path: []const 
u8) !void { + const file_in_memory = try elf.writeToMemory(image); + try writeFile(file_in_memory.items, executable_relative_path); } - fn writePe(image: *const Result, allocator: Allocator, executable_relative_path: []const u8) !void { - var writer = try pe.Writer.init(allocator); - try writer.writeToMemory(image); - try writer.writeToFile(executable_relative_path); + fn writeFile(bytes: []const u8, path: []const u8) !void { + const flags = switch (@import("builtin").os.tag) { + .windows => .{}, + else => .{ + .mode = 0o777, + }, + }; + + const file_descriptor = try std.fs.cwd().createFile(path, flags); + try file_descriptor.writeAll(bytes); + file_descriptor.close(); + } + + fn writePe(image: *Result, executable_relative_path: []const u8) !void { + _ = executable_relative_path; + _ = image; + // var writer = try pe.Writer.init(allocator); + // try writer.writeToMemory(image); + // try writer.writeToFile(executable_relative_path); + unreachable; } }; @@ -124,15 +231,13 @@ pub fn get(comptime arch: std.Target.Cpu.Arch) type { var mir = try backend.MIR.selectInstructions(allocator, intermediate, descriptor.target); try mir.allocateRegisters(); const os = descriptor.target.os.tag; - _ = os; const image = try mir.encode(); - _ = image; - // switch (os) { - // .linux => try image.writeElf(allocator, descriptor.executable_path), - // .windows => try image.writePe(allocator, descriptor.executable_path), - // else => unreachable, - // } + switch (os) { + .linux => try image.writeElf(descriptor.executable_path), + .windows => try image.writePe(descriptor.executable_path), + else => unreachable, + } }, else => { const file = try std.fs.cwd().readFileAlloc(allocator, "main", std.math.maxInt(u64)); diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index 839e029..73d2d07 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -42,7 +42,7 @@ pub const Result = struct { 
casts: BlockList(Cast) = .{}, readonly_data: ArrayList(u8) = .{}, module: *Module, - entry_point: u32 = 0, + entry_point: Function.Index = Function.Index.invalid, pub fn getFunctionName(ir: *Result, function_index: Function.Declaration.Index) []const u8 { return ir.module.getName(ir.module.function_name_map.get(@bitCast(function_index)).?).?; @@ -58,13 +58,20 @@ pub fn initialize(compilation: *Compilation, module: *Module) !*Result { }; builder.ir.module = module; - builder.ir.entry_point = module.entry_point orelse unreachable; + var sema_function_index = function_iterator.getCurrentIndex(); while (function_iterator.next()) |sema_function| { const function_index = try builder.buildFunction(sema_function); - _ = function_index; + if (sema_function_index.eq(module.entry_point)) { + assert(!function_index.invalid); + builder.ir.entry_point = function_index; + } + + sema_function_index = function_iterator.getCurrentIndex(); } + assert(!builder.ir.entry_point.invalid); + return &builder.ir; } @@ -398,7 +405,7 @@ pub const Builder = struct { return builder.ir.function_definitions.get(builder.current_function_index); } - fn buildFunction(builder: *Builder, sema_function: Compilation.Function) !void { + fn buildFunction(builder: *Builder, sema_function: Compilation.Function) !Function.Index { const sema_prototype = builder.module.function_prototypes.get(builder.module.types.get(sema_function.prototype).function); const function_declaration_allocation = try builder.ir.function_declarations.addOne(builder.allocator); const function_declaration = function_declaration_allocation.ptr; @@ -425,7 +432,7 @@ pub const Builder = struct { } switch (sema_prototype.attributes.@"extern") { - true => {}, + true => return Function.Index.invalid, false => { const function_allocation = try builder.ir.function_definitions.append(builder.allocator, .{ .ir = &builder.ir, @@ -495,6 +502,8 @@ pub const Builder = struct { builder.currentFunction().current_stack_offset = 
std.mem.alignForward(usize, builder.currentFunction().current_stack_offset, 0x10); try builder.optimizeFunction(builder.currentFunction()); + + return function_allocation.index; }, } } diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index 6bf50c4..7bdda85 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -35,20 +35,23 @@ pub const Logger = enum { register_allocation_instruction_avoid_copy, register_allocation_function_after, register_allocation_operand_list_verification, + encoding, pub var bitset = std.EnumSet(Logger).initMany(&.{ .instruction_selection_ir_function, + .instruction_selection_mir_function, // .instruction_selection_register_operand_list, - .register_allocation_block, + // .register_allocation_block, // .register_allocation_problematic_hint, // .register_allocation_assignment, // .register_allocation_reload, - .register_allocation_function_before, + // .register_allocation_function_before, // .register_allocation_new_instruction, // .register_allocation_new_instruction_function_before, // .register_allocation_instruction_avoid_copy, .register_allocation_function_after, - .register_allocation_operand_list_verification, + // .register_allocation_operand_list_verification, + .encoding, }); }; @@ -1238,33 +1241,6 @@ const InstructionSelection = struct { try instruction_selection.instruction_cache.append(mir.allocator, instr); }, } - // const instruction_descriptor = instruction_descriptors.getPtrConst(instruction_id); - // - // switch (integer.value.unsigned == 0) { - // true => switch (value_type) { - // .i32 => .mov32r0, - // else => |t| @panic(@tagName(t)), - // }, - // false => blk: { - // - // const destination_register = try mir.createVirtualRegister(destination_register_class); - // const destination_operand = mir.constrainOperandRegisterClass(instruction_descriptor, destination_register, 0, .{ .type = .def }); - // - // const instr = try mir.buildInstruction(instruction_selection, instruction_id, &.{ - // 
destination_operand, - // Operand{ - // .id = .immediate, - // .u = .{ - // .immediate = integer.value.unsigned, - // }, - // .flags = .{}, - // }, - // }); - // try instruction_selection.instruction_cache.append(mir.allocator, instr); - // - // break :blk destination_register; - // }, - // } } fn getAddressingModeFromIr(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) AddressingMode { @@ -1293,17 +1269,6 @@ const InstructionSelection = struct { unreachable; } } - // const gop = try instruction_selection.local_value_map.getOrPut(allocator, ir_instruction_index); - // if (gop.found_existing) { - // const stored_register = gop.value_ptr.*; - // if (std.meta.eql(stored_register, register)) { - // unreachable; - // } else { - // std.debug.panic("Register mismatch: Stored: {} Got: {}", .{ stored_register, register }); - // } - // } else { - // gop.value_ptr.* = register; - // } } fn lowerArguments(instruction_selection: *InstructionSelection, mir: *MIR, ir_function: *ir.Function) !void { @@ -1478,20 +1443,24 @@ const Instruction = struct { mov64mr, mov32ri, mov32ri64, + mov32rr, movsx64rm32, movsx64rr32, ret, syscall, ud2, + xor32rr, }; pub const Descriptor = struct { operands: []const Operand.Reference = &.{}, - opcode: u16 = 0, + opcode: u16, format: Format = .pseudo, - flags: Flags, + flags: Flags = .{}, + const Flags = packed struct { - implicit_def: bool, + implicit_def: bool = false, + two_byte_prefix: bool = false, }; const Format = enum { @@ -1501,6 +1470,7 @@ const Instruction = struct { mrm_dest_mem, mrm_source_mem, mrm_source_reg, + mrm_dest_reg, }; }; @@ -1738,7 +1708,7 @@ pub const Operand = struct { }; const PCRelative = union(enum) { - function_declaration: ir.Function.Declaration.Index, + function_declaration: MIR.Function.Index, string_literal: ir.StringLiteral.Index, imm32: i32, imm8: i8, @@ -1794,18 +1764,17 @@ const register_class_operand_matcher = std.EnumArray(Operand.Id, Register.Class) const 
instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descriptor).init(.{ .call64pcrel32 = .{ .format = .no_operands, + .opcode = 0xe8, .operands = &.{ .{ .id = .i64i32imm_brtarget, .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .copy = .{ .format = .pseudo, + .opcode = 0, .operands = &.{ .{ .id = .unknown, @@ -1816,12 +1785,10 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .lea64r = .{ .format = .mrm_source_mem, + .opcode = 0x8d, .operands = &.{ .{ .id = .gp64, @@ -1832,24 +1799,20 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .mov32r0 = .{ .format = .pseudo, + .opcode = 0, .operands = &.{ .{ .id = .gp32, .kind = .dst, }, }, - .flags = .{ - .implicit_def = false, - }, }, .mov32rm = .{ .format = .mrm_source_mem, + .opcode = 0x8b, .operands = &.{ .{ .id = .gp32, @@ -1860,12 +1823,10 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .mov64rm = .{ .format = .mrm_source_mem, + .opcode = 0x8b, .operands = &.{ .{ .id = .gp64, @@ -1876,12 +1837,24 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, + }, + .mov32rr = .{ + .format = .mrm_dest_reg, + .opcode = 0x89, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, }, }, .mov32mr = .{ .format = .mrm_dest_mem, + .opcode = 0x89, .operands = &.{ .{ .id = .i32mem, @@ -1892,12 +1865,10 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .mov64mr = .{ .format = .mrm_dest_mem, + .opcode = 0x89, .operands = &.{ .{ .id = .i64mem, @@ -1908,12 +1879,10 @@ const 
instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .mov32ri = .{ .format = .add_reg, + .opcode = 0xb8, .operands = &.{ .{ .id = .gp32, @@ -1924,12 +1893,10 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .mov32ri64 = .{ .format = .pseudo, + .opcode = 0, .operands = &.{ .{ .id = .gp64, @@ -1940,12 +1907,10 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .movsx64rm32 = .{ - .format = .mrm_source_reg, + .format = .mrm_source_mem, + .opcode = 0x63, .operands = &.{ .{ .id = .gp64, @@ -1956,12 +1921,10 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .movsx64rr32 = .{ .format = .mrm_source_reg, + .opcode = 0x63, .operands = &.{ .{ .id = .gp64, @@ -1972,34 +1935,45 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .ret = .{ .format = .no_operands, + .opcode = 0xc3, .operands = &.{ .{ .id = .unknown, .kind = .src, }, }, - .flags = .{ - .implicit_def = false, - }, }, .syscall = .{ .format = .no_operands, + .opcode = 0x05, .operands = &.{}, .flags = .{ - .implicit_def = false, + .two_byte_prefix = true, }, }, .ud2 = .{ .format = .no_operands, + .opcode = 0x0b, .operands = &.{}, .flags = .{ - .implicit_def = false, + .two_byte_prefix = true, + }, + }, + .xor32rr = .{ + .format = .mrm_dest_reg, + .opcode = 0x31, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, }, }, }); @@ -2052,6 +2026,8 @@ pub const MIR = struct { operands: BlockList(Operand) = .{}, instruction_selections: ArrayList(InstructionSelection) = .{}, virtual_registers: 
BlockList(Register.Virtual) = .{}, + function_declaration_map: std.AutoHashMapUnmanaged(ir.Function.Declaration.Index, Function.Index) = .{}, + entry_point: u32 = 0, pub fn selectInstructions(allocator: Allocator, intermediate: *ir.Result, target: std.Target) !*MIR { logln(.codegen, .instruction_selection_block, "\n[INSTRUCTION SELECTION]\n", .{}); @@ -2066,15 +2042,16 @@ pub const MIR = struct { try mir.functions.ensureCapacity(allocator, intermediate.function_definitions.len); try mir.instruction_selections.ensureUnusedCapacity(allocator, intermediate.function_definitions.len); - var function_definition_iterator = intermediate.function_definitions.iterator(); + var ir_function_definition_iterator = intermediate.function_definitions.iterator(); + try mir.function_declaration_map.ensureTotalCapacity(mir.allocator, @intCast(intermediate.function_definitions.len)); - while (function_definition_iterator.nextPointer()) |ir_function| { + while (ir_function_definition_iterator.nextPointer()) |ir_function| { const fn_name = mir.ir.getFunctionName(ir_function.declaration); - logln(.codegen, .instruction_selection_ir_function, "Selecting instructions for {}", .{ir_function}); const instruction_selection = mir.instruction_selections.addOneAssumeCapacity(); const function_allocation = try mir.functions.addOne(mir.allocator); const function = function_allocation.ptr; + mir.function_declaration_map.putAssumeCapacityNoClobber(ir_function.declaration, function_allocation.index); function.* = .{ .mir = mir, .instruction_selection = instruction_selection, @@ -2083,12 +2060,29 @@ pub const MIR = struct { instruction_selection.* = .{ .function = function, }; + } + + var function_iterator = mir.functions.iterator(); + ir_function_definition_iterator = intermediate.function_definitions.iterator(); + + var entry_point: ?u32 = null; + var ir_function_index = ir_function_definition_iterator.getCurrentIndex(); + while (ir_function_definition_iterator.nextPointer()) |ir_function| { + const 
function_index = function_iterator.getCurrentIndex(); + const function = function_iterator.nextPointer() orelse unreachable; + logln(.codegen, .instruction_selection_ir_function, "Selecting instructions for {}", .{ir_function}); + const instruction_selection = function.instruction_selection; + + if (ir_function_index.eq(intermediate.entry_point)) { + entry_point = function_index.uniqueInteger(); + } const ir_function_declaration = mir.ir.function_declarations.get(ir_function.declaration); const calling_convention = calling_conventions.get(ir_function_declaration.calling_convention); try instruction_selection.block_map.ensureUnusedCapacity(allocator, @intCast(ir_function.blocks.items.len)); try function.blocks.ensureTotalCapacity(allocator, ir_function.blocks.items.len); + for (ir_function.blocks.items) |block| { const block_allocation = try mir.blocks.append(allocator, .{}); instruction_selection.block_map.putAssumeCapacity(block, block_allocation.index); @@ -2521,7 +2515,7 @@ pub const MIR = struct { .id = .i64i32imm_brtarget, .u = .{ .pc_relative = .{ - .function_declaration = ir_call.function, + .function_declaration = mir.function_declaration_map.get(ir_call.function).?, }, }, .flags = .{}, @@ -2605,8 +2599,12 @@ pub const MIR = struct { try instruction_selection.emitLiveInCopies(mir, function.blocks.items[0]); logln(.codegen, .instruction_selection_ir_function, "Selected instructions for {}", .{function}); + + ir_function_index = ir_function_definition_iterator.getCurrentIndex(); } + mir.entry_point = entry_point orelse unreachable; + return mir; } @@ -3666,10 +3664,337 @@ pub const MIR = struct { unreachable; } - pub fn encode(mir: *MIR) !emit.Result { - _ = mir; - // unreachable; - return undefined; + fn getGP32Encoding(operand: Operand) Encoding.GP32 { + assert(operand.id == .gp32); + const physical_register = operand.u.register.index.physical; + const gp_register_encoding: Encoding.GP32 = switch (physical_register) { + .eax => .a, + .edi => .di, + else => 
|t| @panic(@tagName(t)), + }; + + return gp_register_encoding; + } + + fn getGP64Encoding(operand: Operand) Encoding.GP64 { + assert(operand.id == .gp64); + const physical_register = operand.u.register.index.physical; + const gp_register_encoding: Encoding.GP64 = switch (physical_register) { + .rax => .a, + .rdi => .di, + else => |t| @panic(@tagName(t)), + }; + + return gp_register_encoding; + } + + pub fn encode(mir: *MIR) !*emit.Result { + const image = try mir.allocator.create(emit.Result); + image.* = try emit.Result.create(mir.allocator, mir.target, mir.entry_point); + + var function_iterator = mir.functions.iterator(); + + var function_offsets = std.AutoArrayHashMapUnmanaged(Function.Index, u32){}; + try function_offsets.ensureTotalCapacity(mir.allocator, mir.functions.len); + try image.sections.items[0].symbol_table.ensureTotalCapacity(mir.allocator, mir.functions.len); + + while (function_iterator.nextPointer()) |function| { + const function_index = mir.functions.indexOf(function); + logln(.codegen, .encoding, "\n{s}:", .{function.name}); + + const function_offset: u32 = @intCast(image.getTextSection().index); + + function_offsets.putAssumeCapacityNoClobber(function_index, function_offset); + image.sections.items[0].symbol_table.putAssumeCapacityNoClobber(function.name, function_offset); + + const stack_size = blk: { + var result: u32 = 0; + + for (function.instruction_selection.stack_objects.items) |stack_object| { + assert(std.mem.isAligned(result, stack_object.alignment)); + result += @intCast(stack_object.size); + } + + break :blk result; + }; + + if (stack_size != 0) { + image.appendCodeByte(0x55); // push rbp + image.appendCode(&.{ 0x48, 0x89, 0xe5 }); // mov rbp, rsp + + // sub rsp, stack_offset + if (std.math.cast(u8, stack_size)) |stack_size_u8| { + image.appendCode(&.{ 0x48, 0x83, 0xec, stack_size_u8 }); + } else { + unreachable; + } + } + + for (function.blocks.items) |block_index| { + const block = mir.blocks.get(block_index); + for 
(block.instructions.items) |instruction_index| { + const instruction = mir.instructions.get(instruction_index); + + const instruction_offset = image.getTextSection().index; + + switch (instruction.id) { + .mov32r0 => { + assert(instruction.operands.items.len == 1); + const operand = mir.operands.get(instruction.operands.items[0]); + const gp_register_encoding = getGP32Encoding(operand.*); + const new_instruction_id = Instruction.Id.xor32rr; + const instruction_descriptor = instruction_descriptors.get(new_instruction_id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + const direct = true; + const modrm = ModRm{ + .rm = @intCast(@intFromEnum(gp_register_encoding)), + .reg = @intCast(@intFromEnum(gp_register_encoding)), + .mod = @as(u2, @intFromBool(direct)) << 1 | @intFromBool(direct), + }; + image.appendCodeByte(@bitCast(modrm)); + }, + .ret => {}, + .mov32mr => { + assert(instruction.operands.items.len == 2); + const source_operand = mir.operands.get(instruction.operands.items[1]); + const source_gp32 = getGP32Encoding(source_operand.*); + + const destination_operand = mir.operands.get(instruction.operands.items[0]); + assert(destination_operand.u == .memory); + const memory = destination_operand.u.memory; + const instruction_descriptor = instruction_descriptors.get(instruction.id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + + const modrm = ModRm{ + .rm = @intFromEnum(Encoding.GP32.bp), + .reg = @intCast(@intFromEnum(source_gp32)), + .mod = @as(u2, @intFromBool(false)) << 1 | @intFromBool(true), + }; + image.appendCodeByte(@bitCast(modrm)); + + switch (memory.addressing_mode.base) { + .frame_index => |frame_index| { + const stack_offset = blk: { + var computed_stack_offset: usize = 0; + for (function.instruction_selection.stack_objects.items[0 .. 
frame_index + 1]) |stack_object| { + assert(std.mem.isAligned(computed_stack_offset, stack_object.alignment)); + computed_stack_offset += stack_object.size; + } + + break :blk -@as(i64, @intCast(computed_stack_offset)); + }; + const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; + + const stack_bytes = std.mem.asBytes(&stack_offset)[0..displacement_bytes]; + image.appendCode(stack_bytes); + }, + else => |t| @panic(@tagName(t)), + } + }, + .mov32rm => { + assert(instruction.operands.items.len == 2); + + const instruction_descriptor = instruction_descriptors.get(instruction.id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + + const destination_operand = mir.operands.get(instruction.operands.items[0]); + const destination_gp32 = getGP32Encoding(destination_operand.*); + + const source_operand = mir.operands.get(instruction.operands.items[1]); + assert(source_operand.u == .memory); + const source_memory = source_operand.u.memory; + + const modrm = ModRm{ + .rm = @intFromEnum(Encoding.GP32.bp), + .reg = @intCast(@intFromEnum(destination_gp32)), + .mod = @as(u2, @intFromBool(false)) << 1 | @intFromBool(true), + }; + image.appendCodeByte(@bitCast(modrm)); + + switch (source_memory.addressing_mode.base) { + .frame_index => |frame_index| { + const stack_offset = blk: { + var computed_stack_offset: usize = 0; + for (function.instruction_selection.stack_objects.items[0 .. 
frame_index + 1]) |stack_object| { + assert(std.mem.isAligned(computed_stack_offset, stack_object.alignment)); + computed_stack_offset += stack_object.size; + } + + break :blk -@as(i64, @intCast(computed_stack_offset)); + }; + const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; + + const stack_bytes = std.mem.asBytes(&stack_offset)[0..displacement_bytes]; + image.appendCode(stack_bytes); + }, + else => |t| @panic(@tagName(t)), + } + }, + .mov32ri64 => { + assert(instruction.operands.items.len == 2); + const source_operand = mir.operands.get(instruction.operands.items[1]); + const source_immediate: u32 = @intCast(source_operand.u.immediate); + + const destination_operand = mir.operands.get(instruction.operands.items[0]); + const destination_gp64 = getGP64Encoding(destination_operand.*); + const destination_gp32 = switch (destination_gp64) { + inline else => |gp64| @field(Encoding.GP32, @tagName(gp64)), + }; + + const opcode = @as(u8, 0xb8) | @as(u3, @intCast(@intFromEnum(destination_gp32))); + image.appendCodeByte(opcode); + + image.appendCode(std.mem.asBytes(&source_immediate)); + }, + .movsx64rm32 => { + assert(instruction.operands.items.len == 2); + + const destination_operand = mir.operands.get(instruction.operands.items[0]); + const destination_register = getGP64Encoding(destination_operand.*); + + const source_operand = mir.operands.get(instruction.operands.items[1]); + const source_memory = source_operand.u.memory; + + const rex = Rex{ + .b = false, + .x = false, + .r = false, + .w = true, + }; + image.appendCodeByte(@bitCast(rex)); + + const instruction_descriptor = instruction_descriptors.get(instruction.id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + + const modrm = ModRm{ + .rm = @intFromEnum(Encoding.GP32.bp), + .reg = @intCast(@intFromEnum(destination_register)), + .mod = @as(u2, 
@intFromBool(false)) << 1 | @intFromBool(true), + }; + image.appendCodeByte(@bitCast(modrm)); + + switch (source_memory.addressing_mode.base) { + .frame_index => |frame_index| { + const stack_offset = blk: { + var computed_stack_offset: usize = 0; + for (function.instruction_selection.stack_objects.items[0 .. frame_index + 1]) |stack_object| { + assert(std.mem.isAligned(computed_stack_offset, stack_object.alignment)); + computed_stack_offset += stack_object.size; + } + + break :blk -@as(i64, @intCast(computed_stack_offset)); + }; + const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; + + const stack_bytes = std.mem.asBytes(&stack_offset)[0..displacement_bytes]; + image.appendCode(stack_bytes); + }, + else => |t| @panic(@tagName(t)), + } + }, + .syscall => image.appendCode(&.{ 0x0f, 0x05 }), + .ud2 => image.appendCode(&.{ 0x0f, 0x0b }), + .call64pcrel32 => { + // TODO: emit relocation + assert(instruction.operands.items.len == 1); + const operand = mir.operands.get(instruction.operands.items[0]); + const instruction_descriptor = instruction_descriptors.get(instruction.id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + + switch (operand.u) { + .pc_relative => |pc_relative| { + // TODO: fix + const callee = pc_relative.function_declaration; + const caller = function_index; + + const instruction_len = 5; + + if (callee.uniqueInteger() <= caller.uniqueInteger()) { + const callee_offset: i64 = @intCast(function_offsets.get(callee).?); + const caller_offset: i64 = @intCast(instruction_offset + instruction_len); + const offset: i32 = @intCast(callee_offset - caller_offset); + image.appendCode(std.mem.asBytes(&offset)); + } else { + image.appendCode(&.{ 0, 0, 0, 0 }); + unreachable; + } + }, + else => |t| @panic(@tagName(t)), + } + }, + .copy => { + assert(instruction.operands.items.len == 2); + const 
destination_operand = mir.operands.get(instruction.operands.items[0]); + const source_operand = mir.operands.get(instruction.operands.items[1]); + assert(destination_operand.id == source_operand.id); + + // const destination_physical_register = destination_operand.u.register.index.physical; + // _ = destination_physical_register; + // const source_physical_register = source_operand.u.register.index.physical; + switch (destination_operand.id) { + .gp32 => { + image.appendCodeByte(0x89); + + const destination_register = getGP32Encoding(destination_operand.*); + const source_register = getGP32Encoding(source_operand.*); + const modrm = ModRm{ + .rm = @intCast(@intFromEnum(destination_register)), + .reg = @intCast(@intFromEnum(source_register)), + .mod = @as(u2, @intFromBool(true)) << 1 | @intFromBool(true), + }; + image.appendCodeByte(@bitCast(modrm)); + }, + else => |t| @panic(@tagName(t)), + } + }, + else => |t| @panic(@tagName(t)), + } + + if (instruction_offset != image.getTextSection().index) { + const print_tags = true; + if (print_tags) { + var offset = @tagName(instruction.id).len + 2; + log(.codegen, .encoding, "{s}: ", .{@tagName(instruction.id)}); + const margin = 16; + while (offset < margin) : (offset += 1) { + log(.codegen, .encoding, " ", .{}); + } + } + for (image.getTextSection().content[instruction_offset..image.getTextSection().index]) |byte| { + log(.codegen, .encoding, "0x{x:0>2} ", .{byte}); + } + log(.codegen, .encoding, "\n", .{}); + } + } + } + + const last_block_index = function.blocks.items[function.blocks.items.len - 1]; + const last_block = mir.blocks.get(last_block_index); + const last_block_last_instruction_index = last_block.instructions.items[last_block.instructions.items.len - 1]; + const last_block_last_instruction = mir.instructions.get(last_block_last_instruction_index); + + if (last_block_last_instruction.id == .ret) { + if (stack_size != 0) { + // add rsp, stack_offset + if (std.math.cast(u8, stack_size)) |stack_size_u8| { + 
image.appendCode(&.{ 0x48, 0x83, 0xc4, stack_size_u8 }); + } else { + unreachable; + } + + image.appendCodeByte(0x5d); // pop rbp + } + + image.appendCodeByte(0xc3); + } + } + + return image; } fn getRegisterListHead(mir: *MIR, instruction_selection: *InstructionSelection, register: Register) *Operand.Index { @@ -3691,6 +4016,10 @@ pub const MIR = struct { mir: *MIR, name: []const u8, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; + pub fn format(function: *const Function, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { const function_name = function.name; try writer.print("{s}:\n", .{function_name}); @@ -3820,39 +4149,50 @@ pub const MIR = struct { } }; -// const ModRm = packed struct(u8) { -// rm: u3, -// reg: u3, -// mod: u2, -// }; +const ModRm = packed struct(u8) { + rm: u3, + reg: u3, + mod: u2, +}; -// const Rex = packed struct(u8) { -// b: bool, -// x: bool, -// r: bool, -// w: bool, -// fixed: u4 = 0b0100, -// -// fn create(args: struct { -// rm: ?GPRegister = null, -// reg: ?GPRegister = null, -// sib: bool = false, -// rm_size: ?Size = null, -// }) ?Rex { -// const rex_byte = Rex{ -// .b = if (args.rm) |rm| @intFromEnum(rm) > std.math.maxInt(u3) else false, -// .x = args.sib, -// .r = if (args.reg) |reg| @intFromEnum(reg) > std.math.maxInt(u3) else false, -// .w = if (args.rm_size) |rm_size| rm_size == .eight else false, -// }; -// -// if (@as(u4, @truncate(@as(u8, @bitCast(rex_byte)))) != 0) { -// return rex_byte; -// } else { -// return null; -// } -// } -// }; +const Rex = packed struct(u8) { + b: bool, + x: bool, + r: bool, + w: bool, + fixed: u4 = 0b0100, + + // fn create32RR(args: struct { + // rm: Encoding.GP32, + // reg: Encoding.GP32, + // sib: bool = false, + // }) ?Rex { + // if (args.sib) { + // unreachable; + // } else { + // } + // } + + // fn create(args: struct { + // rm: ?GPRegister = null, + // reg: ?GPRegister = null, + // sib: bool = 
false, + // rm_size: ?Size = null, + // }) ?Rex { + // const rex_byte = Rex{ + // .b = if (args.rm) |rm| @intFromEnum(rm) > std.math.maxInt(u3) else false, + // .x = args.sib, + // .r = if (args.reg) |reg| @intFromEnum(reg) > std.math.maxInt(u3) else false, + // .w = if (args.rm_size) |rm_size| rm_size == .eight else false, + // }; + // + // if (@as(u4, @truncate(@as(u8, @bitCast(rex_byte)))) != 0) { + // return rex_byte; + // } else { + // return null; + // } + // } +}; fn getIrType(intermediate: *ir.Result, ir_instruction_index: ir.Instruction.Index) ir.Type { const ir_instruction = intermediate.instructions.get(ir_instruction_index); @@ -3903,6 +4243,45 @@ fn getSubregistersRecursive(allocator: Allocator, set: *RegisterSet, reg: Regist } } +const Encoding = struct { + const GP32 = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, + }; + const GP64 = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, + }; +}; + const LiveRegister = struct { last_use: Instruction.Index = Instruction.Index.invalid, virtual: Register.Virtual.Index, diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index 415fc4f..82ed488 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -1492,7 +1492,7 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, if (equal(u8, declaration_name, "_start")) { const value = module.values.get(decl.init_value); module.entry_point = switch (value.*) { - .function => |function_index| function_index.uniqueInteger(), + .function => |function_index| function_index, .unresolved => panic("Unresolved declaration: {s}\n", .{declaration_name}), else => |t| @panic(@tagName(t)), }; From 
aee9770f8b0e606680959e7698235215c323659a Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Thu, 9 Nov 2023 23:22:24 -0600 Subject: [PATCH 6/6] Exclude other OS different than Linux in CI --- .github/workflows/ci.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4e4de4..02fdcba 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,8 +24,6 @@ jobs: matrix: os: [ ubuntu-latest, - windows-latest, - macos-latest ] runs-on: ${{ matrix.os }} timeout-minutes: 15 @@ -39,4 +37,7 @@ jobs: - name: Zig environment variables run: zig env - name: Test - run: zig build test -Dall --summary all + run: | + zig build test -Dall --summary all + zig build run -- test/first/main.nat + nat/first