diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4e4de4..02fdcba 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,8 +24,6 @@ jobs: matrix: os: [ ubuntu-latest, - windows-latest, - macos-latest ] runs-on: ${{ matrix.os }} timeout-minutes: 15 @@ -39,4 +37,7 @@ jobs: - name: Zig environment variables run: zig env - name: Test - run: zig build test -Dall --summary all + run: | + zig build test -Dall --summary all + zig build run -- test/first/main.nat + nat/first diff --git a/.vscode/launch.json b/.vscode/launch.json index 46a561c..3fbf72a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,8 +8,10 @@ "type": "lldb", "request": "launch", "name": "Launch", - "program": "${workspaceFolder}/zig-out/bin/compiler", - "args": [], + "program": "${workspaceFolder}/zig-out/bin/nativity", + "args": [ + "test/hello_world/main.nat" + ], "cwd": "${workspaceFolder}", "preLaunchTask": "zig build" }, diff --git a/build.zig b/build.zig index e2dd39c..0ac8555 100644 --- a/build.zig +++ b/build.zig @@ -1,15 +1,18 @@ const std = @import("std"); var all: bool = false; -fn everythingForTargetAndOptimization(b: *std.Build, target: std.zig.CrossTarget, optimization: std.builtin.OptimizeMode, unit_tests: []const []const u8, test_step: *std.Build.Step) !void { - const name = if (all) try std.mem.concat(b.allocator, u8, &.{ "nativity_", @tagName(optimization) }) else "nativity"; +pub fn build(b: *std.Build) !void { + all = b.option(bool, "all", "All") orelse false; + const target = b.standardTargetOptions(.{}); + const optimization = b.standardOptimizeOption(.{}); const exe = b.addExecutable(.{ - .name = name, + .name = "nativity", .root_source_file = .{ .path = "src/main.zig" }, .target = target, .optimize = optimization, + .use_llvm = true, + .use_lld = false, }); - b.installArtifact(exe); b.installDirectory(.{ .source_dir = std.Build.LazyPath.relative("lib"), @@ -17,100 +20,50 @@ fn everythingForTargetAndOptimization(b: 
*std.Build, target: std.zig.CrossTarget .install_subdir = "lib", }); - const run_cmd = b.addRunArtifact(exe); - - run_cmd.step.dependOn(b.getInstallStep()); - - if (b.args) |args| { - run_cmd.addArgs(args); - } - - const run_step = b.step(if (all) try std.mem.concat(b.allocator, u8, &.{ "run_", @tagName(optimization) }) else "run", "Run the app"); - run_step.dependOn(&run_cmd.step); - - const debug_command = addDebugCommand(b, exe); - const debug_step = b.step(if (all) try std.mem.concat(b.allocator, u8, &.{ "debug_", @tagName(optimization) }) else "debug", "Debug the app"); - debug_step.dependOn(&debug_command.step); - const zig_tests = b.addTest(.{ .root_source_file = .{ .path = "src/main.zig" }, .target = target, .optimize = optimization, }); - const run_zig_tests = b.addRunArtifact(zig_tests); - run_zig_tests.has_side_effects = true; - test_step.dependOn(&run_zig_tests.step); + const run_command = b.addRunArtifact(exe); - for (unit_tests) |unit_test_main_source_file| { - const unit_test = b.addRunArtifact(exe); - unit_test.has_side_effects = true; - unit_test.addArg(unit_test_main_source_file); - test_step.dependOn(&unit_test.step); - } -} - -fn addDebugCommand(b: *std.Build, artifact: *std.Build.Step.Compile) *std.Build.Step.Run { - return switch (@import("builtin").os.tag) { + const debug_command = switch (@import("builtin").os.tag) { .linux => blk: { const result = b.addSystemCommand(&.{"gf2"}); - result.addArtifactArg(artifact); - - if (artifact.kind == .@"test") { - result.addArgs(&.{ "-ex", "r" }); - } - + result.addArg("--args"); + result.addArtifactArg(exe); break :blk result; }, .windows => blk: { const result = b.addSystemCommand(&.{"remedybg"}); result.addArg("-g"); - result.addArtifactArg(artifact); + result.addArtifactArg(exe); break :blk result; }, .macos => blk: { // not tested - const result = b.addSystemCommand(&.{"gdb"}); - result.addArtifactArg(artifact); + const result = b.addSystemCommand(&.{"lldb"}); + result.addArg("--"); + 
result.addArtifactArg(exe); break :blk result; }, - else => @compileError("Operating system not supported"), + else => @compileError("OS not supported"), }; -} -pub fn build(b: *std.Build) !void { - all = b.option(bool, "all", "All") orelse false; + const test_command = b.addRunArtifact(zig_tests); - var unit_test_list = std.ArrayList([]const u8).init(b.allocator); - var test_dir = try std.fs.cwd().openIterableDir("test", .{ .access_sub_paths = true }); - defer test_dir.close(); - - var test_dir_iterator = test_dir.iterate(); - - while (try test_dir_iterator.next()) |entry| { - switch (entry.kind) { - .directory => { - const dir_name = entry.name; - const main_unit_test_source_file = try std.mem.concat(b.allocator, u8, &.{ "test/", dir_name, "/main.nat" }); - try unit_test_list.append(main_unit_test_source_file); - }, - .file => {}, - else => @panic("Don't put crap on test directory"), - } + if (b.args) |args| { + run_command.addArgs(args); + test_command.addArgs(args); + debug_command.addArgs(args); } - const target = b.standardTargetOptions(.{}); - const unit_tests = unit_test_list.items; + const run_step = b.step("run", "Test the Nativity compiler"); + run_step.dependOn(&run_command.step); const test_step = b.step("test", "Test the Nativity compiler"); - - if (all) { - inline for (@typeInfo(std.builtin.OptimizeMode).Enum.fields) |enum_field| { - const optimization = @field(std.builtin.OptimizeMode, enum_field.name); - try everythingForTargetAndOptimization(b, target, optimization, unit_tests, test_step); - } - } else { - const optimization = b.standardOptimizeOption(.{}); - _ = try everythingForTargetAndOptimization(b, target, optimization, unit_tests, test_step); - } + test_step.dependOn(&test_command.step); + const debug_step = b.step("debug", "Debug the Nativity compiler"); + debug_step.dependOn(&debug_command.step); } diff --git a/lib/std/builtin.nat b/lib/std/builtin.nat new file mode 100644 index 0000000..835cfdf --- /dev/null +++ b/lib/std/builtin.nat @@ 
-0,0 +1,16 @@ +const Os = enum{ + linux, + macos, + windows, +}; + +const Cpu = enum{ + aarch64, + x86_64, +}; + +const Abi = enum{ + none, + gnu, + msvc, +}; diff --git a/lib/std/os.nat b/lib/std/os.nat new file mode 100644 index 0000000..4aced36 --- /dev/null +++ b/lib/std/os.nat @@ -0,0 +1,46 @@ +const current = #import("builtin").os; +const system = switch (current) { + .linux => linux, + .macos => macos, + .windows => windows, +}; + +const write = fn (file_descriptor: FileDescriptor, bytes_ptr: [@]const u8, bytes_len: usize) ssize { + switch (current) { + .linux => return #syscall(1, file_descriptor, bytes_ptr, bytes_len), + .macos => return macos.write(file_descriptor, bytes_ptr, bytes_len), + .windows => { + var written_bytes: u32 = 0; + if (windows.WriteFile(file_descriptor, bytes_ptr, bytes_len, @written_bytes, false) != 0) { + return written_bytes; + } else { + unreachable; + } + }, + } +} + +const FileDescriptor = system.FileDescriptor; + +const print = fn(bytes_ptr: [@]const u8, bytes_len: usize) void { + const file_descriptor = switch (current) { + .linux, .macos => 2, + .windows => windows.GetStdHandle(windows.STD_OUTPUT_HANDLE), + }; + + _ = write(file_descriptor, bytes_ptr, bytes_len); +} + +const exit = fn(exit_code: s32) noreturn { + switch (current) { + .linux => _ = #syscall(231, exit_code), + .macos => macos.exit(exit_code), + .windows => windows.ExitProcess(exit_code), + } + + unreachable; +} + +const linux = #import("os/linux.nat"); +const macos = #import("os/macos.nat"); +const windows = #import("os/windows.nat"); diff --git a/lib/std/os/linux.nat b/lib/std/os/linux.nat new file mode 100644 index 0000000..3183d18 --- /dev/null +++ b/lib/std/os/linux.nat @@ -0,0 +1 @@ +const FileDescriptor = s32; diff --git a/lib/std/os/macos.nat b/lib/std/os/macos.nat new file mode 100644 index 0000000..7300cab --- /dev/null +++ b/lib/std/os/macos.nat @@ -0,0 +1,3 @@ +const FileDescriptor = s32; +const write = fn (file_descriptor: FileDescriptor, bytes_ptr: 
[@]const u8, bytes_len: usize) ssize extern; +const exit = fn (exit_code: u32) noreturn extern; diff --git a/lib/std/os/windows.nat b/lib/std/os/windows.nat new file mode 100644 index 0000000..c5f37a5 --- /dev/null +++ b/lib/std/os/windows.nat @@ -0,0 +1,3 @@ +const HANDLE = u64; +const FileDescriptor = HANDLE; +const GetStdHandle = fn(handle_descriptor: u32) HANDLE extern; diff --git a/lib/std/start.nat b/lib/std/start.nat index 031890b..3a2c6c3 100644 --- a/lib/std/start.nat +++ b/lib/std/start.nat @@ -1,9 +1,9 @@ +const std = #import("std"); comptime { _ = _start; } const _start = fn () noreturn { const result = #import("main").main(); - _ = #syscall(231, result); - unreachable; -}; + std.os.exit(result); +} diff --git a/lib/std/std.nat b/lib/std/std.nat index 3ce1556..6215821 100644 --- a/lib/std/std.nat +++ b/lib/std/std.nat @@ -2,4 +2,7 @@ comptime { _ = start; } +const builtin = #import("builtin.nat"); +const os = #import("os.nat"); +const print = os.print; const start = #import("start.nat"); diff --git a/src/Compilation.zig b/src/Compilation.zig index 3669529..ef81403 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -3,7 +3,6 @@ const Compilation = @This(); const std = @import("std"); const assert = std.debug.assert; const equal = std.mem.eql; -const print = std.debug.print; const Allocator = std.mem.Allocator; @@ -13,6 +12,7 @@ const AutoHashMap = data_structures.AutoHashMap; const BlockList = data_structures.BlockList; const HashMap = data_structures.HashMap; const SegmentedList = data_structures.SegmentedList; +const StringKeyMap = data_structures.StringKeyMap; const StringHashMap = data_structures.StringHashMap; const StringArrayHashMap = data_structures.StringArrayHashMap; @@ -39,7 +39,104 @@ build_directory: std.fs.Dir, const cache_dir_name = "cache"; const installation_dir_name = "installation"; -pub fn init(allocator: Allocator) !*Compilation { +const ArgumentParsingError = error{ + main_package_path_not_specified, +}; + +fn 
reportUnterminatedArgumentError(string: []const u8) noreturn { + std.debug.panic("Unterminated argument: {s}", .{string}); +} + +fn parseArguments(allocator: Allocator) !Compilation.Module.Descriptor { + const arguments = (try std.process.argsAlloc(allocator))[1..]; + + var maybe_executable_path: ?[]const u8 = null; + var maybe_main_package_path: ?[]const u8 = null; + var target_triplet: []const u8 = "x86_64-linux-gnu"; + + var i: usize = 0; + while (i < arguments.len) : (i += 1) { + const current_argument = arguments[i]; + if (equal(u8, current_argument, "-o")) { + if (i + 1 != arguments.len) { + maybe_executable_path = arguments[i + 1]; + assert(maybe_executable_path.?.len != 0); + i += 1; + } else { + reportUnterminatedArgumentError(current_argument); + } + } else if (equal(u8, current_argument, "-target")) { + if (i + 1 != arguments.len) { + target_triplet = arguments[i + 1]; + i += 1; + } else { + reportUnterminatedArgumentError(current_argument); + } + } else if (equal(u8, current_argument, "-log")) { + if (i + 1 != arguments.len) { + i += 1; + + var log_argument_iterator = std.mem.splitScalar(u8, arguments[i], ','); + + while (log_argument_iterator.next()) |log_argument| { + var log_argument_splitter = std.mem.splitScalar(u8, log_argument, '.'); + const log_scope_candidate = log_argument_splitter.next() orelse unreachable; + var recognized_scope = false; + + inline for (@typeInfo(LoggerScope).Enum.fields) |logger_scope_enum_field| { + const log_scope = @field(LoggerScope, logger_scope_enum_field.name); + + if (equal(u8, @tagName(log_scope), log_scope_candidate)) { + const LogScope = getLoggerScopeType(log_scope); + + if (log_argument_splitter.next()) |particular_log_candidate| { + var recognized_particular = false; + inline for (@typeInfo(LogScope.Logger).Enum.fields) |particular_log_field| { + const particular_log = @field(LogScope.Logger, particular_log_field.name); + + if (equal(u8, particular_log_candidate, @tagName(particular_log))) { + 
LogScope.Logger.bitset.setPresent(particular_log, true); + recognized_particular = true; + } + } else if (!recognized_particular) std.debug.panic("Unrecognized particular log \"{s}\" in scope {s}", .{ particular_log_candidate, @tagName(log_scope) }); + } else { + // LogScope.Logger.bitset = @TypeOf(LogScope.Logger.bitset).initFull(); + } + + logger_bitset.setPresent(log_scope, true); + + recognized_scope = true; + } + } else if (!recognized_scope) std.debug.panic("Unrecognized log scope: {s}", .{log_scope_candidate}); + } + } else { + reportUnterminatedArgumentError(current_argument); + } + } else { + maybe_main_package_path = current_argument; + } + } + + const main_package_path = maybe_main_package_path orelse return error.main_package_path_not_specified; + + const executable_path = maybe_executable_path orelse blk: { + const executable_name = std.fs.path.basename(main_package_path[0 .. main_package_path.len - "/main.nat".len]); + assert(executable_name.len > 0); + const result = try std.mem.concat(allocator, u8, &.{ "nat/", executable_name }); + break :blk result; + }; + + const cross_target = try std.zig.CrossTarget.parse(.{ .arch_os_abi = target_triplet }); + const target = cross_target.toTarget(); + + return .{ + .main_package_path = main_package_path, + .executable_path = executable_path, + .target = target, + }; +} + +pub fn init(allocator: Allocator) !void { const compilation: *Compilation = try allocator.create(Compilation); const self_exe_path = try std.fs.selfExePathAlloc(allocator); @@ -55,7 +152,9 @@ pub fn init(allocator: Allocator) !*Compilation { try compilation.build_directory.makePath(cache_dir_name); try compilation.build_directory.makePath(installation_dir_name); - return compilation; + const compilation_descriptor = try parseArguments(allocator); + + try compilation.compileModule(compilation_descriptor); } pub const Struct = struct { @@ -67,12 +166,57 @@ pub const Struct = struct { pub const Allocation = List.Allocation; }; +pub const Enum = 
struct { + scope: Scope.Index, + fields: ArrayList(Enum.Field.Index) = .{}, + type: Type.Index, + + pub const Field = struct { + name: u32, + value: Value.Index, + parent: Enum.Index, + + pub const List = BlockList(@This()); + pub const Index = Enum.Field.List.Index; + pub const Allocation = Enum.Field.List.Allocation; + }; + + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const Array = struct { + element_type: Type.Index, + element_count: u32, + + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + pub const Type = union(enum) { void, noreturn, bool, + type, + comptime_int, integer: Type.Integer, + slice: Slice, + pointer: Pointer, @"struct": Struct.Index, + @"enum": Enum.Index, + function: Function.Prototype.Index, + array: Array, + + const Slice = struct { + element_type: Type.Index, + }; + const Pointer = struct { + element_type: Type.Index, + many: bool, + @"const": bool, + }; pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; @@ -85,14 +229,37 @@ pub const Type = union(enum) { signed = 1, }; - pub fn getSize(integer: Type.Integer) u64 { + pub fn getSize(integer: Integer) u64 { return integer.bit_count / @bitSizeOf(u8) + @intFromBool(integer.bit_count % @bitSizeOf(u8) != 0); } + + pub fn getIndex(integer: Integer) Compilation.Type.Index { + return .{ + .block = 0, + .index = @ctz(integer.bit_count) - @ctz(@as(u8, 8)) + @as(u6, switch (integer.signedness) { + .signed => Compilation.HardwareSignedIntegerType.offset, + .unsigned => Compilation.HardwareUnsignedIntegerType.offset, + }), + }; + } + + // pub fn get(bit_count: u16, comptime signedness: Signedness) @This().Type(signedness) { + // _ = bit_count; + // } + + fn Type(comptime signedness: Signedness) type { + return switch (signedness) { + .unsigned => HardwareUnsignedIntegerType, + .signed => 
HardwareSignedIntegerType, + }; + } }; pub fn getSize(type_info: Type) u64 { return switch (type_info) { .integer => |integer| integer.getSize(), + .pointer => 8, + .comptime_int => @panic("This call should never happen"), else => |t| @panic(@tagName(t)), }; } @@ -100,9 +267,106 @@ pub const Type = union(enum) { pub fn getAlignment(type_info: Type) u64 { return switch (type_info) { .integer => |integer| @min(16, integer.getSize()), + .pointer => 8, else => |t| @panic(@tagName(t)), }; } + + pub const @"void" = FixedTypeKeyword.void.toType(); + pub const boolean = FixedTypeKeyword.bool.toType(); + pub const ssize = FixedTypeKeyword.ssize.toType(); + pub const @"usize" = FixedTypeKeyword.usize.toType(); + pub const @"noreturn" = FixedTypeKeyword.noreturn.toType(); + pub const @"type" = FixedTypeKeyword.type.toType(); + pub const @"comptime_int" = FixedTypeKeyword.comptime_int.toType(); + pub const string_literal = ExtraCommonType.string_literal.toType(); + pub const @"u8" = Type.Integer.getIndex(.{ + .bit_count = 8, + .signedness = .unsigned, + }); +}; + +// Each time an enum is added here, a corresponding insertion in the initialization must be made +pub const Values = enum { + bool_false, + bool_true, + @"unreachable", + + pub fn getIndex(value: Values) Value.Index { + const absolute: u32 = @intFromEnum(value); + const foo = @as(Value.Index, undefined); + const ElementT = @TypeOf(@field(foo, "index")); + const BlockT = @TypeOf(@field(foo, "block")); + const divider = std.math.maxInt(ElementT); + const element_index: ElementT = @intCast(absolute % divider); + const block_index: BlockT = @intCast(absolute / divider); + return .{ + .index = element_index, + .block = block_index, + }; + } +}; + +pub const Intrinsic = enum { + @"error", + import, + syscall, +}; + +pub const FixedTypeKeyword = enum { + void, + noreturn, + bool, + usize, + ssize, + type, + comptime_int, + + const offset = 0; + + fn toType(fixed_type_keyword: FixedTypeKeyword) Type.Index { + return 
Type.Index.fromInteger(offset + @intFromEnum(fixed_type_keyword)); + } +}; + +pub const HardwareUnsignedIntegerType = enum { + u8, + u16, + u32, + u64, + + pub const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len; +}; + +pub const HardwareSignedIntegerType = enum { + s8, + s16, + s32, + s64, + + pub const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len; +}; + +pub const ExtraCommonType = enum { + string_literal, + pub const offset = HardwareSignedIntegerType.offset + @typeInfo(HardwareSignedIntegerType).Enum.fields.len; + + fn toType(t: ExtraCommonType) Type.Index { + return Type.Index.fromInteger(offset + @intFromEnum(t)); + } +}; + +pub const extra_common_type_data = blk: { + var result: [@typeInfo(ExtraCommonType).Enum.fields.len]Type = undefined; + result[@intFromEnum(ExtraCommonType.string_literal)] = .{ + .pointer = .{ + .many = true, + .@"const" = true, + .element_type = Type.u8, + }, + }; + + break :blk result; }; /// A scope contains a bunch of declarations @@ -131,7 +395,14 @@ pub const Declaration = struct { scope_type: ScopeType, mutability: Mutability, init_value: Value.Index, - name: []const u8, + name: u32, + argument_index: ?u32, + type: Type.Index, + + pub const Reference = struct { + value: Declaration.Index, + type: Type.Index, + }; pub const List = BlockList(@This()); pub const Index = List.Index; @@ -139,15 +410,31 @@ pub const Declaration = struct { }; pub const Function = struct { + scope: Scope.Index, body: Block.Index, - prototype: Prototype.Index, + prototype: Type.Index, pub const Prototype = struct { - arguments: ?[]const Field.Index, + arguments: ?[]const Declaration.Index, return_type: Type.Index, + attributes: Attributes = .{}, pub const List = BlockList(@This()); pub const Index = Prototype.List.Index; + + pub const Attributes = packed struct { + @"extern": bool = false, + @"export": bool = false, + @"inline": Inline = .none, + calling_convention: CallingConvention = 
.system_v, + + pub const Inline = enum { + none, + suggestion_optimizer, + force_semantic, + force_optimizer, + }; + }; }; pub fn getBodyBlock(function: Function, module: *Module) *Block { @@ -168,7 +455,8 @@ pub const Block = struct { }; pub const Field = struct { - foo: u32 = 0, + name: u32, + type: Type.Index, pub const List = BlockList(@This()); pub const Index = List.Index; @@ -239,10 +527,23 @@ pub const Return = struct { pub const Allocation = List.Allocation; }; +pub const Cast = struct { + value: Value.Index, + type: Type.Index, + + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const CallingConvention = enum { + system_v, +}; + pub const Value = union(enum) { unresolved: Unresolved, declaration: Declaration.Index, - declaration_reference: Declaration.Index, + declaration_reference: Declaration.Reference, void, bool: bool, undefined, @@ -258,35 +559,118 @@ pub const Value = union(enum) { call: Call.Index, argument_list: ArgumentList, @"return": Return.Index, + argument: Declaration.Index, + string_literal: u32, + enum_field: Enum.Field.Index, + extern_function: Function.Prototype.Index, + sign_extend: Cast.Index, + zero_extend: Cast.Index, pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; + pub const Integer = struct { + value: u64, + type: Type.Index, + signedness: Type.Integer.Signedness, + + pub fn getBitCount(integer: Integer, module: *Module) u16 { + return module.types.get(integer.type).integer.bit_count; + } + }; + pub fn isComptime(value: Value) bool { return switch (value) { - .bool, .void, .undefined, .function => true, - else => false, - }; - } - - pub fn getType(value: *Value, module: *Module) Type.Index { - return switch (value.*) { - .call => |call_index| module.calls.get(call_index).type, + .bool, .void, .undefined, .function, .type, .enum_field => true, + .integer => |integer| 
integer.type.eq(Type.comptime_int), + .call => false, else => |t| @panic(@tagName(t)), }; } -}; -pub const Integer = struct { - value: u64, - type: Type.Integer, + pub fn getType(value: Value, module: *Module) Type.Index { + const result = switch (value) { + .call => |call_index| module.calls.get(call_index).type, + .integer => |integer| integer.type, + .declaration_reference => |declaration_reference| declaration_reference.type, + .string_literal => |string_literal_hash| module.string_literal_types.get(@intCast(module.getStringLiteral(string_literal_hash).?.len)).?, + .type => Type.type, + .enum_field => |enum_field_index| module.enums.get(module.enum_fields.get(enum_field_index).parent).type, + .function => |function_index| module.functions.get(function_index).prototype, + else => |t| @panic(@tagName(t)), + }; + + return result; + } + + // pub fn setType(value: *Value, new_type: Type.Index) void { + // switch (value.*) { + // .integer => value.integer.type = new_type, + // else => |t| @panic(@tagName(t)), + // } + // } + const TypeCheckError = error{ + integer_size, + pointer_many_differ, + pointer_element_type_differ, + }; + + pub fn typeCheck(value: *Value, module: *Module, type_to_check_index: Type.Index) TypeCheckError!void { + const value_type_index = value.getType(module); + + if (!value_type_index.eq(type_to_check_index)) { + const value_type = module.types.get(value_type_index); + const check_type = module.types.get(type_to_check_index); + if (std.meta.activeTag(value_type.*) == std.meta.activeTag(check_type.*)) { + switch (value_type.*) { + .integer => |coercee_int| { + if (check_type.integer.getSize() < coercee_int.getSize()) { + return error.integer_size; + } + }, + .pointer => |coercee_pointer| { + if (coercee_pointer.many != check_type.pointer.many) { + return error.pointer_many_differ; + } + + if (!coercee_pointer.element_type.eq(check_type.pointer.element_type)) { + if (check_type.pointer.many) { + const coercee_element_type = 
module.types.get(coercee_pointer.element_type); + switch (coercee_element_type.*) { + .array => |array| if (!array.element_type.eq(check_type.pointer.element_type)) { + return error.pointer_element_type_differ; + }, + else => |t| @panic(@tagName(t)), + } + } + } + }, + else => |t| @panic(@tagName(t)), + } + } else { + switch (check_type.*) { + .integer => { + switch (value_type.*) { + .comptime_int => switch (value.*) { + .integer => value.integer.type = type_to_check_index, + .declaration_reference => value.declaration_reference.type = type_to_check_index, + else => |t| @panic(@tagName(t)), + }, + else => |t| @panic(@tagName(t)), + } + }, + else => |t| @panic(@tagName(t)), + } + } + } + } }; pub const Module = struct { main_package: *Package, import_table: StringArrayHashMap(*File) = .{}, - string_table: AutoHashMap(u32, []const u8) = .{}, + string_table: StringKeyMap([]const u8) = .{}, declarations: BlockList(Declaration) = .{}, structs: BlockList(Struct) = .{}, scopes: BlockList(Scope) = .{}, @@ -301,12 +685,22 @@ pub const Module = struct { assignments: BlockList(Assignment) = .{}, syscalls: BlockList(Syscall) = .{}, calls: BlockList(Call) = .{}, - argument_list: BlockList(ArgumentList) = .{}, + argument_lists: BlockList(ArgumentList) = .{}, returns: BlockList(Return) = .{}, - entry_point: ?u32 = null, + string_literals: StringKeyMap([]const u8) = .{}, + enums: BlockList(Enum) = .{}, + enum_fields: BlockList(Enum.Field) = .{}, + function_name_map: data_structures.AutoArrayHashMap(Function.Index, u32) = .{}, + arrays: BlockList(Array) = .{}, + casts: BlockList(Cast) = .{}, + string_literal_types: data_structures.AutoArrayHashMap(u32, Type.Index) = .{}, + array_types: data_structures.AutoArrayHashMap(Array, Type.Index) = .{}, + entry_point: Function.Index = Function.Index.invalid, pub const Descriptor = struct { main_package_path: []const u8, + executable_path: []const u8, + target: std.Target, }; const ImportFileResult = struct { @@ -321,7 +715,7 @@ pub const 
Module = struct { }; pub fn importFile(module: *Module, allocator: Allocator, current_file_index: File.Index, import_name: []const u8) !ImportPackageResult { - print("import: '{s}'\n", .{import_name}); + logln(.compilation, .import, "import: '{s}'\n", .{import_name}); if (equal(u8, import_name, "std")) { return module.importPackage(allocator, module.main_package.dependencies.get("std").?); } @@ -343,8 +737,10 @@ pub const Module = struct { unreachable; } - const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_name }); - const file_relative_path = std.fs.path.basename(full_path); + const current_file_relative_path_to_package_directory = std.fs.path.dirname(current_file.relative_path) orelse ""; + const import_file_relative_path = try std.fs.path.join(allocator, &.{ current_file_relative_path_to_package_directory, import_name }); + const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_file_relative_path }); + const file_relative_path = import_file_relative_path; const package = current_file.package; const import_file = try module.getFile(allocator, full_path, file_relative_path, package); @@ -358,12 +754,6 @@ pub const Module = struct { return result; } - fn lookupDeclaration(module: *Module, hashed: u32) !noreturn { - _ = hashed; - _ = module; - while (true) {} - } - fn getFile(module: *Module, allocator: Allocator, full_path: []const u8, relative_path: []const u8, package: *Package) !ImportFileResult { const path_lookup = try module.import_table.getOrPut(allocator, full_path); const file, const index = switch (path_lookup.found_existing) { @@ -380,7 +770,7 @@ pub const Module = struct { .relative_path = relative_path, .package = package, }); - std.debug.print("Adding file #{}: {s}\n", .{ file_allocation.index.uniqueInteger(), full_path }); + logln(.compilation, .new_file, "Adding file #{}: {s}\n", .{ file_allocation.index.uniqueInteger(), full_path }); path_lookup.value_ptr.* = 
file_allocation.ptr; // break :blk file; break :blk .{ @@ -399,6 +789,7 @@ pub const Module = struct { pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult { const full_path = try std.fs.path.resolve(allocator, &.{ package.directory.path, package.source_path }); + logln(.compilation, .import, "Import full path: {s}\n", .{full_path}); const import_file = try module.getFile(allocator, full_path, package.source_path, package); try import_file.ptr.addPackageReference(allocator, package); @@ -408,8 +799,8 @@ pub const Module = struct { }; } - pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { - _ = module; + pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file_index: File.Index) !void { + const file = module.files.get(file_index); const source_file = file.package.directory.handle.openFile(file.relative_path, .{}) catch |err| { std.debug.panic("Can't find file {s} in directory {s} for error {s}", .{ file.relative_path, file.package.directory.path, @errorName(err) }); }; @@ -425,8 +816,68 @@ pub const Module = struct { file.source_code = file_buffer[0..read_byte_count]; file.status = .loaded_into_memory; - try file.lex(allocator); - try file.parse(allocator); + try file.lex(allocator, file_index); + try file.parse(allocator, file_index); + } + + fn getString(map: *StringKeyMap([]const u8), key: u32) ?[]const u8 { + return map.getValue(key); + } + + fn addString(map: *StringKeyMap([]const u8), allocator: Allocator, string: []const u8) !u32 { + const lookup_result = try map.getOrPut(allocator, string, string); + + { + const lookup_name = map.getValue(lookup_result.key) orelse unreachable; + assert(equal(u8, lookup_name, string)); + } + + return lookup_result.key; + } + + pub fn getName(module: *Module, key: u32) ?[]const u8 { + return getString(&module.string_table, key); + } + + pub fn addName(module: *Module, allocator: Allocator, name: []const 
u8) !u32 { + return addString(&module.string_table, allocator, name); + } + + pub fn getStringLiteral(module: *Module, key: u32) ?[]const u8 { + return getString(&module.string_literals, key); + } + + pub fn addStringLiteral(module: *Module, allocator: Allocator, string_literal: []const u8) !u32 { + const result = addString(&module.string_literals, allocator, string_literal); + + const len: u32 = @intCast(string_literal.len); + // try analyzer.module. + const string_literal_type_gop = try module.string_literal_types.getOrPut(allocator, len); + if (!string_literal_type_gop.found_existing) { + const array = Array{ + .element_type = Type.u8, + .element_count = len, + }; + const array_type_gop = try module.array_types.getOrPut(allocator, array); + if (!array_type_gop.found_existing) { + const array_type_allocation = try module.types.append(allocator, .{ + .array = array, + }); + array_type_gop.value_ptr.* = array_type_allocation.index; + } + + const array_type_index = array_type_gop.value_ptr.*; + const pointer_type_allocation = try module.types.append(allocator, .{ + .pointer = .{ + .@"const" = true, + .many = true, + .element_type = array_type_index, + }, + }); + string_literal_type_gop.value_ptr.* = pointer_type_allocation.index; + } + + return result; } }; @@ -448,7 +899,18 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! 
// TODO: generate an actual file const builtin_file_name = "builtin.nat"; var cache_dir = try compilation.build_directory.openDir("cache", .{}); - const builtin_file = try cache_dir.createFile(builtin_file_name, .{ .truncate = false }); + const builtin_file = try cache_dir.createFile(builtin_file_name, .{}); + try builtin_file.writer().print( + \\const builtin = #import("std").builtin; + \\const cpu = builtin.Cpu.{s}; + \\const os = builtin.Os.{s}; + \\const abi = builtin.Abi.{s}; + \\ + , .{ + @tagName(descriptor.target.cpu.arch), + @tagName(descriptor.target.os.tag), + @tagName(descriptor.target.abi), + }); builtin_file.close(); const module: *Module = try compilation.base_allocator.create(Module); @@ -508,14 +970,80 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! _ = try module.importPackage(compilation.base_allocator, module.main_package.dependencies.get("std").?); for (module.import_table.values()) |import| { - try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import); + try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, module.files.indexOf(import)); } - const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0], .{ .block = 0, .index = 0 }); + inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| { + _ = try module.types.append(compilation.base_allocator, switch (@field(FixedTypeKeyword, enum_field.name)) { + .usize => @unionInit(Type, "integer", .{ + .bit_count = 64, + .signedness = .unsigned, + }), + .ssize => @unionInit(Type, "integer", .{ + .bit_count = 64, + .signedness = .signed, + }), + else => @unionInit(Type, enum_field.name, {}), + }); + } - var ir = try intermediate_representation.initialize(compilation, module, packages[0], main_declaration); + inline for (@typeInfo(HardwareUnsignedIntegerType).Enum.fields) |enum_field| { + _ = try module.types.append(compilation.base_allocator, .{ + .integer = .{ + .signedness = .unsigned, 
+ .bit_count = switch (@field(HardwareUnsignedIntegerType, enum_field.name)) { + .u8 => 8, + .u16 => 16, + .u32 => 32, + .u64 => 64, + }, + }, + }); + } - try emit.get(.x86_64).initialize(compilation.base_allocator, &ir); + inline for (@typeInfo(HardwareSignedIntegerType).Enum.fields) |enum_field| { + _ = try module.types.append(compilation.base_allocator, .{ + .integer = .{ + .signedness = .signed, + .bit_count = switch (@field(HardwareSignedIntegerType, enum_field.name)) { + .s8 => 8, + .s16 => 16, + .s32 => 32, + .s64 => 64, + }, + }, + }); + } + + for (extra_common_type_data) |type_data| { + _ = try module.types.append(compilation.base_allocator, type_data); + } + + _ = try module.values.append(compilation.base_allocator, .{ + .bool = false, + }); + + _ = try module.values.append(compilation.base_allocator, .{ + .bool = true, + }); + + _ = try module.values.append(compilation.base_allocator, .{ + .@"unreachable" = {}, + }); + + const value_allocation = try module.values.append(compilation.base_allocator, .{ + .unresolved = .{ + .node_index = .{ .value = 0 }, + }, + }); + + try semantic_analyzer.initialize(compilation, module, packages[0], value_allocation.ptr); + + const ir = try intermediate_representation.initialize(compilation, module); + + switch (descriptor.target.cpu.arch) { + inline else => |arch| try emit.get(arch).initialize(compilation.base_allocator, ir, descriptor), + } } fn generateAST() !void {} @@ -544,6 +1072,7 @@ pub const File = struct { syntactic_analyzer_result: syntactic_analyzer.Result = undefined, package_references: ArrayList(*Package) = .{}, file_references: ArrayList(*File) = .{}, + type: Type.Index = Type.Index.invalid, relative_path: []const u8, package: *Package, @@ -569,21 +1098,84 @@ pub const File = struct { try file.file_references.append(allocator, affected); } - fn lex(file: *File, allocator: Allocator) !void { + fn lex(file: *File, allocator: Allocator, file_index: File.Index) !void { assert(file.status == 
.loaded_into_memory); - file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code); + file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code, file_index); // if (!@import("builtin").is_test) { // print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time}); // } file.status = .lexed; } - fn parse(file: *File, allocator: Allocator) !void { + fn parse(file: *File, allocator: Allocator, file_index: File.Index) !void { assert(file.status == .lexed); - file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code); + file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code, file_index); // if (!@import("builtin").is_test) { // print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time}); // } file.status = .parsed; } }; + +const LoggerScope = enum { + compilation, + lexer, + parser, + sema, + ir, + codegen, +}; + +const Logger = enum { + import, + new_file, + arguments, + var bitset = std.EnumSet(Logger).initEmpty(); +}; + +fn getLoggerScopeType(comptime logger_scope: LoggerScope) type { + comptime { + return switch (logger_scope) { + .compilation => @This(), + .lexer => lexical_analyzer, + .parser => syntactic_analyzer, + .sema => semantic_analyzer, + .ir => intermediate_representation, + .codegen => emit, + }; + } +} + +var logger_bitset = std.EnumSet(LoggerScope).initEmpty(); + +var writer = std.io.getStdErr().writer(); + +fn shouldLog(comptime logger_scope: LoggerScope, logger: getLoggerScopeType(logger_scope).Logger) bool { + return logger_bitset.contains(logger_scope) and getLoggerScopeType(logger_scope).Logger.bitset.contains(logger); +} + +pub fn logln(comptime logger_scope: LoggerScope, logger: getLoggerScopeType(logger_scope).Logger, comptime format: []const u8, arguments: anytype) void { + if (shouldLog(logger_scope, 
logger)) { + log(logger_scope, logger, format, arguments); + writer.writeByte('\n') catch unreachable; + } +} + +pub fn log(comptime logger_scope: LoggerScope, logger: getLoggerScopeType(logger_scope).Logger, comptime format: []const u8, arguments: anytype) void { + if (shouldLog(logger_scope, logger)) { + std.fmt.format(writer, format, arguments) catch unreachable; + } +} + +pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace, return_address: ?usize) noreturn { + const print_stack_trace = true; + switch (print_stack_trace) { + true => std.builtin.default_panic(message, stack_trace, return_address), + false => { + writer.writeAll("\nPANIC: ") catch {}; + writer.writeAll(message) catch {}; + writer.writeByte('\n') catch {}; + std.os.abort(); + }, + } +} diff --git a/src/backend/aarch64.zig b/src/backend/aarch64.zig new file mode 100644 index 0000000..e69de29 diff --git a/src/backend/elf.zig b/src/backend/elf.zig new file mode 100644 index 0000000..7d5d188 --- /dev/null +++ b/src/backend/elf.zig @@ -0,0 +1,385 @@ +const std = @import("std"); +const assert = std.debug.assert; +const equal = std.mem.eql; + +const data_structures = @import("../data_structures.zig"); +const Allocator = data_structures.Allocator; +const ArrayList = data_structures.ArrayList; + +const emit = @import("emit.zig"); +const page_size = 0x1000; + +pub fn writeToMemory(image: *emit.Result) !std.ArrayListAlignedUnmanaged(u8, page_size) { + var file = try std.ArrayListAlignedUnmanaged(u8, 0x1000).initCapacity(image.allocator, 0x100000); + _ = try image.insertSection(0, .{ + .name = "", + .size = page_size, + .alignment = page_size, + .flags = .{ + .read = true, + .write = false, + .execute = false, + }, + .type = .loadable_program, + }); + + const symbol_table_index = try image.addSection(.{ + .name = ".symtab", + .size = page_size, + .alignment = @alignOf(SymbolTable.Entry), + .flags = .{ + .read = false, + .write = false, + .execute = false, + }, + .type = .symbol_table, + }); 
+ const string_table_index = try image.addSection(.{ + .name = ".strtab", + .size = page_size, + .alignment = 1, + .flags = .{ + .read = false, + .write = false, + .execute = false, + }, + .type = .string_table, + }); + const section_header_string_table_index = try image.addSection(.{ + .name = ".shstrtab", + .size = page_size, + .alignment = 1, + .flags = .{ + .read = false, + .write = false, + .execute = false, + }, + .type = .string_table, + }); + + const base_virtual_address = 0x400000; + const text_section_index = 1; + + const program_header_count = blk: { + var result: usize = 0; + for (image.sections.items) |section| { + result += @intFromBool(switch (section.type) { + .null => false, + .loadable_program => true, + .string_table => false, + .symbol_table => false, + }); + } + break :blk result; + }; + + var symbol_name_offset: u32 = 0; + + image.writeToSection(symbol_table_index, std.mem.asBytes(&SymbolTable.Entry{ + .name_offset = symbol_name_offset, + .information = 0, + .other = 0, + .section_header_index = 0, + .value = 0, + .size = 0, + })); + + image.writeToSection(string_table_index, ""); + image.writeByteToSection(string_table_index, 0); + symbol_name_offset += 1; + + for (image.sections.items) |section| { + image.writeToSection(section_header_string_table_index, section.name); + image.writeByteToSection(section_header_string_table_index, 0); + } + + { + var program_segment_offset: usize = 0; + + image.writeToSection(0, std.mem.asBytes(&Header{ + .endianness = .little, + .machine = switch (image.target.cpu.arch) { + .x86_64 => .AMD64, + else => unreachable, + }, + .os_abi = switch (image.target.os.tag) { + .linux => .systemv, + else => unreachable, + }, + .entry = 0, + .section_header_offset = 0, + .program_header_count = @intCast(program_header_count), + .section_header_count = @intCast(image.sections.items.len), + .section_header_string_table_index = @intCast(section_header_string_table_index), + })); + + for (image.sections.items, 0..) 
|section, section_index| { + switch (section.type) { + .loadable_program => { + program_segment_offset = std.mem.alignForward(usize, program_segment_offset, section.alignment); + const virtual_address = base_virtual_address + program_segment_offset; + const program_segment_size = switch (section_index) { + 0 => @sizeOf(Header) + @sizeOf(ProgramHeader) * program_header_count, + else => section.index, + }; + image.writeToSection(0, std.mem.asBytes(&ProgramHeader{ + .type = .load, + .flags = ProgramHeader.Flags{ + .executable = section.flags.execute, + .writable = section.flags.write, + .readable = section.flags.read, + }, + .offset = program_segment_offset, + .virtual_address = virtual_address, + .physical_address = virtual_address, + .size_in_file = program_segment_size, + .size_in_memory = program_segment_size, + .alignment = section.alignment, + })); + + program_segment_offset += program_segment_size; + }, + .null, + .string_table, + .symbol_table, + => {}, + } + } + } + + { + var section_offset: usize = 0; + var section_headers = try ArrayList(SectionHeader).initCapacity(image.allocator, image.sections.items.len); + var section_name_offset: u32 = 0; + + for (image.sections.items, 0..) 
|section, section_i| { + section_offset = std.mem.alignForward(usize, section_offset, section.alignment); + const virtual_address = base_virtual_address + section_offset; + + for (section.symbol_table.keys(), section.symbol_table.values()) |symbol_name, symbol_offset| { + const symbol_address = virtual_address + symbol_offset; + image.writeToSection(symbol_table_index, std.mem.asBytes(&SymbolTable.Entry{ + .name_offset = symbol_name_offset, + .information = 0x10, + .other = 0, + .section_header_index = @intCast(section_i), + .value = symbol_address, + .size = 0, + })); + + image.writeToSection(string_table_index, symbol_name); + image.writeByteToSection(string_table_index, 0); + + symbol_name_offset += @intCast(symbol_name.len + 1); + } + + const source = section.content[0..section.index]; + file.items.len = section_offset + source.len; + try file.replaceRange(image.allocator, section_offset, source.len, source); + + section_headers.appendAssumeCapacity(SectionHeader{ + .name_offset = section_name_offset, + .type = switch (section_i) { + 0 => .null, + else => switch (section.type) { + .loadable_program => .program_data, + .string_table => .string_table, + .symbol_table => .symbol_table, + .null => .null, + }, + }, + .flags = .{ + .alloc = true, + .executable = section.flags.execute, + .writable = section.flags.write, + }, + .virtual_address = virtual_address, + .file_offset = section_offset, + .size = section.index, + .link = switch (section.type) { + .symbol_table => @intCast(string_table_index), + else => 0, + }, + .info = switch (section.type) { + .symbol_table => 1, + else => 0, + }, + .alignment = 0, + .entry_size = switch (section.type) { + .symbol_table => @sizeOf(SymbolTable.Entry), + else => 0, + }, + }); + + section_offset += section.index; + section_name_offset += @intCast(section.name.len + 1); + } + + const section_header_offset = std.mem.alignForward(usize, section_offset, @alignOf(SectionHeader)); + const section_header_bytes = 
std.mem.sliceAsBytes(section_headers.items); + try file.ensureTotalCapacity(image.allocator, section_header_offset + section_header_bytes.len); + file.items.len = section_header_offset + section_header_bytes.len; + try file.replaceRange(image.allocator, section_header_offset, section_header_bytes.len, section_header_bytes); + + const _start_offset = blk: { + const entry_offset = image.sections.items[text_section_index].symbol_table.values()[image.entry_point]; + const text_section_virtual_address = section_headers.items[text_section_index].virtual_address; + break :blk text_section_virtual_address + entry_offset; + }; + + const header: *Header = @ptrCast(file.items.ptr); + header.section_header_offset = section_header_offset; + header.entry = _start_offset; + } + + return file; +} + +const Header = extern struct { + magic: u8 = 0x7f, + elf_id: [3]u8 = "ELF".*, + bit_count: BitCount = .@"64", + endianness: Endianness = .little, + header_version: u8 = 1, + os_abi: ABI, + abi_version: u8 = 0, + padding: [7]u8 = [_]u8{0} ** 7, + object_type: ObjectFileType = .executable, // e_type + machine: Machine, + version: u32 = 1, + entry: u64, + program_header_offset: u64 = std.mem.alignForward(u16, @sizeOf(Header), @alignOf(ProgramHeader)), + section_header_offset: u64, + flags: u32 = 0, + header_size: u16 = 0x40, + program_header_size: u16 = @sizeOf(ProgramHeader), + program_header_count: u16 = 1, + section_header_size: u16 = @sizeOf(SectionHeader), + section_header_count: u16, + section_header_string_table_index: u16, + + const BitCount = enum(u8) { + @"32" = 1, + @"64" = 2, + }; + + const ABI = enum(u8) { + systemv = 0, + }; + + const ObjectFileType = enum(u16) { + none = 0, + relocatable = 1, + executable = 2, + dynamic = 3, + core = 4, + lo_os = 0xfe00, + hi_os = 0xfeff, + lo_proc = 0xff00, + hi_proc = 0xffff, + }; + + const Machine = enum(u16) { + AMD64 = 0x3e, + }; + + const Endianness = enum(u8) { + little = 1, + big = 2, + }; +}; + +const ProgramHeader = extern struct 
{ + type: Type, + flags: Flags, + offset: u64, + virtual_address: u64, + physical_address: u64, + size_in_file: u64, + size_in_memory: u64, + alignment: u64, + + const Type = enum(u32) { + null = 0, + load = 1, + dynamic = 2, + interpreter = 3, + note = 4, + shlib = 5, // reserved + program_header = 6, + tls = 7, + lo_os = 0x60000000, + hi_os = 0x6fffffff, + lo_proc = 0x70000000, + hi_proc = 0x7fffffff, + }; + + const Flags = packed struct(u32) { + executable: bool, + writable: bool, + readable: bool, + reserved: u29 = 0, + }; +}; +const SectionHeader = extern struct { + name_offset: u32, + type: Type, + flags: Flags, + virtual_address: u64, + file_offset: u64, + size: u64, + // section index + link: u32, + info: u32, + alignment: u64, + entry_size: u64, + + // type + const Type = enum(u32) { + null = 0, + program_data = 1, + symbol_table = 2, + string_table = 3, + relocation_entries_addends = 4, + symbol_hash_table = 5, + dynamic_linking_info = 6, + notes = 7, + program_space_no_data = 8, + relocation_entries = 9, + reserved = 10, + dynamic_linker_symbol_table = 11, + array_of_constructors = 14, + array_of_destructors = 15, + array_of_pre_constructors = 16, + section_group = 17, + extended_section_indices = 18, + number_of_defined_types = 19, + start_os_specific = 0x60000000, + }; + + const Flags = packed struct(u64) { + writable: bool, + alloc: bool, + executable: bool, + reserved: bool = false, + mergeable: bool = false, + contains_null_terminated_strings: bool = false, + info_link: bool = false, + link_order: bool = false, + os_non_conforming: bool = false, + section_group: bool = false, + tls: bool = false, + _reserved: u53 = 0, + }; +}; + +const SymbolTable = extern struct { + const Entry = extern struct { + name_offset: u32, + information: u8, + other: u8, + section_header_index: u16, + value: u64, + size: u64, + }; +}; diff --git a/src/backend/emit.zig b/src/backend/emit.zig index c35c0f3..e1395fa 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig 
@@ -6,83 +6,154 @@ const assert = std.debug.assert; const expect = std.testing.expect; const expectEqual = std.testing.expectEqual; +const Compilation = @import("../Compilation.zig"); + const ir = @import("intermediate_representation.zig"); const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const AutoHashMap = data_structures.AutoHashMap; +const mmap = data_structures.mmap; + +const elf = @import("elf.zig"); +const pe = @import("pe.zig"); +const macho = @import("macho.zig"); const jit_callconv = .SysV; const Section = struct { content: []align(page_size) u8, index: usize = 0, + alignment: u32, + name: []const u8, + flags: Flags, + type: Type, + symbol_table: std.StringArrayHashMapUnmanaged(u32) = .{}, + + const Type = enum { + null, + loadable_program, + string_table, + symbol_table, + }; + + const Flags = packed struct { + read: bool, + write: bool, + execute: bool, + }; }; pub const Result = struct { - sections: struct { - text: Section, - rodata: Section, - data: Section, - }, - entry_point: u32 = 0, + sections: ArrayList(Section) = .{}, + // sections: struct { + // text: Section, + // rodata: Section, + // data: Section, + // }, + entry_point: u32, + target: std.Target, + allocator: Allocator, - pub fn create() !Result { - return Result{ - .sections = .{ - .text = .{ .content = try mmap(page_size, .{ .executable = true }) }, - .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) }, - .data = .{ .content = try mmap(page_size, .{ .executable = false }) }, - }, + const text_section_index = 0; + + pub fn create(allocator: Allocator, target: std.Target, entry_point_index: u32) !Result { + var result = Result{ + // .sections = .{ + // .text = .{ .content = try mmap(page_size, .{ .executable = true }) }, + // .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) }, + // .data = .{ .content = try mmap(page_size, .{ .executable = false }) }, + // }, + .target = target, + .allocator = 
allocator, + .entry_point = entry_point_index, }; + + _ = try result.addSection(.{ + .name = ".text", + .size = 0x1000, + .alignment = 0x1000, + .flags = .{ + .execute = true, + .read = true, + .write = false, + }, + .type = .loadable_program, + }); + + return result; } - fn mmap(size: usize, flags: packed struct { - executable: bool, - }) ![]align(page_size) u8 { - return switch (@import("builtin").os.tag) { - .windows => blk: { - const windows = std.os.windows; - break :blk @as([*]align(0x1000) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size]; - }, - .linux, .macos => |os_tag| blk: { - const jit = switch (os_tag) { - .macos => 0x800, - .linux => 0, - else => unreachable, - }; - const execute_flag: switch (os_tag) { - .linux => u32, - .macos => c_int, - else => unreachable, - } = if (flags.executable) std.os.PROT.EXEC else 0; - const protection_flags: u32 = @intCast(std.os.PROT.READ | std.os.PROT.WRITE | execute_flag); - const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE | jit; + const SectionCreation = struct { + name: []const u8, + size: usize, + alignment: u32, + flags: Section.Flags, + type: Section.Type, + }; - break :blk std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0); - }, - else => @compileError("OS not supported"), - }; + pub fn addSection(result: *Result, arguments: SectionCreation) !usize { + const index = result.sections.items.len; + assert(std.mem.isAligned(arguments.size, page_size)); + + try result.sections.append(result.allocator, .{ + .content = try mmap(arguments.size, .{ .executable = arguments.flags.execute }), + .alignment = arguments.alignment, + .name = arguments.name, + .flags = arguments.flags, + .type = arguments.type, + }); + + return index; + } + + pub fn insertSection(result: *Result, index: usize, arguments: SectionCreation) !usize { + assert(std.mem.isAligned(arguments.size, page_size)); + try 
result.sections.insert(result.allocator, index, .{ + .content = try mmap(arguments.size, .{ .executable = arguments.flags.execute }), + .alignment = arguments.alignment, + .name = arguments.name, + .flags = arguments.flags, + .type = arguments.type, + }); + + return index; + } + + pub fn alignSection(result: *Result, index: usize, alignment: usize) void { + const index_ptr = &result.sections.items[index].index; + index_ptr.* = std.mem.alignForward(usize, index_ptr.*, alignment); + } + + pub fn writeToSection(image: *Result, section_index: usize, bytes: []const u8) void { + const section = &image.sections.items[section_index]; + const destination = section.content[section.index..][0..bytes.len]; + @memcpy(destination, bytes); + section.index += bytes.len; + } + + pub fn writeByteToSection(image: *Result, section_index: usize, byte: u8) void { + const section = &image.sections.items[section_index]; + section.content[section.index] = byte; + section.index += 1; + } + + pub fn getTextSection(result: *Result) *Section { + return &result.sections.items[0]; } pub fn appendCode(image: *Result, code: []const u8) void { - std.debug.print("New code: ", .{}); - for (code) |byte| { - std.debug.print("0x{x} ", .{byte}); - } - std.debug.print("\n", .{}); - const destination = image.sections.text.content[image.sections.text.index..][0..code.len]; - @memcpy(destination, code); - image.sections.text.index += code.len; + image.writeToSection(text_section_index, code); } pub fn appendCodeByte(image: *Result, code_byte: u8) void { - std.debug.print("New code: 0x{x}\n", .{code_byte}); - image.sections.text.content[image.sections.text.index] = code_byte; - image.sections.text.index += 1; + image.writeByteToSection(text_section_index, code_byte); } fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType { + if (@import("builtin").cpu.arch == .aarch64 and @import("builtin").os.tag == .macos) { + data_structures.pthread_jit_write_protect_np(true); + } 
comptime { assert(@typeInfo(FunctionType) == .Fn); } @@ -90,6 +161,33 @@ pub const Result = struct { assert(image.sections.text.content.len > 0); return @as(*const FunctionType, @ptrCast(&image.sections.text.content[image.entry_point])); } + + fn writeElf(image: *Result, executable_relative_path: []const u8) !void { + const file_in_memory = try elf.writeToMemory(image); + try writeFile(file_in_memory.items, executable_relative_path); + } + + fn writeFile(bytes: []const u8, path: []const u8) !void { + const flags = switch (@import("builtin").os.tag) { + .windows => .{}, + else => .{ + .mode = 0o777, + }, + }; + + const file_descriptor = try std.fs.cwd().createFile(path, flags); + try file_descriptor.writeAll(bytes); + file_descriptor.close(); + } + + fn writePe(image: *Result, executable_relative_path: []const u8) !void { + _ = executable_relative_path; + _ = image; + // var writer = try pe.Writer.init(allocator); + // try writer.writeToMemory(image); + // try writer.writeToFile(executable_relative_path); + unreachable; + } }; pub fn InstructionSelector(comptime Instruction: type) type { @@ -114,36 +212,49 @@ pub fn InstructionSelector(comptime Instruction: type) type { }; } +const x86_64 = @import("x86_64.zig"); +const aarch64 = @import("aarch64.zig"); + +pub const Logger = x86_64.Logger; + pub fn get(comptime arch: std.Target.Cpu.Arch) type { const backend = switch (arch) { - .x86_64 => @import("x86_64.zig"), - else => @compileError("Architecture not supported"), + .x86_64 => x86_64, + .aarch64 => aarch64, + else => {}, }; return struct { - pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void { - std.debug.print("Entry point: {}\n", .{intermediate.entry_point}); - var mir = try backend.MIR.generate(allocator, intermediate); - try mir.allocateRegisters(allocator, intermediate); - const result = try mir.encode(intermediate); + pub fn initialize(allocator: Allocator, intermediate: *ir.Result, descriptor: Compilation.Module.Descriptor) !void { + 
switch (arch) { + .x86_64 => { + var mir = try backend.MIR.selectInstructions(allocator, intermediate, descriptor.target); + try mir.allocateRegisters(); + const os = descriptor.target.os.tag; + const image = try mir.encode(); - const text_section = result.sections.text.content[0..result.sections.text.index]; - for (text_section) |byte| { - std.debug.print("0x{x}\n", .{byte}); - } - - switch (@import("builtin").os.tag) { - .linux => switch (@import("builtin").cpu.arch == arch) { - true => { - std.debug.print("Executing...\n", .{}); - const entryPoint = result.getEntryPoint(fn () callconv(.SysV) noreturn); - entryPoint(); - std.debug.print("This should not print...\n", .{}); - }, - false => {}, + switch (os) { + .linux => try image.writeElf(descriptor.executable_path), + .windows => try image.writePe(descriptor.executable_path), + else => unreachable, + } + }, + else => { + const file = try std.fs.cwd().readFileAlloc(allocator, "main", std.math.maxInt(u64)); + try macho.interpretFile(allocator, descriptor, file); }, - else => {}, } + + // switch (@import("builtin").os.tag) { + // .linux => switch (@import("builtin").cpu.arch == arch) { + // true => { + // const entryPoint = result.getEntryPoint(fn () callconv(.SysV) noreturn); + // entryPoint(); + // }, + // false => {}, + // }, + // else => {}, + // } } }; } diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index b396aa7..73d2d07 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -1,9 +1,10 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; -const print = std.debug.print; const Compilation = @import("../Compilation.zig"); +const log = Compilation.log; +const logln = Compilation.logln; const Module = Compilation.Module; const Package = Compilation.Package; @@ -12,46 +13,66 @@ const ArrayList = data_structures.ArrayList; const BlockList = 
data_structures.BlockList; const AutoArrayHashMap = data_structures.AutoArrayHashMap; const AutoHashMap = data_structures.AutoHashMap; +const StringKeyMap = data_structures.StringKeyMap; + +pub const Logger = enum { + function, + phi_removal, + + pub var bitset = std.EnumSet(Logger).initMany(&.{ + .function, + }); +}; pub const Result = struct { blocks: BlockList(BasicBlock) = .{}, calls: BlockList(Call) = .{}, - functions: BlockList(Function) = .{}, + function_declarations: BlockList(Function.Declaration) = .{}, + function_definitions: BlockList(Function) = .{}, instructions: BlockList(Instruction) = .{}, jumps: BlockList(Jump) = .{}, loads: BlockList(Load) = .{}, phis: BlockList(Phi) = .{}, stores: BlockList(Store) = .{}, syscalls: BlockList(Syscall) = .{}, - values: BlockList(Value) = .{}, + arguments: BlockList(Argument) = .{}, + returns: BlockList(Return) = .{}, stack_references: BlockList(StackReference) = .{}, - entry_point: u32 = 0, + string_literals: BlockList(StringLiteral) = .{}, + casts: BlockList(Cast) = .{}, + readonly_data: ArrayList(u8) = .{}, + module: *Module, + entry_point: Function.Index = Function.Index.invalid, + + pub fn getFunctionName(ir: *Result, function_index: Function.Declaration.Index) []const u8 { + return ir.module.getName(ir.module.function_name_map.get(@bitCast(function_index)).?).?; + } }; -pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_file: Compilation.Type.Index) !Result { - _ = main_file; - _ = package; - print("\nFunction count: {}\n", .{module.functions.len}); - +pub fn initialize(compilation: *Compilation, module: *Module) !*Result { var function_iterator = module.functions.iterator(); - var builder = Builder{ + const builder = try compilation.base_allocator.create(Builder); + builder.* = .{ .allocator = compilation.base_allocator, .module = module, }; - builder.ir.entry_point = module.entry_point orelse unreachable; + builder.ir.module = module; + var sema_function_index = 
function_iterator.getCurrentIndex(); while (function_iterator.next()) |sema_function| { const function_index = try builder.buildFunction(sema_function); - try builder.optimizeFunction(function_index); + if (sema_function_index.eq(module.entry_point)) { + assert(!function_index.invalid); + builder.ir.entry_point = function_index; + } + + sema_function_index = function_iterator.getCurrentIndex(); } - var ir_function_iterator = builder.ir.functions.iterator(); - while (ir_function_iterator.nextPointer()) |function| { - print("\n{}\n", .{function}); - } + assert(!builder.ir.entry_point.invalid); - return builder.ir; + return &builder.ir; } pub const BasicBlock = struct { @@ -71,25 +92,20 @@ pub const BasicBlock = struct { basic_block.sealed = true; } -}; -pub const Instruction = union(enum) { - call: Call.Index, - jump: Jump.Index, - load: Load.Index, - phi: Phi.Index, - ret: Value.Index, - store: Store.Index, - syscall: Value.Index, - copy: Value.Index, - @"unreachable", - - pub const List = BlockList(@This()); - pub const Index = List.Index; + fn hasJump(basic_block: *BasicBlock, ir: *Result) bool { + if (basic_block.instructions.items.len > 0) { + const last_instruction = ir.instructions.get(basic_block.instructions.getLast()); + return switch (last_instruction.*) { + .jump => true, + else => false, + }; + } else return false; + } }; const Phi = struct { - value: Value.Index, + instruction: Instruction.Index, jump: Jump.Index, block: BasicBlock.Index, next: Phi.Index, @@ -105,27 +121,28 @@ pub const Jump = struct { }; const Syscall = struct { - arguments: ArrayList(Value.Index), + arguments: ArrayList(Instruction.Index), pub const List = BlockList(@This()); pub const Index = List.Index; }; -const Load = struct { - value: Value.Index, +pub const Load = struct { + instruction: Instruction.Index, pub const List = BlockList(@This()); pub const Index = List.Index; }; -const Store = struct { - source: Value.Index, - destination: Value.Index, +pub const Store = struct { + 
source: Instruction.Index, + destination: Instruction.Index, pub const List = BlockList(@This()); pub const Index = List.Index; }; pub const StackReference = struct { - size: u64, + type: Type, + count: usize = 1, alignment: u64, offset: u64, pub const List = BlockList(@This()); @@ -133,59 +150,242 @@ pub const StackReference = struct { }; pub const Call = struct { - function: Function.Index, + function: Function.Declaration.Index, + arguments: []const Instruction.Index, pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; }; -pub const Value = union(enum) { - integer: Compilation.Integer, - load: Load.Index, - call: Call.Index, - stack_reference: StackReference.Index, - phi: Phi.Index, - instruction: Instruction.Index, - syscall: Syscall.Index, +pub const Argument = struct { + type: Type, + // index: usize, pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; - pub fn isInMemory(value: Value) bool { - return switch (value) { - .integer => false, - .load => true, - .call => true, - .stack_reference => true, - .phi => unreachable, - .instruction => unreachable, - .syscall => unreachable, +pub const Return = struct { + instruction: Instruction.Index, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const Copy = struct { + foo: u64 = 0, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const Cast = struct { + value: Instruction.Index, + type: Type, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +pub const CastType = enum { + sign_extend, +}; + +pub const Type = enum { + void, + noreturn, + i8, + i16, + i32, + i64, + + fn isInteger(t: Type) bool { + return switch (t) { + .i8, + .i16, + .i32, + .i64, + => true, + .void, + .noreturn, + => 
false, + }; + } + + pub fn getSize(t: Type) u64 { + return switch (t) { + .i8 => @sizeOf(i8), + .i16 => @sizeOf(i16), + .i32 => @sizeOf(i32), + .i64 => @sizeOf(i64), + .void, + .noreturn, + => unreachable, + }; + } + + pub fn getAlignment(t: Type) u64 { + return switch (t) { + .i8 => @alignOf(i8), + .i16 => @alignOf(i16), + .i32 => @alignOf(i32), + .i64 => @alignOf(i64), + .void, + .noreturn, + => unreachable, }; } }; +pub const Instruction = union(enum) { + call: Call.Index, + jump: Jump.Index, + load: Load.Index, + phi: Phi.Index, + ret: Return.Index, + store: Store.Index, + syscall: Syscall.Index, + copy: Instruction.Index, + @"unreachable", + argument: Argument.Index, + load_integer: Integer, + load_string_literal: StringLiteral.Index, + stack: StackReference.Index, + sign_extend: Cast.Index, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const StringLiteral = struct { + offset: u32, + hash: u32, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + pub const Function = struct { + declaration: Declaration.Index = Declaration.Index.invalid, blocks: ArrayList(BasicBlock.Index) = .{}, - stack_map: AutoHashMap(Compilation.Declaration.Index, Value.Index) = .{}, + stack_map: AutoHashMap(Compilation.Declaration.Index, Instruction.Index) = .{}, current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid, return_phi_node: Instruction.Index = Instruction.Index.invalid, return_phi_block: BasicBlock.Index = BasicBlock.Index.invalid, ir: *Result, current_stack_offset: usize = 0, + pub const List = BlockList(@This()); pub const Index = List.Index; + pub const Declaration = struct { + definition: Function.Index = Function.Index.invalid, + arguments: AutoArrayHashMap(Compilation.Declaration.Index, Instruction.Index) = .{}, + calling_convention: Compilation.CallingConvention, + return_type: Type, + + pub const List = BlockList(@This()); + pub const Index = Declaration.List.Index; + }; + pub fn format(function: 
*const Function, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - try writer.writeAll("Function:\n"); + const function_index = function.declaration; + const sema_function_index: Compilation.Function.Index = @bitCast(function_index); + const function_name_hash = function.ir.module.function_name_map.get(sema_function_index).?; + const function_name = function.ir.module.getName(function_name_hash).?; + try writer.print("Function #{} \"{s}\"\n", .{ function_index.uniqueInteger(), function_name }); for (function.blocks.items, 0..) |block_index, function_block_index| { - try writer.print("#{}:\n", .{function_block_index}); + try writer.print("#{}: ({})\n", .{ function_block_index, block_index.uniqueInteger() }); const block = function.ir.blocks.get(block_index); for (block.instructions.items, 0..) |instruction_index, block_instruction_index| { - try writer.print("%{}: ", .{block_instruction_index}); - const instruction = function.ir.instructions.get(instruction_index).*; - try writer.print("{s}", .{@tagName(instruction)}); + try writer.print("%{} (${}): ", .{ block_instruction_index, instruction_index.uniqueInteger() }); + const instruction = function.ir.instructions.get(instruction_index); + try writer.print("{s}", .{@tagName(instruction.*)}); + switch (instruction.*) { + .syscall => |syscall_index| { + const syscall = function.ir.syscalls.get(syscall_index); + try writer.writeAll(" ("); + for (syscall.arguments.items, 0..) 
|arg_index, i| { + const arg_value = function.ir.instructions.get(arg_index).*; + + try writer.print("${}: {s}", .{ i, @tagName(arg_value) }); + + if (i < syscall.arguments.items.len - 1) { + try writer.writeAll(", "); + } + } + try writer.writeAll(")"); + }, + .jump => |jump_index| { + const jump = function.ir.jumps.get(jump_index); + try writer.print(" ${}", .{jump.destination.uniqueInteger()}); + }, + .phi => {}, + .ret => |ret_index| { + const ret = function.ir.returns.get(ret_index); + switch (ret.instruction.invalid) { + false => { + const ret_value = function.ir.instructions.get(ret.instruction).*; + try writer.print(" {s}", .{@tagName(ret_value)}); + }, + true => try writer.writeAll(" void"), + } + }, + // .load => |load_index| { + // const load = function.ir.loads.get(load_index); + // try writer.print(" {s}", .{@tagName(function.ir.values.get(load.value).*)}); + // }, + .store => |store_index| { + const store = function.ir.stores.get(store_index); + const source = function.ir.instructions.get(store.source).*; + const destination = function.ir.instructions.get(store.destination).*; + try writer.print(" {s}, {s}", .{ @tagName(destination), @tagName(source) }); + }, + .call => |call_index| { + const call = function.ir.calls.get(call_index); + try writer.print(" ${} {s}(", .{ call.function.uniqueInteger(), function.ir.getFunctionName(call.function) }); + for (call.arguments, 0..) 
|arg_index, i| { + const arg_value = function.ir.instructions.get(arg_index).*; + + try writer.print("${}: {s}", .{ i, @tagName(arg_value) }); + + if (i < call.arguments.len - 1) { + try writer.writeAll(", "); + } + } + try writer.writeAll(")"); + }, + .load_integer => |integer| { + try writer.print(" {s} (unsigned: 0x{x}, signed {})", .{ @tagName(integer.type), integer.value.unsigned, integer.value.unsigned }); + }, + .@"unreachable" => {}, + .load_string_literal => |string_literal_index| { + const string_literal = function.ir.string_literals.get(string_literal_index); + try writer.print(" at 0x{x}", .{string_literal.offset}); + }, + .stack => |stack_index| { + const stack = function.ir.stack_references.get(stack_index); + try writer.print(" offset: {}. size: {}. alignment: {}", .{ stack.offset, stack.type.getSize(), stack.alignment }); + }, + .argument => |argument_index| { + const argument = function.ir.arguments.get(argument_index); + try writer.print("${}, size: {}. alignment: {}", .{ argument_index, argument.type.getSize(), argument.type.getAlignment() }); + }, + .sign_extend => |cast_index| { + const cast = function.ir.casts.get(cast_index); + try writer.print(" {s} ${}", .{ @tagName(cast.type), cast.value.uniqueInteger() }); + }, + .load => |load_index| { + const load = function.ir.loads.get(load_index); + try writer.print(" ${}", .{load.instruction.uniqueInteger()}); + }, + else => |t| @panic(@tagName(t)), + } + try writer.writeByte('\n'); } + try writer.writeByte('\n'); } _ = options; @@ -195,51 +395,117 @@ pub const Function = struct { pub const Builder = struct { allocator: Allocator, - ir: Result = .{}, + ir: Result = .{ + .module = undefined, + }, module: *Module, current_function_index: Function.Index = Function.Index.invalid, fn currentFunction(builder: *Builder) *Function { - return builder.ir.functions.get(builder.current_function_index); + return builder.ir.function_definitions.get(builder.current_function_index); } fn buildFunction(builder: 
*Builder, sema_function: Compilation.Function) !Function.Index { - const function_allocation = try builder.ir.functions.append(builder.allocator, .{ - .ir = &builder.ir, - }); - builder.current_function_index = function_allocation.index; - const function = function_allocation.ptr; - // TODO: arguments - function.current_basic_block = try builder.newBlock(); + const sema_prototype = builder.module.function_prototypes.get(builder.module.types.get(sema_function.prototype).function); + const function_declaration_allocation = try builder.ir.function_declarations.addOne(builder.allocator); + const function_declaration = function_declaration_allocation.ptr; + function_declaration.* = .{ + .calling_convention = sema_prototype.attributes.calling_convention, + .return_type = try builder.translateType(sema_prototype.return_type), + }; - const return_type = builder.module.types.get(builder.module.function_prototypes.get(sema_function.prototype).return_type); - const is_noreturn = return_type.* == .noreturn; - if (!is_noreturn) { - const exit_block = try builder.newBlock(); - const phi_instruction = try builder.appendToBlock(exit_block, .{ - .phi = Phi.Index.invalid, - }); - // phi.ptr.* = .{ - // .value = Value.Index.invalid, - // .jump = Jump.Index.invalid, - // .block = exit_block, - // .next = Phi.Index.invalid, - // }; - const ret = try builder.appendToBlock(exit_block, .{ - .ret = (try builder.ir.values.append(builder.allocator, .{ - .instruction = phi_instruction, - })).index, - }); - _ = ret; - function.return_phi_node = phi_instruction; - function.return_phi_block = exit_block; + const function_decl_name = builder.ir.getFunctionName(function_declaration_allocation.index); + _ = function_decl_name; + + if (sema_prototype.arguments) |sema_arguments| { + try function_declaration.arguments.ensureTotalCapacity(builder.allocator, @intCast(sema_arguments.len)); + for (sema_arguments) |sema_argument_declaration_index| { + const sema_argument_declaration = 
builder.module.declarations.get(sema_argument_declaration_index); + const argument_allocation = try builder.ir.arguments.append(builder.allocator, .{ + .type = try builder.translateType(sema_argument_declaration.type), + }); + const value_allocation = try builder.ir.instructions.append(builder.allocator, .{ + .argument = argument_allocation.index, + }); + function_declaration.arguments.putAssumeCapacity(sema_argument_declaration_index, value_allocation.index); + } } - const sema_block = sema_function.getBodyBlock(builder.module); - try builder.block(sema_block, .{ .emit_exit_block = !is_noreturn }); - builder.currentFunction().current_stack_offset = std.mem.alignForward(usize, builder.currentFunction().current_stack_offset, 0x10); + switch (sema_prototype.attributes.@"extern") { + true => return Function.Index.invalid, + false => { + const function_allocation = try builder.ir.function_definitions.append(builder.allocator, .{ + .ir = &builder.ir, + }); + const function = function_allocation.ptr; - return builder.current_function_index; + builder.current_function_index = function_allocation.index; + function.declaration = function_declaration_allocation.index; + + // TODO: arguments + function.current_basic_block = try builder.newBlock(); + + const return_type = builder.module.types.get(sema_prototype.return_type); + const is_noreturn = return_type.* == .noreturn; + + if (!is_noreturn) { + const exit_block = try builder.newBlock(); + const phi_instruction = try builder.appendToBlock(exit_block, .{ + .phi = Phi.Index.invalid, + }); + // phi.ptr.* = .{ + // .value = Value.Index.invalid, + // .jump = Jump.Index.invalid, + // .block = exit_block, + // .next = Phi.Index.invalid, + // }; + const ret = try builder.appendToBlock(exit_block, .{ + .ret = (try builder.ir.returns.append(builder.allocator, .{ + .instruction = phi_instruction, + })).index, + }); + _ = ret; + function.return_phi_node = phi_instruction; + function.return_phi_block = exit_block; + } + + try 
function.stack_map.ensureUnusedCapacity(builder.allocator, @intCast(function_declaration.arguments.keys().len)); + + for (function_declaration.arguments.keys(), function_declaration.arguments.values()) |sema_argument_index, ir_argument_instruction_index| { + const ir_argument_instruction = builder.ir.instructions.get(ir_argument_instruction_index); + const ir_argument = builder.ir.arguments.get(ir_argument_instruction.argument); + + const stack_reference = try builder.stackReference(.{ + .type = ir_argument.type, + .sema = sema_argument_index, + }); + + _ = try builder.store(.{ + .source = ir_argument_instruction_index, + .destination = stack_reference, + }); + } + + const sema_block = sema_function.getBodyBlock(builder.module); + try builder.block(sema_block, .{ .emit_exit_block = !is_noreturn }); + + if (!is_noreturn and sema_block.reaches_end) { + if (!builder.ir.blocks.get(builder.currentFunction().current_basic_block).hasJump(&builder.ir)) { + _ = try builder.append(.{ + .jump = try builder.jump(.{ + .source = builder.currentFunction().current_basic_block, + .destination = builder.currentFunction().return_phi_block, + }), + }); + } + } + + builder.currentFunction().current_stack_offset = std.mem.alignForward(usize, builder.currentFunction().current_stack_offset, 0x10); + try builder.optimizeFunction(builder.currentFunction()); + + return function_allocation.index; + }, + } } const BlockSearcher = struct { @@ -247,7 +513,7 @@ pub const Builder = struct { visited: AutoArrayHashMap(BasicBlock.Index, void) = .{}, }; - fn findReachableBlocks(builder: *Builder, first: BasicBlock.Index) ![]const BasicBlock.Index { + fn findReachableBlocks(builder: *Builder, first: BasicBlock.Index) !ArrayList(BasicBlock.Index) { var searcher = BlockSearcher{}; try searcher.to_visit.append(builder.allocator, first); try searcher.visited.put(builder.allocator, first, {}); @@ -257,44 +523,100 @@ pub const Builder = struct { const block_to_visit = builder.ir.blocks.get(block_index); 
const last_instruction_index = block_to_visit.instructions.items[block_to_visit.instructions.items.len - 1]; const last_instruction = builder.ir.instructions.get(last_instruction_index); - switch (last_instruction.*) { - .jump => |jump_index| { + const block_to_search = switch (last_instruction.*) { + .jump => |jump_index| blk: { const ir_jump = builder.ir.jumps.get(jump_index); assert(ir_jump.source.eq(block_index)); const new_block = ir_jump.destination; - if (searcher.visited.get(new_block) == null) { - try searcher.to_visit.append(builder.allocator, new_block); - try searcher.visited.put(builder.allocator, new_block, {}); + break :blk new_block; + }, + .call => |call_index| blk: { + const ir_call = builder.ir.calls.get(call_index); + const function_declaration_index = ir_call.function; + const function_declaration = builder.ir.function_declarations.get(function_declaration_index); + const function_definition_index = function_declaration.definition; + switch (function_definition_index.invalid) { + false => { + const function = builder.ir.function_definitions.get(function_definition_index); + const first_block = function.blocks.items[0]; + break :blk first_block; + }, + true => continue, } }, - .@"unreachable", .ret => {}, + .@"unreachable", .ret, .store => continue, else => |t| @panic(@tagName(t)), + }; + + if (searcher.visited.get(block_to_search) == null) { + try searcher.to_visit.append(builder.allocator, block_to_search); + try searcher.visited.put(builder.allocator, block_to_search, {}); } } - return searcher.visited.keys(); + var list = try ArrayList(BasicBlock.Index).initCapacity(builder.allocator, searcher.visited.keys().len); + list.appendSliceAssumeCapacity(searcher.visited.keys()); + + return list; } - fn optimizeFunction(builder: *Builder, function_index: Function.Index) !void { - const function = builder.ir.functions.get(function_index); - const reachable_blocks = try builder.findReachableBlocks(function.blocks.items[0]); + fn 
optimizeFunction(builder: *Builder, function: *Function) !void { + // HACK + logln(.ir, .function, "\n[BEFORE OPTIMIZE]:\n{}", .{function}); + var reachable_blocks = try builder.findReachableBlocks(function.blocks.items[0]); var did_something = true; while (did_something) { did_something = false; - for (reachable_blocks) |basic_block_index| { + for (reachable_blocks.items) |basic_block_index| { const basic_block = builder.ir.blocks.get(basic_block_index); for (basic_block.instructions.items) |instruction_index| { - did_something = did_something or try builder.removeUnreachablePhis(reachable_blocks, instruction_index); + did_something = did_something or try builder.removeUnreachablePhis(reachable_blocks.items, instruction_index); did_something = did_something or try builder.removeTrivialPhis(instruction_index); const copy = try builder.removeCopyReferences(instruction_index); did_something = did_something or copy; } + + if (basic_block.instructions.items.len > 0) { + const instruction = builder.ir.instructions.get(basic_block.instructions.getLast()); + switch (instruction.*) { + .jump => |jump_index| { + const jump_instruction = builder.ir.jumps.get(jump_index); + const source = basic_block_index; + assert(source.eq(jump_instruction.source)); + const destination = jump_instruction.destination; + + const source_index = for (function.blocks.items, 0..) |bi, index| { + if (source.eq(bi)) break index; + } else unreachable; + const destination_index = for (function.blocks.items, 0..) |bi, index| { + if (destination.eq(bi)) break index; + } else unreachable; + + if (destination_index == source_index + 1) { + const destination_block = builder.ir.blocks.get(destination); + _ = basic_block.instructions.pop(); + try basic_block.instructions.appendSlice(builder.allocator, destination_block.instructions.items); + _ = function.blocks.orderedRemove(destination_index); + const reachable_index = for (reachable_blocks.items, 0..) 
|bi, index| { + if (destination.eq(bi)) break index; + } else unreachable; + _ = reachable_blocks.swapRemove(reachable_index); + did_something = true; + break; + } + }, + .ret, .@"unreachable", .call => {}, + else => |t| @panic(@tagName(t)), + } + } else { + unreachable; + } } } var instructions_to_delete = ArrayList(u32){}; - for (reachable_blocks) |basic_block_index| { + for (reachable_blocks.items) |basic_block_index| { instructions_to_delete.clearRetainingCapacity(); const basic_block = builder.ir.blocks.get(basic_block_index); for (basic_block.instructions.items, 0..) |instruction_index, index| { @@ -310,6 +632,8 @@ pub const Builder = struct { _ = basic_block.instructions.orderedRemove(instruction_to_delete - deleted_instruction_count); } } + + logln(.ir, .function, "[AFTER OPTIMIZE]:\n{}", .{function}); } fn removeUnreachablePhis(builder: *Builder, reachable_blocks: []const BasicBlock.Index, instruction_index: Instruction.Index) !bool { @@ -318,10 +642,10 @@ pub const Builder = struct { .phi => blk: { var did_something = false; var head = &instruction.phi; - next: while (head.valid) { + next: while (!head.invalid) { const phi = builder.ir.phis.get(head.*); const phi_jump = builder.ir.jumps.get(phi.jump); - assert(phi_jump.source.valid); + assert(!phi_jump.source.invalid); for (reachable_blocks) |block_index| { if (phi_jump.source.eq(block_index)) { @@ -344,21 +668,21 @@ pub const Builder = struct { const instruction = builder.ir.instructions.get(instruction_index); return switch (instruction.*) { .phi => |phi_index| blk: { - const trivial_phi: ?Value.Index = trivial_blk: { - var only_value = Value.Index.invalid; + const trivial_phi: ?Instruction.Index = trivial_blk: { + var only_value = Instruction.Index.invalid; var it = phi_index; - while (it.valid) { + while (!it.invalid) { const phi = builder.ir.phis.get(it); - const phi_value = builder.ir.values.get(phi.value); + const phi_value = builder.ir.instructions.get(phi.instruction); if (phi_value.* == .phi) 
unreachable; // TODO: undefined - if (only_value.valid) { - if (!only_value.eq(phi.value)) { + if (!only_value.invalid) { + if (!only_value.eq(phi.instruction)) { break :trivial_blk null; } } else { - only_value = phi.value; + only_value = phi.instruction; } it = phi.next; @@ -368,7 +692,7 @@ pub const Builder = struct { }; if (trivial_phi) |trivial_value| { - if (trivial_value.valid) { + if (!trivial_value.invalid) { // Option to delete const delete = false; if (delete) { @@ -379,7 +703,10 @@ pub const Builder = struct { }; } } else { - unreachable; + logln(.ir, .phi_removal, "TODO: maybe this phi removal is wrong?", .{}); + instruction.* = .{ + .copy = trivial_value, + }; } } @@ -396,22 +723,21 @@ pub const Builder = struct { else => { var did_something = false; - const operands: []const *Value.Index = switch (instruction.*) { - .jump, .@"unreachable" => &.{}, - .ret => &.{&instruction.ret}, + const operands: []const *Instruction.Index = switch (instruction.*) { + .jump, .@"unreachable", .load_integer, .load_string_literal, .stack, .argument => &.{}, + .ret => &.{&builder.ir.returns.get(instruction.ret).instruction}, // TODO: arguments .call => blk: { - var list = ArrayList(*Value.Index){}; + var list = ArrayList(*Instruction.Index){}; break :blk list.items; }, .store => |store_index| blk: { const store_instr = builder.ir.stores.get(store_index); break :blk &.{ &store_instr.source, &store_instr.destination }; }, - .syscall => |syscall_value_index| blk: { - const syscall_value = builder.ir.values.get(syscall_value_index); - const syscall = builder.ir.syscalls.get(syscall_value.syscall); - var list = ArrayList(*Value.Index){}; + .syscall => |syscall_index| blk: { + const syscall = builder.ir.syscalls.get(syscall_index); + var list = ArrayList(*Instruction.Index){}; try list.ensureTotalCapacity(builder.allocator, syscall.arguments.items.len); for (syscall.arguments.items) |*arg| { list.appendAssumeCapacity(arg); @@ -419,24 +745,38 @@ pub const Builder = struct { 
break :blk list.items; }, + .sign_extend => |cast_index| blk: { + const cast = builder.ir.casts.get(cast_index); + break :blk &.{&cast.value}; + }, + .load => |load_index| blk: { + const load = builder.ir.loads.get(load_index); + break :blk &.{&load.instruction}; + }, else => |t| @panic(@tagName(t)), }; - for (operands) |operand_value_index| { - const operand_value = builder.ir.values.get(operand_value_index.*); - switch (operand_value.*) { - .instruction => |operand_instruction_index| { - const operand_instruction = builder.ir.instructions.get(operand_instruction_index); - switch (operand_instruction.*) { + for (operands) |operand_instruction_index_pointer| { + switch (operand_instruction_index_pointer.invalid) { + false => { + const operand_value = builder.ir.instructions.get(operand_instruction_index_pointer.*); + switch (operand_value.*) { .copy => |copy_value| { - operand_value_index.* = copy_value; + operand_instruction_index_pointer.* = copy_value; did_something = true; }, + .load_integer, + .stack, + .call, + .argument, + .syscall, + .sign_extend, + .load, + => {}, else => |t| @panic(@tagName(t)), } }, - .integer, .stack_reference, .call => {}, - else => |t| @panic(@tagName(t)), + true => {}, } } @@ -456,7 +796,95 @@ pub const Builder = struct { emit_exit_block: bool = true, }; - fn block(builder: *Builder, sema_block: *Compilation.Block, options: BlockOptions) error{OutOfMemory}!void { + fn emitSyscallArgument(builder: *Builder, sema_syscall_argument_value_index: Compilation.Value.Index) !Instruction.Index { + const sema_syscall_argument_value = builder.module.values.get(sema_syscall_argument_value_index); + return switch (sema_syscall_argument_value.*) { + .integer => |integer| try builder.processInteger(integer), + .sign_extend => |cast_index| try builder.processCast(cast_index, .sign_extend), + .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + else => |t| @panic(@tagName(t)), + }; + } 
+ + fn processCast(builder: *Builder, sema_cast_index: Compilation.Cast.Index, cast_type: CastType) !Instruction.Index { + const sema_cast = builder.module.casts.get(sema_cast_index); + const sema_source_value = builder.module.values.get(sema_cast.value); + const source_value = switch (sema_source_value.*) { + .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + else => |t| @panic(@tagName(t)), + }; + + const cast_allocation = try builder.ir.casts.append(builder.allocator, .{ + .value = source_value, + .type = try builder.translateType(sema_cast.type), + }); + + const result = try builder.append(@unionInit(Instruction, switch (cast_type) { + inline else => |ct| @tagName(ct), + }, cast_allocation.index)); + + return result; + } + + fn processDeclarationReferenceRaw(builder: *Builder, declaration_index: Compilation.Declaration.Index) !Instruction.Index { + const sema_declaration = builder.module.declarations.get(declaration_index); + const result = switch (sema_declaration.scope_type) { + .local => builder.currentFunction().stack_map.get(declaration_index).?, + .global => unreachable, + }; + return result; + } + + fn loadDeclarationReference(builder: *Builder, declaration_index: Compilation.Declaration.Index) !Instruction.Index { + const stack_instruction = try builder.processDeclarationReferenceRaw(declaration_index); + const load = try builder.ir.loads.append(builder.allocator, .{ + .instruction = stack_instruction, + }); + return try builder.append(.{ + .load = load.index, + }); + } + + fn processInteger(builder: *Builder, integer_value: Compilation.Value.Integer) !Instruction.Index { + const integer = Integer{ + .value = .{ + .unsigned = integer_value.value, + }, + .type = try builder.translateType(integer_value.type), + }; + assert(integer.type.isInteger()); + const load_integer = try builder.append(.{ + .load_integer = integer, + }); + return load_integer; + } + + fn processSyscall(builder: 
*Builder, sema_syscall_index: Compilation.Syscall.Index) anyerror!Instruction.Index { + const sema_syscall = builder.module.syscalls.get(sema_syscall_index); + var arguments = try ArrayList(Instruction.Index).initCapacity(builder.allocator, sema_syscall.argument_count + 1); + + const sema_syscall_number = sema_syscall.number; + assert(!sema_syscall_number.invalid); + const number_value_index = try builder.emitSyscallArgument(sema_syscall_number); + + arguments.appendAssumeCapacity(number_value_index); + + for (sema_syscall.getArguments()) |sema_syscall_argument| { + assert(!sema_syscall_argument.invalid); + const argument_value_index = try builder.emitSyscallArgument(sema_syscall_argument); + arguments.appendAssumeCapacity(argument_value_index); + } + + // TODO: undo this mess + const syscall_allocation = try builder.ir.syscalls.append(builder.allocator, .{ + .arguments = arguments, + }); + + const instruction_index = try builder.append(.{ .syscall = syscall_allocation.index }); + return instruction_index; + } + + fn block(builder: *Builder, sema_block: *Compilation.Block, options: BlockOptions) anyerror!void { for (sema_block.statements.items) |sema_statement_index| { const sema_statement = builder.module.values.get(sema_statement_index); switch (sema_statement.*) { @@ -479,7 +907,7 @@ pub const Builder = struct { const loop_body_block = try builder.newBlock(); const loop_prologue_block = if (options.emit_exit_block) try builder.newBlock() else BasicBlock.Index.invalid; - const loop_head_block = switch (condition.valid) { + const loop_head_block = switch (!condition.invalid) { false => loop_body_block, true => unreachable, }; @@ -491,7 +919,7 @@ pub const Builder = struct { const sema_body_block = builder.module.blocks.get(sema_loop_body.block); builder.currentFunction().current_basic_block = try builder.blockInsideBasicBlock(sema_body_block, loop_body_block); - if (loop_prologue_block.valid) { + if (!loop_prologue_block.invalid) { 
builder.ir.blocks.get(loop_prologue_block).seal(); } @@ -510,56 +938,31 @@ pub const Builder = struct { unreachable; } - if (loop_prologue_block.valid) { + if (!loop_prologue_block.invalid) { builder.currentFunction().current_basic_block = loop_prologue_block; } }, - .syscall => |syscall_index| { - const sema_syscall = builder.module.syscalls.get(syscall_index); - var arguments = try ArrayList(Value.Index).initCapacity(builder.allocator, sema_syscall.argument_count + 1); - - const sema_syscall_number = sema_syscall.number; - assert(sema_syscall_number.valid); - const number_value_index = try builder.emitValue(sema_syscall_number); - - arguments.appendAssumeCapacity(number_value_index); - - for (sema_syscall.getArguments()) |sema_syscall_argument| { - assert(sema_syscall_argument.valid); - var argument_value_index = try builder.emitValue(sema_syscall_argument); - arguments.appendAssumeCapacity(argument_value_index); - } - - // TODO: undo this mess - _ = try builder.append(.{ - .syscall = (try builder.ir.values.append(builder.allocator, .{ - .syscall = (try builder.ir.syscalls.append(builder.allocator, .{ - .arguments = arguments, - })).index, - })).index, - }); - }, + .syscall => |sema_syscall_index| _ = try builder.processSyscall(sema_syscall_index), .@"unreachable" => _ = try builder.append(.{ .@"unreachable" = {}, }), .@"return" => |sema_ret_index| { const sema_ret = builder.module.returns.get(sema_ret_index); - const return_value = try builder.emitValue(sema_ret.value); + const return_value = try builder.emitReturnValue(sema_ret.value); const phi_instruction = builder.ir.instructions.get(builder.currentFunction().return_phi_node); - const phi = switch (phi_instruction.phi.valid) { - true => unreachable, - false => (try builder.ir.phis.append(builder.allocator, std.mem.zeroes(Phi))).ptr, + const phi = switch (phi_instruction.phi.invalid) { + false => unreachable, + true => (try builder.ir.phis.append(builder.allocator, std.mem.zeroes(Phi))).ptr, }; 
//builder.ir.phis.get(phi_instruction.phi); const exit_jump = try builder.jump(.{ .source = builder.currentFunction().current_basic_block, - .destination = switch (phi_instruction.phi.valid) { + .destination = switch (!phi_instruction.phi.invalid) { true => phi.block, false => builder.currentFunction().return_phi_block, }, }); - print("Previous phi: {}\n", .{phi_instruction.phi}); phi_instruction.phi = (try builder.ir.phis.append(builder.allocator, .{ - .value = return_value, + .instruction = return_value, .jump = exit_jump, .next = phi_instruction.phi, .block = phi.block, @@ -571,49 +974,83 @@ pub const Builder = struct { }, .declaration => |sema_declaration_index| { const sema_declaration = builder.module.declarations.get(sema_declaration_index); + //logln("Name: {s}\n", .{builder.module.getName(sema_declaration.name).?}); assert(sema_declaration.scope_type == .local); - const sema_init_value = builder.module.values.get(sema_declaration.init_value); - const declaration_type = builder.module.types.get(sema_init_value.getType(builder.module)); - const size = declaration_type.getSize(); - const alignment = declaration_type.getAlignment(); - const stack_offset = switch (size > 0) { - true => builder.allocateStack(size, alignment), - false => 0, - }; - var value_index = try builder.emitValue(sema_declaration.init_value); - const value = builder.ir.values.get(value_index); - print("Value: {}\n", .{value.*}); - value_index = switch (value.isInMemory()) { - false => try builder.load(value_index), - true => value_index, - }; + const declaration_type = builder.module.types.get(sema_declaration.type); + switch (declaration_type.*) { + .comptime_int => unreachable, + else => { + var value_index = try builder.emitDeclarationInitValue(sema_declaration.init_value); + const value = builder.ir.instructions.get(value_index); + value_index = switch (value.*) { + .load_integer, + .call, + => value_index, + // .call => try builder.load(value_index), + else => |t| @panic(@tagName(t)), 
+ }; - if (stack_offset > 0) { - _ = try builder.store(.{ - .source = value_index, - .destination = try builder.stackReference(stack_offset, declaration_type.*, sema_declaration_index), - }); + const ir_type = try builder.translateType(sema_declaration.type); + _ = try builder.store(.{ + .source = value_index, + .destination = try builder.stackReference(.{ + .type = ir_type, + .sema = sema_declaration_index, + }), + }); + }, } }, + .call => |sema_call_index| _ = try builder.processCall(sema_call_index), else => |t| @panic(@tagName(t)), } } } - fn stackReference(builder: *Builder, stack_offset: u64, t: Compilation.Type, sema_declaration: Compilation.Declaration.Index) !Value.Index { + fn emitDeclarationInitValue(builder: *Builder, declaration_init_value_index: Compilation.Value.Index) !Instruction.Index { + const declaration_init_value = builder.module.values.get(declaration_init_value_index); + return switch (declaration_init_value.*) { + .call => |call_index| try builder.processCall(call_index), + .integer => |integer| try builder.processInteger(integer), + else => |t| @panic(@tagName(t)), + }; + } + + fn emitReturnValue(builder: *Builder, return_value_index: Compilation.Value.Index) !Instruction.Index { + const return_value = builder.module.values.get(return_value_index); + return switch (return_value.*) { + .syscall => |syscall_index| try builder.processSyscall(syscall_index), + .integer => |integer| try builder.processInteger(integer), + .call => |call_index| try builder.processCall(call_index), + .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + else => |t| @panic(@tagName(t)), + }; + } + + fn stackReference(builder: *Builder, arguments: struct { + type: Type, + sema: Compilation.Declaration.Index, + alignment: ?u64 = null, + }) !Instruction.Index { + const size = arguments.type.getSize(); + assert(size > 0); + const alignment = if (arguments.alignment) |a| a else 
arguments.type.getAlignment(); + builder.currentFunction().current_stack_offset = std.mem.alignForward(u64, builder.currentFunction().current_stack_offset, alignment); + builder.currentFunction().current_stack_offset += size; + const stack_offset = builder.currentFunction().current_stack_offset; const stack_reference_allocation = try builder.ir.stack_references.append(builder.allocator, .{ .offset = stack_offset, - .size = t.getSize(), - .alignment = t.getAlignment(), + .type = arguments.type, + .alignment = alignment, }); - const value_allocation = try builder.ir.values.append(builder.allocator, .{ - .stack_reference = stack_reference_allocation.index, + const instruction_index = try builder.append(.{ + .stack = stack_reference_allocation.index, }); - try builder.currentFunction().stack_map.put(builder.allocator, sema_declaration, value_allocation.index); + try builder.currentFunction().stack_map.put(builder.allocator, arguments.sema, instruction_index); - return value_allocation.index; + return instruction_index; } fn store(builder: *Builder, descriptor: Store) !void { @@ -623,78 +1060,123 @@ pub const Builder = struct { }); } - fn allocateStack(builder: *Builder, size: u64, alignment: u64) u64 { - builder.currentFunction().current_stack_offset = std.mem.alignForward(u64, builder.currentFunction().current_stack_offset, alignment); - builder.currentFunction().current_stack_offset += size; - return builder.currentFunction().current_stack_offset; + fn emitCallArgument(builder: *Builder, call_argument_value_index: Compilation.Value.Index) !Instruction.Index { + const call_argument_value = builder.module.values.get(call_argument_value_index); + return switch (call_argument_value.*) { + .integer => |integer| try builder.processInteger(integer), + .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + .string_literal => |string_literal_index| try builder.processStringLiteral(string_literal_index), + else => 
|t| @panic(@tagName(t)), + }; } - fn load(builder: *Builder, value_index: Value.Index) !Value.Index { - print("Doing load!\n", .{}); + fn processCall(builder: *Builder, sema_call_index: Compilation.Call.Index) anyerror!Instruction.Index { + const sema_call = builder.module.calls.get(sema_call_index); + const sema_argument_list_index = sema_call.arguments; + const argument_list: []const Instruction.Index = switch (sema_argument_list_index.invalid) { + false => blk: { + var argument_list = ArrayList(Instruction.Index){}; + const sema_argument_list = builder.module.argument_lists.get(sema_argument_list_index); + try argument_list.ensureTotalCapacity(builder.allocator, sema_argument_list.array.items.len); + for (sema_argument_list.array.items) |sema_argument_value_index| { + const argument_value_index = try builder.emitCallArgument(sema_argument_value_index); + argument_list.appendAssumeCapacity(argument_value_index); + } + break :blk argument_list.items; + }, + true => &.{}, + }; - const load_allocation = try builder.ir.loads.append(builder.allocator, .{ - .value = value_index, + const call_index = try builder.call(.{ + .function = switch (builder.module.values.get(sema_call.value).*) { + .function => |function_index| .{ + .index = function_index.index, + .block = function_index.block, + }, + else => |t| @panic(@tagName(t)), + }, + .arguments = argument_list, }); + const instruction_index = try builder.append(.{ - .load = load_allocation.index, + .call = call_index, }); - _ = instruction_index; - const result = try builder.ir.values.append(builder.allocator, .{ - .load = load_allocation.index, - }); - return result.index; + + return instruction_index; } - fn emitValue(builder: *Builder, sema_value_index: Compilation.Value.Index) !Value.Index { - const sema_value = builder.module.values.get(sema_value_index).*; - return switch (sema_value) { + fn processStringLiteral(builder: *Builder, string_literal_hash: u32) !Instruction.Index { + const string_literal = 
builder.module.string_literals.getValue(string_literal_hash).?; + + const readonly_offset = builder.ir.readonly_data.items.len; + try builder.ir.readonly_data.appendSlice(builder.allocator, string_literal); + try builder.ir.readonly_data.append(builder.allocator, 0); + + const string_literal_allocation = try builder.ir.string_literals.append(builder.allocator, .{ + .offset = @intCast(readonly_offset), + .hash = string_literal_hash, + }); + const result = try builder.append(.{ + .load_string_literal = string_literal_allocation.index, + }); + + return result; + } + + // fn emitValue(builder: *Builder, sema_value_index: Compilation.Value.Index) !Instruction.Index { + // const sema_value = builder.module.values.get(sema_value_index).*; + // return switch (sema_value) { + // .integer => |integer| try builder.append(.{ + // .integer = integer, + // }), + // .call => |sema_call_index| try builder.processCall(sema_call_index), + // .declaration_reference => |sema_declaration_reference| blk: { + // }, + // .syscall => |sema_syscall_index| try builder.processSyscall(sema_syscall_index), + // .string_literal => |string_literal_hash| blk: { + // const string_literal = builder.module.string_literals.getValue(string_literal_hash).?; + // + // const readonly_offset = builder.ir.readonly_data.items.len; + // try builder.ir.readonly_data.appendSlice(builder.allocator, string_literal); + // + // const string_literal_allocation = try builder.ir.string_literals.append(builder.allocator, .{ + // .offset = @intCast(readonly_offset), + // .hash = string_literal_hash, + // }); + // break :blk try builder.append(.{ + // .string_literal = string_literal_allocation.index, + // }); + // }, + // .sign_extend => |sema_cast_index| blk: { + // const sema_sign_extend = builder.module.casts.get(sema_cast_index); + // + // const sign_extend = try builder.ir.casts.append(builder.allocator, .{ + // .value = try builder.emitValue(sema_sign_extend.value), + // .type = try 
builder.translateType(sema_sign_extend.type), + // }); + // + // break :blk try builder.append(.{ + // .sign_extend = sign_extend.index, + // }); + // }, + // else => |t| @panic(@tagName(t)), + // }; + // } + + fn translateType(builder: *Builder, type_index: Compilation.Type.Index) !Type { + const sema_type = builder.module.types.get(type_index); + return switch (sema_type.*) { + .integer => |integer| switch (integer.bit_count) { + 8 => .i8, + 16 => .i16, + 32 => .i32, + 64 => .i64, + else => unreachable, + }, // TODO - .integer => |integer| (try builder.ir.values.append(builder.allocator, .{ - .integer = integer, - })).index, - .call => |sema_call_index| { - const sema_call = builder.module.calls.get(sema_call_index); - const argument_list_index = sema_call.arguments; - if (argument_list_index.valid) { - unreachable; - } - - const call_index = try builder.call(.{ - .function = switch (builder.module.values.get(sema_call.value).*) { - .function => |function_index| .{ - .index = function_index.index, - .block = function_index.block, - }, - else => |t| @panic(@tagName(t)), - }, - }); - - _ = try builder.append(.{ - .call = call_index, - }); - - const value_allocation = try builder.ir.values.append(builder.allocator, .{ - .call = call_index, - }); - - return value_allocation.index; - }, - .declaration_reference => |sema_declaration_index| { - const sema_declaration = builder.module.declarations.get(sema_declaration_index); - const sema_init_value = builder.module.values.get(sema_declaration.init_value); - const init_type = sema_init_value.getType(builder.module); - _ = init_type; - switch (sema_declaration.scope_type) { - .local => { - const stack_reference = builder.currentFunction().stack_map.get(sema_declaration_index).?; - return stack_reference; - }, - .global => unreachable, - } - // switch (sema_declaration.*) { - // else => |t| @panic(@tagName(t)), - // } - }, + .pointer => .i64, + .void => .void, + .noreturn => .noreturn, else => |t| @panic(@tagName(t)), }; } 
@@ -707,21 +1189,19 @@ pub const Builder = struct { fn jump(builder: *Builder, descriptor: Jump) !Jump.Index { const destination_block = builder.ir.blocks.get(descriptor.destination); assert(!destination_block.sealed); + assert(!descriptor.source.invalid); const jump_allocation = try builder.ir.jumps.append(builder.allocator, descriptor); return jump_allocation.index; } fn append(builder: *Builder, instruction: Instruction) !Instruction.Index { - assert(builder.current_function_index.valid); + assert(!builder.current_function_index.invalid); const current_function = builder.currentFunction(); - assert(current_function.current_basic_block.valid); + assert(!current_function.current_basic_block.invalid); return builder.appendToBlock(current_function.current_basic_block, instruction); } fn appendToBlock(builder: *Builder, block_index: BasicBlock.Index, instruction: Instruction) !Instruction.Index { - if (instruction == .phi) { - print("Adding phi: {}\n", .{instruction}); - } const instruction_allocation = try builder.ir.instructions.append(builder.allocator, instruction); try builder.ir.blocks.get(block_index).instructions.append(builder.allocator, instruction_allocation.index); @@ -730,12 +1210,19 @@ pub const Builder = struct { fn newBlock(builder: *Builder) !BasicBlock.Index { const new_block_allocation = try builder.ir.blocks.append(builder.allocator, .{}); - const current_function = builder.ir.functions.get(builder.current_function_index); + const current_function = builder.currentFunction(); const function_block_index = current_function.blocks.items.len; + _ = function_block_index; try current_function.blocks.append(builder.allocator, new_block_allocation.index); - print("Adding block: {}\n", .{function_block_index}); - return new_block_allocation.index; } }; + +pub const Integer = struct { + value: extern union { + signed: i64, + unsigned: u64, + }, + type: Type, +}; diff --git a/src/backend/macho.zig b/src/backend/macho.zig new file mode 100644 index 
0000000..fa84c7b --- /dev/null +++ b/src/backend/macho.zig @@ -0,0 +1,682 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const equal = std.mem.eql; +const print = std.debug.print; + +const Compilation = @import("../Compilation.zig"); + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const mmap = data_structures.mmap; + +const Header = extern struct { + magic: u32 = magic, + cpu_type: CpuType, + cpu_subtype: extern union { + arm: ArmSubType, + x86: X86SubType, + }, + file_type: FileType, + load_command_count: u32, + load_command_size: u32, + flags: Flags, + reserved: u32 = 0, + + const magic = 0xfeedfacf; + + const CpuType = enum(u32) { + VAX = 0x00000001, + ROMP = 0x00000002, + NS32032 = 0x00000004, + NS32332 = 0x00000005, + MC680x0 = 0x00000006, + x86 = 0x00000007, + MIPS = 0x00000008, + NS32352 = 0x00000009, + MC98000 = 0x0000000A, + HPPA = 0x0000000B, + ARM = 0x0000000C, + MC88000 = 0x0000000D, + SPARC = 0x0000000E, + i860be = 0x0000000F, + i860_le = 0x00000010, + RS6000 = 0x00000011, + PowerPC = 0x00000012, + arm64 = 0x0000000C | abi64, + x86_64 = 0x00000007 | abi64, + + const abi64 = 0x01000000; + }; + + const ArmSubType = enum(u32) { + all = 0x00000000, + ARM_A500_ARCH = 0x00000001, + ARM_A500 = 0x00000002, + ARM_A440 = 0x00000003, + ARM_M4 = 0x00000004, + ARM_V4T = 0x00000005, + ARM_V6 = 0x00000006, + ARM_V5TEJ = 0x00000007, + ARM_XSCALE = 0x00000008, + ARM_V7 = 0x00000009, + ARM_V7F = 0x0000000A, + ARM_V7S = 0x0000000B, + ARM_V7K = 0x0000000C, + ARM_V8 = 0x0000000D, + ARM_V6M = 0x0000000E, + ARM_V7M = 0x0000000F, + ARM_V7EM = 0x00000010, + _, + }; + + const X86SubType = enum(u32) { + All = 0x00000003, + @"486" = 0x00000004, + @"486SX" = 0x00000084, + Pentium_M5 = 0x00000056, + Celeron = 0x00000067, + Celeron_Mobile = 0x00000077, + Pentium_3 = 0x00000008, + Pentium_3_M = 0x00000018, + Pentium_3_XEON = 0x00000028, + Pentium_4 = 0x0000000A, + 
Itanium = 0x0000000B, + Itanium_2 = 0x0000001B, + XEON = 0x0000000C, + XEON_MP = 0x0000001C, + _, + }; + + const FileType = enum(u32) { + relocatable_object = 0x00000001, + demand_paged_executable = 0x00000002, + fixed_vm_shared_library = 0x00000003, + core = 0x00000004, + preloaded_executable = 0x00000005, + dynamic_shared_library = 0x00000006, + dynamic_link_editor = 0x00000007, + dynamic_bundle = 0x00000008, + shared_library_stub = 0x00000009, + debug_companion = 0x0000000A, + x86_64_kext = 0x0000000B, + archive = 0x0000000C, + }; + + const Flags = packed struct(u32) { + no_undefined_references: bool = true, + incrementally_linked: bool = false, + dynamic_linker_input: bool = true, + dynamic_linker_bound_undefined_references: bool = false, + prebound_dynamic_undefined_references: bool = false, + split_ro_and_rw_segments: bool = false, + _: bool = false, + two_level_namespace_bindings: bool = true, + no_symbol_multiple_definition_in_subimages: bool = false, + no_dyld_prebinding_agent_notification: bool = false, + can_redo_prebinding: bool = false, + bind_two_level_namespaces_to_libraries: bool = false, + safe_to_split_sections_for_dead_code_stripping: bool = false, + canonicalized_by_unprebinding: bool = false, + final_external_weak_symbols: bool = false, + final_weak_symbols: bool = false, + all_stacks_execute_protected: bool = false, + safe_for_zero_uid: bool = false, + safe_for_ugid: bool = false, + no_check_dependent_dylibs_for_reexport: bool = false, + load_at_random_address: bool = false, + no_load_command_for_unreferenced_dylib: bool = true, + thread_local_variable_section: bool = false, + run_with_non_executable_heap: bool = false, + code_linked_for_application_use: bool = false, + nlist_external_symbols_not_all_dyld_info_symbols: bool = false, + allow_lc_min_version_macos_lc_build_version: bool = false, + reserved: u4 = 0, + dylib_only: bool = false, + }; +}; + +const UniversalHeader = extern struct { + magic: u32 = magic, + binary_count: u32, + + const 
magic = 0xcafebabe; +}; + +const LoadCommand = extern struct { + type: Type, + size: u32, + + const Type = enum(u32) { + segment32 = 0x01, + symbol_table = 0x02, + symbol_table_information = 0x0b, + load_dylib = 0x0c, + id_dylib = 0x0d, + load_dylinker = 0x0e, + id_dylinker = 0x0f, + optional_dynamic_library = 0x18, + segment64 = 0x19, + uuid_number = 0x1b, + code_signature = 0x1d, + compressed_linkedit_table = 0x22, + function_starts = 0x26, + data_in_code = 0x29, + source_version = 0x2a, + minimum_os_version = 0x32, + dyld_exports_trie = 0x80000033, + dyld_chained_fixups = 0x80000034, + dyld_main_entry_point = 0x80000028, + }; + + const Segment64 = extern struct { + type: Type = .segment64, + size: u32, + name: [16]u8, + address: u64, + address_size: u64, + file_offset: u64, + file_size: u64, + maximum_virtual_memory_protections: VirtualMemoryProtection, + initial_virtual_memory_protections: VirtualMemoryProtection, + section_count: u32, + flags: Flags, + + const VirtualMemoryProtection = packed struct(u32) { + read: bool, + write: bool, + execute: bool, + reserved: u29 = 0, + }; + + const Flags = packed struct(u32) { + vm_space_high_part: bool = false, + vm_fixed_library: bool = false, + no_relocation: bool = false, + protected_segment: bool = false, + read_only_after_relocations: bool = false, + reserved: u27 = 0, + }; + + const Section = extern struct { + name: [16]u8, + segment_name: [16]u8, + address: u64, + size: u64, + file_offset: u32, + alignment: u32, + relocation_file_offset: u32, + relocation_count: u32, + type: Section.Type, + reserved: u8 = 0, + flags: Section.Flags, + reserved0: u32 = 0, + reserved1: u32 = 0, + reserved2: u32 = 0, + + comptime { + assert(@sizeOf(Section) == 80); + } + + const Type = enum(u8) { + regular = 0, + only_non_lazy_symbol_pointers = 0b110, + only_lazy_symbol_pointers_only_symbol_stubs = 0b111, + zero_fill_on_demand_section = 0b1100, + only_lazy_pointers_to_lazy_loaded_dylibs = 0b10000, + }; + + const Flags = packed 
struct(u16) { + local_relocations: bool = false, + external_relocations: bool = false, + some_machine_instructions: bool = false, + reserved: u5 = 0, + reserved2: u1 = 0, + debug_section: bool = false, + i386_code_stubs: bool = false, + live_blocks_if_reference_live_blocks: bool = false, + no_dead_stripping: bool = false, + strip_static_symbols_dyldlink_flag: bool = false, + coalesced_symbols: bool = false, + only_machine_instructions: bool = false, + }; + }; + + fn getSize(section_count: u32) u32 { + return @sizeOf(LoadCommand.Segment64) + section_count * @sizeOf(LoadCommand.Segment64.Section); + } + }; + + const LinkeditData = extern struct { + type: Type, + size: u32 = 16, + data_offset: u32, + data_size: u32, + }; + + const SymbolTable = extern struct { + type: Type, + size: u32 = 24, + symbol_offset: u32, + symbol_count: u32, + string_table_offset: u32, + string_table_size: u32, + }; + + const SymbolTableInformation = extern struct { + type: Type, + size: u32 = 80, + local_symbol_index: u32, + local_symbol_count: u32, + external_symbol_index: u32, + external_symbol_count: u32, + undefined_symbol_index: u32, + undefined_symbol_count: u32, + content_table_offset: u32, + content_table_entry_count: u32, + module_table_offset: u32, + module_table_entry_count: u32, + referenced_symbol_table_offset: u32, + referenced_symbol_table_entry_count: u32, + indirect_symbol_table_offset: u32, + indirect_symbol_table_entry_count: u32, + external_relocation_offset: u32, + external_relocation_entry_count: u32, + local_relocation_offset: u32, + local_relocation_entry_count: u32, + }; + + const Dylinker = extern struct { + type: Type, + size: u32, + name_offset: u32 = 12, + }; + + const Dylib = extern struct { + type: Type, + size: u32, + name_offset: u32, + timestamp: u32, + current_version: u32, + compatibility_version: u32, + }; + + const Uuid = extern struct { + type: Type, + size: u32, + uuid: [16]u8, + }; + + const MinimumVersion = extern struct { + type: Type, + size: u32, 
+ version: u32, + sdk: u32, + }; + + const SourceVersion = extern struct { + type: Type, + size: u32, + version: u64, + }; + + const EntryPoint = extern struct { + type: Type, + size: u32, + entry_offset: u64, + stack_size: u64, + }; +}; + +const Writer = struct { + items: []u8, + index: usize = 0, + address_offset: usize = 0, + file_offset: usize = 0, + load_command_size: u32, + segment_count: u16, + segment_index: u16 = 0, + segment_offset: u16 = @sizeOf(Header), + linkedit_segment_address_offset: u64 = 0, + linkedit_segment_file_offset: u64 = 0, + linkedit_segment_size: u32 = 0, + + fn getWrittenBytes(writer: *const Writer) []const u8 { + return writer.items[0..writer.index]; + } + + fn append(writer: *Writer, bytes: []const u8) void { + writer.writeBytesAt(bytes, writer.index); + writer.index += bytes.len; + } + + fn writeBytesAt(writer: *Writer, bytes: []const u8, offset: usize) void { + @memcpy(writer.items[offset..][0..bytes.len], bytes); + } + + const SegmentCreation = struct { + name: []const u8, + sections: []const SectionCreation, + protection: LoadCommand.Segment64.VirtualMemoryProtection, + }; + + const SectionCreation = struct { + name: []const u8, + bytes: []const u8, + alignment: u32 = 1, + flags: LoadCommand.Segment64.Section.Flags, + }; + + fn writeSegment(writer: *Writer, descriptor: SegmentCreation) void { + assert(writer.segment_index < writer.segment_count); + defer writer.segment_index += 1; + + const segment_name = blk: { + var result = [1]u8{0} ** 16; + @memcpy(result[0..descriptor.name.len], descriptor.name); + break :blk result; + }; + + if (equal(u8, descriptor.name, "__PAGEZERO")) { + assert(writer.segment_offset == @sizeOf(Header)); + const address_size = 4 * 1024 * 1024 * 1024; + writer.writeBytesAt(std.mem.asBytes(&LoadCommand.Segment64{ + .size = @sizeOf(LoadCommand.Segment64), + .name = segment_name, + .address = 0, + .address_size = address_size, + .file_offset = 0, + .file_size = 0, + .maximum_virtual_memory_protections = 
descriptor.protection, + .initial_virtual_memory_protections = descriptor.protection, + .section_count = @intCast(descriptor.sections.len), + .flags = .{}, + }), writer.segment_offset); + + writer.address_offset += address_size; + writer.segment_offset += @sizeOf(LoadCommand.Segment64); + } else if (equal(u8, descriptor.name, "__TEXT")) { + const original_offset = writer.segment_offset; + assert(original_offset == @sizeOf(Header) + @sizeOf(LoadCommand.Segment64)); + writer.segment_offset += @sizeOf(LoadCommand.Segment64); + + const text_metadata_offset = @sizeOf(Header) + writer.load_command_size; + var section_address_offset = writer.address_offset + text_metadata_offset; + var section_file_offset = writer.file_offset + text_metadata_offset; + + for (descriptor.sections) |section| { + section_address_offset = std.mem.alignForward(usize, section_address_offset, section.alignment); + section_file_offset = std.mem.alignForward(usize, section_file_offset, section.alignment); + + writer.writeBytesAt(std.mem.asBytes(&LoadCommand.Segment64.Section{ + .name = blk: { + var result = [1]u8{0} ** 16; + @memcpy(result[0..section.name.len], section.name); + break :blk result; + }, + .segment_name = segment_name, + .address = section_address_offset, + .size = section.bytes.len, + .file_offset = @intCast(section_file_offset), + .alignment = std.math.log2(section.alignment), + .relocation_file_offset = 0, + .relocation_count = 0, + .type = .regular, + .flags = section.flags, + }), writer.segment_offset); + + @memcpy(writer.items[section_file_offset..][0..section.bytes.len], section.bytes); + + section_address_offset += section.bytes.len; + section_file_offset += section.bytes.len; + + writer.segment_offset += @sizeOf(LoadCommand.Segment64.Section); + } + + const end_segment_offset = writer.segment_offset; + writer.segment_offset = original_offset; + + const size = end_segment_offset - writer.file_offset; + const aligned_size = std.mem.alignForward(usize, size, 16 * 1024); + + 
writer.append(std.mem.asBytes(&LoadCommand.Segment64{ + .size = @sizeOf(LoadCommand.Segment64), + .name = segment_name, + .address = writer.address_offset, + .address_size = aligned_size, + .file_offset = writer.file_offset, + .file_size = aligned_size, + .maximum_virtual_memory_protections = descriptor.protection, + .initial_virtual_memory_protections = descriptor.protection, + .section_count = @intCast(descriptor.sections.len), + .flags = .{}, + })); + + writer.segment_offset = end_segment_offset; + + writer.address_offset += aligned_size; + writer.file_offset += aligned_size; + } else { + unreachable; + } + } + + fn writeLinkeditData(writer: *Writer, bytes: []const u8, load_command_type: LoadCommand.Type) void { + if (writer.linkedit_segment_size == 0) { + writer.linkedit_segment_address_offset = writer.address_offset; + writer.linkedit_segment_file_offset = writer.file_offset; + } + + const data_size: u32 = @intCast(bytes.len); + @memcpy(writer.items[writer.file_offset..][0..data_size], bytes); + + writer.append(std.mem.asBytes(&LoadCommand.LinkeditData{ + .type = load_command_type, + .data_offset = @intCast(writer.linkedit_segment_file_offset), + .data_size = data_size, + })); + + writer.address_offset += data_size; + writer.file_offset += data_size; + + writer.linkedit_segment_size += data_size; + } +}; + +pub fn interpretFile(allocator: Allocator, descriptor: Compilation.Module.Descriptor, file: []const u8) !void { + _ = allocator; + _ = descriptor; + const header: *const Header = @ptrCast(@alignCast(file.ptr)); + print("Header : {}", .{header}); + assert(header.magic == Header.magic); + + var text_segment: LoadCommand.Segment64 = undefined; + const load_command_start: *const LoadCommand = @ptrCast(@alignCast(file[@sizeOf(Header)..].ptr)); + var load_command_ptr = load_command_start; + + for (0..header.load_command_count) |_| { + const load_command = load_command_ptr.*; + switch (load_command.type) { + .segment64 => { + const segment_load_command: *const 
LoadCommand.Segment64 = @ptrCast(@alignCast(load_command_ptr)); + const text_segment_name = "__TEXT"; + if (equal(u8, segment_load_command.name[0..text_segment_name.len], text_segment_name)) { + text_segment = segment_load_command.*; + } + print("SLC: {}", .{segment_load_command}); + print("segment name: {s}", .{segment_load_command.name}); + const section_ptr: [*]const LoadCommand.Segment64.Section = @ptrFromInt(@intFromPtr(segment_load_command) + @sizeOf(LoadCommand.Segment64)); + const sections = section_ptr[0..segment_load_command.section_count]; + for (sections) |section| { + print("{}", .{section}); + print("Section name: {s}. Segment name: {s}", .{ section.name, section.segment_name }); + } + }, + .dyld_chained_fixups => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .dyld_exports_trie => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .symbol_table => { + const command: *const LoadCommand.SymbolTable = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .symbol_table_information => { + const command: *const LoadCommand.SymbolTableInformation = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .load_dylinker => { + const command: *const LoadCommand.Dylinker = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + const name: [*:0]const u8 = @ptrFromInt(@intFromPtr(command) + command.name_offset); + print("Name: {s}", .{name}); + }, + .uuid_number => { + const command: *const LoadCommand.Uuid = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .minimum_os_version => { + const command: *const LoadCommand.MinimumVersion = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .source_version => { + const command: *const 
LoadCommand.SourceVersion = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .dyld_main_entry_point => { + const command: *const LoadCommand.EntryPoint = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .load_dylib => { + const command: *const LoadCommand.Dylib = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + print("Dylib: {s}", .{@as([*:0]const u8, @ptrFromInt(@intFromPtr(load_command_ptr) + @sizeOf(LoadCommand.Dylib)))}); + }, + .function_starts => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .data_in_code => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + .code_signature => { + const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr)); + print("command: {}", .{command}); + }, + else => |t| @panic(@tagName(t)), + } + + load_command_ptr = @ptrFromInt(@intFromPtr(load_command_ptr) + load_command.size); + } + + // const load_command_end = load_command_ptr; + // const load_command_size = @intFromPtr(load_command_end) - @intFromPtr(load_command_start); + // assert(load_command_size == header.load_command_size); + + const segment_count = 3; + var writer = Writer{ + .items = try mmap(0x100000, .{}), + .load_command_size = segment_count * @sizeOf(LoadCommand.Segment64) + + 2 * @sizeOf(LoadCommand.Segment64.Section) + + @sizeOf(LoadCommand.LinkeditData) + + @sizeOf(LoadCommand.LinkeditData) + + @sizeOf(LoadCommand.SymbolTable) + + @sizeOf(LoadCommand.SymbolTableInformation) + + @sizeOf(LoadCommand.Dylinker) + std.mem.alignForward(u32, "/usr/lib/dyld".len, 8) + + @sizeOf(LoadCommand.Uuid) + + @sizeOf(LoadCommand.MinimumVersion) + + @sizeOf(LoadCommand.EntryPoint) + + @sizeOf(LoadCommand.Dylib) + std.mem.alignForward(u32, "/usr/lib/libSystem.B.dylib".len, 8) + + 3 * 
@sizeOf(LoadCommand.LinkeditData), + .segment_count = segment_count, + }; + writer.index = @sizeOf(Header); + writer.writeSegment(.{ + .name = "__PAGEZERO", + .sections = &.{}, + .protection = .{ + .read = false, + .write = false, + .execute = false, + }, + }); + writer.writeSegment(.{ + .name = "__TEXT", + .sections = &.{ + .{ + .name = "__text", + .bytes = &.{ + 0x00, 0x00, 0x80, 0x52, + 0xc0, 0x03, 0x5f, 0xd6, + }, + .alignment = 4, + .flags = .{ + .only_machine_instructions = true, + }, + }, + .{ + .name = "__unwind_info", + .bytes = &.{ + 0x01, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, + 0xb0, 0x3f, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, + 0xb9, 0x3f, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x34, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x01, 0x00, + 0x10, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, + }, + .alignment = 4, + .flags = .{}, + }, + }, + .protection = .{ + .read = true, + .write = false, + .execute = true, + }, + }); + + // TODO: write this later + + // writer.writeSegment(.{ + // .name = "__LINKEDIT", + // .sections = &.{}, + // .protection = .{ + // .read = true, + // .write = false, + // .execute = false, + // }, + // }); + assert(writer.segment_index == writer.segment_count - 1); + writer.index = writer.segment_offset + @sizeOf(LoadCommand.Segment64); + + for (file[16384 + 56 ..][0..48]) |b| { + print("0x{x}, ", .{b}); + } + + const chained_fixup_bytes = &.{ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + writer.writeLinkeditData(chained_fixup_bytes, .dyld_chained_fixups); + const export_trie_bytes = &.{ 0x0, 0x1, 
0x5f, 0x0, 0x9, 0x2, 0x0, 0x0, 0x0, 0x0, 0x2, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x5, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, 0x3, 0x0, 0xb0, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + writer.writeLinkeditData(export_trie_bytes, .dyld_exports_trie); + unreachable; + // writer.writeSymbolTable( +} + +// .bytes = &.{ +// 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x0, 0x9, 0x2, 0x0, 0x0, 0x0, 0x0, 0x2, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x5, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, 0x3, 0x0, 0xb0, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xb0, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0xf, 0x1, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x16, 0x0, 0x0, 0x0, 0xf, 0x1, 0x0, 0x0, 0xb0, 0x3f, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x20, 0x0, 0x5f, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x5f, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0xfa, 0xde, 0xc, 0xc0, 0x0, 0x0, 0x1, 0x11, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x14, 0xfa, 0xde, 0xc, 0x2, 0x0, 0x0, 0x0, 0xfd, 0x0, 0x2, 0x4, 0x0, 0x0, 0x2, 0x0, 0x2, 0x0, 0x0, 0x0, 0x5d, 0x0, 0x0, 0x0, 0x58, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x40, 0xb0, 0x20, 0x2, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0xb2, 0x2a, 0x3, 0x79, 0x1b, 0x82, 0xf4, 0x71, 0xf1, 0xae, 0xfa, 0x44, 0x53, 
0xe0, 0xc2, 0x78, 0x1e, 0x56, 0xd1, 0x9b, 0x36, 0x37, 0x7b, 0x7e, 0x61, 0xf5, 0x8a, 0x59, 0xc4, 0xf0, 0x64, 0x56, 0xad, 0x7f, 0xac, 0xb2, 0x58, 0x6f, 0xc6, 0xe9, 0x66, 0xc0, 0x4, 0xd7, 0xd1, 0xd1, 0x6b, 0x2, 0x4f, 0x58, 0x5, 0xff, 0x7c, 0xb4, 0x7c, 0x7a, 0x85, 0xda, 0xbd, 0x8b, 0x48, 0x89, 0x2c, 0xa7, 0xad, 0x7f, 0xac, 0xb2, 0x58, 0x6f, 0xc6, 0xe9, 0x66, 0xc0, 0x4, 0xd7, 0xd1, 0xd1, 0x6b, 0x2, 0x4f, 0x58, 0x5, 0xff, 0x7c, 0xb4, 0x7c, 0x7a, 0x85, 0xda, 0xbd, 0x8b, 0x48, 0x89, 0x2c, 0xa7, 0x8, 0xdb, 0xee, 0xf5, 0x95, 0x71, 0x3e, 0xcb, 0x29, 0xff, 0x3f, 0x28, 0x46, 0xf0, 0xdc, 0x97, 0xbf, 0x2d, 0x3, 0xf2, 0xec, 0xc, 0x84, 0xa, 0x44, 0x90, 0xf, 0xe0, 0xf4, 0xea, 0x67, 0x97, 0x6b, 0xb0, 0x22, 0x2, 0x0, 0xa7, 0xed, 0x94, 0xb2, 0x3d, 0x86, 0x4d, 0x13, 0xd6, 0xa4, 0xe, 0x1c, 0x1a, 0x6b, 0x9b, 0x82, 0xa0, 0xeb, 0x28, 0x23, 0xfe, 0x8a, 0x51, 0x2a, 0xe5, 0xf9, 0x39, +// }, diff --git a/src/backend/pe.zig b/src/backend/pe.zig new file mode 100644 index 0000000..dc5b0d3 --- /dev/null +++ b/src/backend/pe.zig @@ -0,0 +1,266 @@ +const std = @import("std"); +const assert = std.debug.assert; +const print = std.debug.print; +const Allocator = std.mem.Allocator; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const emit = @import("emit.zig"); +pub const Writer = struct { + in_file: []const u8, + items: []u8, + index: usize = 0, + allocator: Allocator, + pub fn init(allocator: Allocator) !Writer { + const file = try std.fs.cwd().readFileAlloc(allocator, "main.exe", 0xfffffffffffff); + const len = std.mem.alignForward(usize, file.len, 0x1000); + return Writer{ + .in_file = file, + .items = try data_structures.mmap(len, .{}), + .allocator = allocator, + }; + } + + pub fn writeToMemory(writer: *Writer, image: *const emit.Result) !void { + print("File len: {}", .{writer.in_file.len}); + const dos_header: *const ImageDosHeader = @ptrCast(@alignCast(writer.in_file.ptr)); + print("File address: {}", 
.{dos_header.file_address_of_new_exe_header}); + print("File: {s}", .{writer.in_file[0x40..]}); + for (writer.in_file[0x40..], 0..) |byte, index| { + if (byte == 'T') { + print("Index: {}", .{index}); + break; + } + } + assert(dos_header.magic_number == ImageDosHeader.magic); + // assert(dos_header.file_address_of_new_exe_header == @sizeOf(ImageDosHeader)); + print("{}", .{dos_header}); + const file_header: *const ImageFileHeader = @ptrCast(@alignCast(writer.in_file[dos_header.file_address_of_new_exe_header + 4 ..].ptr)); + print("File header: {}", .{file_header}); + + writer.append(std.mem.asBytes(&ImageDosHeader{ + .file_address_of_new_exe_header = 208, + })); + while (writer.index < 208) : (writer.index += 1) { + writer.append(&.{0}); + } + writer.append(std.mem.asBytes(&image_NT_signature)); + writer.append(std.mem.asBytes(&ImageFileHeader{ + .machine = switch (image.target.cpu.arch) { + .x86_64 => .amd64, + .aarch64 => .arm64, + else => @panic("Architecture"), + }, + .section_count = 3, + .time_date_stamp = @intCast(std.time.timestamp()), + })); + + const kernel32 = blk: { + var library = Library{ + .name = "KERNEL32.DLL", + }; + try library.symbols.append(writer.allocator, Symbol{ + .name = "ExitProcess", + }); + + break :blk library; + }; + + const libraries = &[_]Library{kernel32}; + _ = libraries; + + const code = &.{ + 0x48, 0x83, 0xec, 0x28, //subq $40, %rsp + 0xb9, 0x2a, 0x00, 0x00, 0x00, //movl $42, %ecx + 0xff, 0x15, 0xf1, 0x0f, 0x00, 0x00, //callq *4081(%rip) # 0x140002000 + 0xcc, + }; + _ = code; + + const pdata = &.{ + 0x00, 0x10, + 0x00, 0x00, + 0x10, 0x10, + 0x00, 0x00, + 0x28, 0x21, + 0x00, 0x00, + }; + _ = pdata; + + // TODO + // writer.append(std.mem.asBytes(ImageOptionalHeader{ + // .magic = ImageOptionalHeader.magic, + // .size_of_code = code.len, + // })); + + unreachable; + } + + fn append(writer: *Writer, bytes: []const u8) void { + const destination = writer.items[writer.index..][0..bytes.len]; + const source = bytes; + 
@memcpy(destination, source); + writer.index += bytes.len; + } + + pub fn writeToFile(writer: *Writer, executable_relative_path: []const u8) !void { + _ = writer; + _ = executable_relative_path; + unreachable; + } +}; + +const ImageDosHeader = extern struct { + magic_number: u16 = magic, + bytes_last_page_of_file: u16 = 0, + pages_in_file: u16 = 0, + relocations: u16 = 0, + size_of_header_in_paragraphs: u16 = 0, + minimum_extra_paragraphs: u16 = 0, + maximum_extra_paragraphs: u16 = 0, + initial_ss_value: u16 = 0, + initial_sp_value: u16 = 0, + cheksum: u16 = 0, + initial_ip_value: u16 = 0, + initial_cs_value: u16 = 0, + file_address_of_relocation_table: u16 = 0, + overlay_number: u16 = 0, + reserved_words: [4]u16 = .{0} ** 4, + oem_id: u16 = 0, + oem_info: u16 = 0, + reserved_words2: [10]u16 = .{0} ** 10, + file_address_of_new_exe_header: u32 = @sizeOf(ImageDosHeader), + + const magic = 0x5a4d; + + comptime { + assert(@sizeOf(ImageDosHeader) == 64); + } +}; +const image_NT_signature: u32 = 0x00004550; + +/// COFF header format +const ImageFileHeader = extern struct { + machine: ImageFileMachine, + section_count: u16, + time_date_stamp: u32, + symbol_table_offset: u32 = 0, + symbol_count: u32 = 0, + size_of_optional_header: u16 = @sizeOf(ImageOptionalHeader), + characteristics: Characteristics = .{}, + + const Characteristics = packed struct(u16) { + relocations_stripped: bool = false, + executable_image: bool = true, + stripped_line_count: bool = false, + stripped_local_symbols: bool = false, + aggressive_ws_trim: bool = false, + large_address_aware: bool = true, + reserved: u1 = 0, + bytes_reversed_lo: bool = false, + machine_32bit: bool = false, + stripped_debug: bool = false, + removable_run_from_swap: bool = false, + net_run_from_swap: bool = false, + system: bool = false, + dll: bool = false, + up_systems_only: bool = false, + bytes_reversed_hi: bool = false, + }; +}; + +const ImageFileMachine = enum(u16) { + unknown = 0, + target_host = 0x0001, // Useful for 
indicating we want to interact with the host and not a WoW guest. + i386 = 0x014c, // Intel 386. + r3000 = 0x0162, // MIPS little-endian, 0x160 big-endian + r4000 = 0x0166, // MIPS little-endian + r10000 = 0x0168, // MIPS little-endian + wcemipsv2 = 0x0169, // MIPS little-endian WCE v2 + alpha = 0x0184, // Alpha_AXP + sh3 = 0x01a2, // SH3 little-endian + sh3dsp = 0x01a3, + sh3e = 0x01a4, // SH3E little-endian + sh4 = 0x01a6, // SH4 little-endian + sh5 = 0x01a8, // SH5 + arm = 0x01c0, // ARM Little-Endian + thumb = 0x01c2, // ARM Thumb/Thumb-2 Little-Endian + armnt = 0x01c4, // ARM Thumb-2 Little-Endian + am33 = 0x01d3, + powerpc = 0x01F0, // IBM PowerPC Little-Endian + powerpcfp = 0x01f1, + ia64 = 0x0200, // Intel 64 + mips16 = 0x0266, // MIPS + alpha64 = 0x0284, // ALPHA64 + mipsfpu = 0x0366, // MIPS + mipsfpu16 = 0x0466, // MIPS + tricore = 0x0520, // Infineon + cef = 0x0CEF, + ebc = 0x0EBC, // EFI Byte Code + amd64 = 0x8664, // AMD64 (K8) + m32r = 0x9041, // M32R little-endian + arm64 = 0xAA64, // ARM64 Little-Endian + cee = 0xC0EE, + + const axp64 = ImageFileMachine.alpha64; +}; + +const ImageOptionalHeader = extern struct { + magic: u16 = magic, + major_linker_version: u8 = 0, + minor_linker_version: u8 = 0, + size_of_code: u32, + size_of_initialized_data: u32, + size_of_uninitialized_data: u32, + address_of_entry_point: u32, + base_of_code: u32, + image_base: u64, + section_alignment: u32, + file_alignment: u32, + major_os_version: u16, + minor_os_version: u16, + major_image_version: u16, + minor_image_version: u16, + major_subsystem_version: u16, + minor_subsystem_version: u16, + win32_version_value: u32, + size_of_image: u32, + size_of_headers: u32, + checksum: u32, + subsystem: u16, + dll_characteristics: u16, + size_of_stack_reserve: u64, + size_of_stack_commit: u64, + size_of_heap_reserve: u64, + size_of_heap_commit: u64, + loader_flags: u32, + number_of_RVA_and_sizes: u32, + data_directory: [image_number_of_directory_entries]ImageDataDirectory, + + 
const magic = 0x20b; + + comptime { + assert(@sizeOf(ImageOptionalHeader) == 0xf0); + } +}; + +const ImageDataDirectory = extern struct { + virtual_address: u32, + size: u32, +}; + +const image_number_of_directory_entries = 0x10; + +const Library = struct { + symbols: ArrayList(Symbol) = .{}, + name: []const u8, + name_virtual_address: u32 = 0, + virtual_address: u32 = 0, + image_thunk_virtual_address: u32 = 0, +}; + +const Symbol = struct { + name: []const u8, + name_virtual_address: u32 = 0, + offset_in_data: u32 = 0, +}; diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index f56900e..7bdda85 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -1,18 +1,1983 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; -const print = std.debug.print; +const panic = std.debug.panic; const emit = @import("emit.zig"); -const ir = @import("./intermediate_representation.zig"); +const ir = @import("intermediate_representation.zig"); const Compilation = @import("../Compilation.zig"); const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const AutoArrayHashMap = data_structures.AutoArrayHashMap; +const BlockList = data_structures.BlockList; + +const log = Compilation.log; +const logln = Compilation.logln; const x86_64 = @This(); +pub const Logger = enum { + register_allocation_new_instructions, + instruction_selection_block, + instruction_selection_ir_function, + instruction_selection_new_instruction, + instruction_selection_cache_flush, + instruction_selection_mir_function, + instruction_selection_register_operand_list, + register_allocation_block, + register_allocation_problematic_hint, + register_allocation_assignment, + register_allocation_reload, + register_allocation_function_before, + register_allocation_new_instruction, + register_allocation_new_instruction_function_before, + register_allocation_instruction_avoid_copy, + 
register_allocation_function_after, + register_allocation_operand_list_verification, + encoding, + + pub var bitset = std.EnumSet(Logger).initMany(&.{ + .instruction_selection_ir_function, + .instruction_selection_mir_function, + // .instruction_selection_register_operand_list, + // .register_allocation_block, + // .register_allocation_problematic_hint, + // .register_allocation_assignment, + // .register_allocation_reload, + // .register_allocation_function_before, + // .register_allocation_new_instruction, + // .register_allocation_new_instruction_function_before, + // .register_allocation_instruction_avoid_copy, + .register_allocation_function_after, + // .register_allocation_operand_list_verification, + .encoding, + }); +}; + +const Register = struct { + list: List = .{}, + index: Index, + + const invalid = Register{ + .index = .{ + .physical = .no_register, + }, + }; + + fn isValid(register: Register) bool { + return switch (register.index) { + .physical => |physical| physical != .no_register, + .virtual => true, + }; + } + + const Index = union(enum) { + physical: Register.Physical, + virtual: Register.Virtual.Index, + }; + + const State = union(enum) { + virtual: Virtual.Index, + free, + preassigned, + livein, + }; + const Class = enum { + not_a_register, + any, + // gp8, + // gp16, + gp32, + gp64, + gp64_nosp, + + pub const Descriptor = struct { + size: u16, + spill_size: u16, + spill_alignment: u16, + }; + }; + + const Physical = enum(u9) { + no_register = 0, + ah = 1, + al = 2, + ax = 3, + bh = 4, + bl = 5, + bp = 6, + bph = 7, + bpl = 8, + bx = 9, + ch = 10, + cl = 11, + cs = 12, + cx = 13, + df = 14, + dh = 15, + di = 16, + dih = 17, + dil = 18, + dl = 19, + ds = 20, + dx = 21, + eax = 22, + ebp = 23, + ebx = 24, + ecx = 25, + edi = 26, + edx = 27, + eflags = 28, + eip = 29, + eiz = 30, + es = 31, + esi = 32, + esp = 33, + fpcw = 34, + fpsw = 35, + fs = 36, + fs_base = 37, + gs = 38, + gs_base = 39, + hax = 40, + hbp = 41, + hbx = 42, + hcx = 43, + hdi 
= 44, + hdx = 45, + hip = 46, + hsi = 47, + hsp = 48, + ip = 49, + mxcsr = 50, + rax = 51, + rbp = 52, + rbx = 53, + rcx = 54, + rdi = 55, + rdx = 56, + rflags = 57, + rip = 58, + riz = 59, + rsi = 60, + rsp = 61, + si = 62, + sih = 63, + sil = 64, + sp = 65, + sph = 66, + spl = 67, + ss = 68, + ssp = 69, + tmmcfg = 70, + _eflags = 71, + cr0 = 72, + cr1 = 73, + cr2 = 74, + cr3 = 75, + cr4 = 76, + cr5 = 77, + cr6 = 78, + cr7 = 79, + cr8 = 80, + cr9 = 81, + cr10 = 82, + cr11 = 83, + cr12 = 84, + cr13 = 85, + cr14 = 86, + cr15 = 87, + dr0 = 88, + dr1 = 89, + dr2 = 90, + dr3 = 91, + dr4 = 92, + dr5 = 93, + dr6 = 94, + dr7 = 95, + dr8 = 96, + dr9 = 97, + dr10 = 98, + dr11 = 99, + dr12 = 100, + dr13 = 101, + dr14 = 102, + dr15 = 103, + fp0 = 104, + fp1 = 105, + fp2 = 106, + fp3 = 107, + fp4 = 108, + fp5 = 109, + fp6 = 110, + fp7 = 111, + k0 = 112, + k1 = 113, + k2 = 114, + k3 = 115, + k4 = 116, + k5 = 117, + k6 = 118, + k7 = 119, + mm0 = 120, + mm1 = 121, + mm2 = 122, + mm3 = 123, + mm4 = 124, + mm5 = 125, + mm6 = 126, + mm7 = 127, + r8 = 128, + r9 = 129, + r10 = 130, + r11 = 131, + r12 = 132, + r13 = 133, + r14 = 134, + r15 = 135, + st0 = 136, + st1 = 137, + st2 = 138, + st3 = 139, + st4 = 140, + st5 = 141, + st6 = 142, + st7 = 143, + tmm0 = 144, + tmm1 = 145, + tmm2 = 146, + tmm3 = 147, + tmm4 = 148, + tmm5 = 149, + tmm6 = 150, + tmm7 = 151, + xmm0 = 152, + xmm1 = 153, + xmm2 = 154, + xmm3 = 155, + xmm4 = 156, + xmm5 = 157, + xmm6 = 158, + xmm7 = 159, + xmm8 = 160, + xmm9 = 161, + xmm10 = 162, + xmm11 = 163, + xmm12 = 164, + xmm13 = 165, + xmm14 = 166, + xmm15 = 167, + xmm16 = 168, + xmm17 = 169, + xmm18 = 170, + xmm19 = 171, + xmm20 = 172, + xmm21 = 173, + xmm22 = 174, + xmm23 = 175, + xmm24 = 176, + xmm25 = 177, + xmm26 = 178, + xmm27 = 179, + xmm28 = 180, + xmm29 = 181, + xmm30 = 182, + xmm31 = 183, + ymm0 = 184, + ymm1 = 185, + ymm2 = 186, + ymm3 = 187, + ymm4 = 188, + ymm5 = 189, + ymm6 = 190, + ymm7 = 191, + ymm8 = 192, + ymm9 = 193, + ymm10 = 194, + ymm11 = 195, 
+ ymm12 = 196, + ymm13 = 197, + ymm14 = 198, + ymm15 = 199, + ymm16 = 200, + ymm17 = 201, + ymm18 = 202, + ymm19 = 203, + ymm20 = 204, + ymm21 = 205, + ymm22 = 206, + ymm23 = 207, + ymm24 = 208, + ymm25 = 209, + ymm26 = 210, + ymm27 = 211, + ymm28 = 212, + ymm29 = 213, + ymm30 = 214, + ymm31 = 215, + zmm0 = 216, + zmm1 = 217, + zmm2 = 218, + zmm3 = 219, + zmm4 = 220, + zmm5 = 221, + zmm6 = 222, + zmm7 = 223, + zmm8 = 224, + zmm9 = 225, + zmm10 = 226, + zmm11 = 227, + zmm12 = 228, + zmm13 = 229, + zmm14 = 230, + zmm15 = 231, + zmm16 = 232, + zmm17 = 233, + zmm18 = 234, + zmm19 = 235, + zmm20 = 236, + zmm21 = 237, + zmm22 = 238, + zmm23 = 239, + zmm24 = 240, + zmm25 = 241, + zmm26 = 242, + zmm27 = 243, + zmm28 = 244, + zmm29 = 245, + zmm30 = 246, + zmm31 = 247, + r8b = 248, + r9b = 249, + r10b = 250, + r11b = 251, + r12b = 252, + r13b = 253, + r14b = 254, + r15b = 255, + r8bh = 256, + r9bh = 257, + r10bh = 258, + r11bh = 259, + r12bh = 260, + r13bh = 261, + r14bh = 262, + r15bh = 263, + r8d = 264, + r9d = 265, + r10d = 266, + r11d = 267, + r12d = 268, + r13d = 269, + r14d = 270, + r15d = 271, + r8w = 272, + r9w = 273, + r10w = 274, + r11w = 275, + r12w = 276, + r13w = 277, + r14w = 278, + r15w = 279, + r8wh = 280, + r9wh = 281, + r10wh = 282, + r11wh = 283, + r12wh = 284, + r13wh = 285, + r14wh = 286, + r15wh = 287, + k0_k1 = 288, + k2_k3 = 289, + k4_k5 = 290, + k6_k7 = 291, + + const Descriptor = struct { + subregisters: []const Register.Physical = &.{}, + }; + }; + + const Virtual = struct { + register_class: Register.Class, + use_def_list_head: Operand.Index = Operand.Index.invalid, + + pub const List = BlockList(@This()); + pub const Index = Virtual.List.Index; + pub const Allocation = Virtual.List.Allocation; + }; + + const List = struct { + previous: Operand.Index = Operand.Index.invalid, + next: Operand.Index = Operand.Index.invalid, + }; +}; + +const register_descriptors = std.EnumArray(Register.Physical, Register.Physical.Descriptor).init(.{ + .no_register = 
.{}, + .ah = .{}, + .al = .{}, + .ax = .{}, + .bh = .{}, + .bl = .{}, + .bp = .{}, + .bph = .{}, + .bpl = .{}, + .bx = .{}, + .ch = .{}, + .cl = .{}, + .cs = .{}, + .cx = .{}, + .df = .{}, + .dh = .{}, + .di = .{}, + .dih = .{}, + .dil = .{}, + .dl = .{}, + .ds = .{}, + .dx = .{}, + .eax = .{}, + .ebp = .{}, + .ebx = .{}, + .ecx = .{}, + .edi = .{}, + .edx = .{}, + .eflags = .{}, + .eip = .{ + .subregisters = &.{ .ip, .hip }, + }, + .eiz = .{}, + .es = .{}, + .esi = .{}, + .esp = .{}, + .fpcw = .{}, + .fpsw = .{}, + .fs = .{}, + .fs_base = .{}, + .gs = .{}, + .gs_base = .{}, + .hax = .{}, + .hbp = .{}, + .hbx = .{}, + .hcx = .{}, + .hdi = .{}, + .hdx = .{}, + .hip = .{}, + .hsi = .{}, + .hsp = .{}, + .ip = .{}, + .mxcsr = .{}, + .rax = .{}, + .rbp = .{}, + .rbx = .{}, + .rcx = .{}, + .rdi = .{}, + .rdx = .{}, + .rflags = .{}, + .rip = .{ + .subregisters = &.{.eip}, + }, + .riz = .{}, + .rsi = .{}, + .rsp = .{}, + .si = .{}, + .sih = .{}, + .sil = .{}, + .sp = .{}, + .sph = .{}, + .spl = .{}, + .ss = .{}, + .ssp = .{}, + .tmmcfg = .{}, + ._eflags = .{}, + .cr0 = .{}, + .cr1 = .{}, + .cr2 = .{}, + .cr3 = .{}, + .cr4 = .{}, + .cr5 = .{}, + .cr6 = .{}, + .cr7 = .{}, + .cr8 = .{}, + .cr9 = .{}, + .cr10 = .{}, + .cr11 = .{}, + .cr12 = .{}, + .cr13 = .{}, + .cr14 = .{}, + .cr15 = .{}, + .dr0 = .{}, + .dr1 = .{}, + .dr2 = .{}, + .dr3 = .{}, + .dr4 = .{}, + .dr5 = .{}, + .dr6 = .{}, + .dr7 = .{}, + .dr8 = .{}, + .dr9 = .{}, + .dr10 = .{}, + .dr11 = .{}, + .dr12 = .{}, + .dr13 = .{}, + .dr14 = .{}, + .dr15 = .{}, + .fp0 = .{}, + .fp1 = .{}, + .fp2 = .{}, + .fp3 = .{}, + .fp4 = .{}, + .fp5 = .{}, + .fp6 = .{}, + .fp7 = .{}, + .k0 = .{}, + .k1 = .{}, + .k2 = .{}, + .k3 = .{}, + .k4 = .{}, + .k5 = .{}, + .k6 = .{}, + .k7 = .{}, + .mm0 = .{}, + .mm1 = .{}, + .mm2 = .{}, + .mm3 = .{}, + .mm4 = .{}, + .mm5 = .{}, + .mm6 = .{}, + .mm7 = .{}, + .r8 = .{}, + .r9 = .{}, + .r10 = .{}, + .r11 = .{}, + .r12 = .{}, + .r13 = .{}, + .r14 = .{}, + .r15 = .{}, + .st0 = .{}, + .st1 = .{}, + 
.st2 = .{}, + .st3 = .{}, + .st4 = .{}, + .st5 = .{}, + .st6 = .{}, + .st7 = .{}, + .tmm0 = .{}, + .tmm1 = .{}, + .tmm2 = .{}, + .tmm3 = .{}, + .tmm4 = .{}, + .tmm5 = .{}, + .tmm6 = .{}, + .tmm7 = .{}, + .xmm0 = .{}, + .xmm1 = .{}, + .xmm2 = .{}, + .xmm3 = .{}, + .xmm4 = .{}, + .xmm5 = .{}, + .xmm6 = .{}, + .xmm7 = .{}, + .xmm8 = .{}, + .xmm9 = .{}, + .xmm10 = .{}, + .xmm11 = .{}, + .xmm12 = .{}, + .xmm13 = .{}, + .xmm14 = .{}, + .xmm15 = .{}, + .xmm16 = .{}, + .xmm17 = .{}, + .xmm18 = .{}, + .xmm19 = .{}, + .xmm20 = .{}, + .xmm21 = .{}, + .xmm22 = .{}, + .xmm23 = .{}, + .xmm24 = .{}, + .xmm25 = .{}, + .xmm26 = .{}, + .xmm27 = .{}, + .xmm28 = .{}, + .xmm29 = .{}, + .xmm30 = .{}, + .xmm31 = .{}, + .ymm0 = .{}, + .ymm1 = .{}, + .ymm2 = .{}, + .ymm3 = .{}, + .ymm4 = .{}, + .ymm5 = .{}, + .ymm6 = .{}, + .ymm7 = .{}, + .ymm8 = .{}, + .ymm9 = .{}, + .ymm10 = .{}, + .ymm11 = .{}, + .ymm12 = .{}, + .ymm13 = .{}, + .ymm14 = .{}, + .ymm15 = .{}, + .ymm16 = .{}, + .ymm17 = .{}, + .ymm18 = .{}, + .ymm19 = .{}, + .ymm20 = .{}, + .ymm21 = .{}, + .ymm22 = .{}, + .ymm23 = .{}, + .ymm24 = .{}, + .ymm25 = .{}, + .ymm26 = .{}, + .ymm27 = .{}, + .ymm28 = .{}, + .ymm29 = .{}, + .ymm30 = .{}, + .ymm31 = .{}, + .zmm0 = .{}, + .zmm1 = .{}, + .zmm2 = .{}, + .zmm3 = .{}, + .zmm4 = .{}, + .zmm5 = .{}, + .zmm6 = .{}, + .zmm7 = .{}, + .zmm8 = .{}, + .zmm9 = .{}, + .zmm10 = .{}, + .zmm11 = .{}, + .zmm12 = .{}, + .zmm13 = .{}, + .zmm14 = .{}, + .zmm15 = .{}, + .zmm16 = .{}, + .zmm17 = .{}, + .zmm18 = .{}, + .zmm19 = .{}, + .zmm20 = .{}, + .zmm21 = .{}, + .zmm22 = .{}, + .zmm23 = .{}, + .zmm24 = .{}, + .zmm25 = .{}, + .zmm26 = .{}, + .zmm27 = .{}, + .zmm28 = .{}, + .zmm29 = .{}, + .zmm30 = .{}, + .zmm31 = .{}, + .r8b = .{}, + .r9b = .{}, + .r10b = .{}, + .r11b = .{}, + .r12b = .{}, + .r13b = .{}, + .r14b = .{}, + .r15b = .{}, + .r8bh = .{}, + .r9bh = .{}, + .r10bh = .{}, + .r11bh = .{}, + .r12bh = .{}, + .r13bh = .{}, + .r14bh = .{}, + .r15bh = .{}, + .r8d = .{}, + .r9d = .{}, + .r10d = .{}, + 
.r11d = .{}, + .r12d = .{}, + .r13d = .{}, + .r14d = .{}, + .r15d = .{}, + .r8w = .{}, + .r9w = .{}, + .r10w = .{}, + .r11w = .{}, + .r12w = .{}, + .r13w = .{}, + .r14w = .{}, + .r15w = .{}, + .r8wh = .{}, + .r9wh = .{}, + .r10wh = .{}, + .r11wh = .{}, + .r12wh = .{}, + .r13wh = .{}, + .r14wh = .{}, + .r15wh = .{}, + .k0_k1 = .{}, + .k2_k3 = .{}, + .k4_k5 = .{}, + .k6_k7 = .{}, +}); + +// const SubregisterIndex = struct { +// size: u16, +// offset: u16 = 0, +// }; +// +// const SubRegisterIndexType = enum { +// sub_8bit, +// sub_8bit_hi, +// sub_16bit_, +// sub_16bit_hi, +// sub_32bit, +// }; + +// const subregister_indices = std.EnumArray(SubRegisterIndexType, []const SubregisterIndex).init(.{ +// }); + +// const Sub8Bit = enum{ +// ax = 0, +// cx = 1, +// dx = 2, +// bx = 3, +// }; + +const GP32 = enum(u3) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, +}; + +const GP64 = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, +}; + +const GP64NOSP = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, +}; + +const XMMRegister = u4; + +const CallingConvention = struct { + argument_registers: RegisterGroupMap, + syscall_registers: []const Register.Physical, + + const Id = Compilation.CallingConvention; +}; + +const RegisterGroupMap = std.EnumArray(Register.Class, []const Register.Physical); + +const zero_register_class_descriptor = Register.Class.Descriptor{ + .size = 0, + .spill_size = 0, + .spill_alignment = 0, +}; +const register_class_descriptors = std.EnumArray(Register.Class, Register.Class.Descriptor).init(.{ + .not_a_register = zero_register_class_descriptor, + .any = zero_register_class_descriptor, + .gp32 = .{ + .size = 32, + .spill_size = 32, + .spill_alignment = 32, + }, + .gp64 = .{ + 
.size = 64, + .spill_size = 64, + .spill_alignment = 64, + }, + .gp64_nosp = .{ + .size = 64, + .spill_size = 64, + .spill_alignment = 64, + }, +}); + +const registers_by_class = RegisterGroupMap.init(.{ + .not_a_register = &.{}, + .any = &.{}, + .gp32 = &.{ + .eax, + .ecx, + .edx, + .esi, + .edi, + .ebx, + .ebp, + .esp, + .r8d, + .r9d, + .r10d, + .r11d, + .r14d, + .r15d, + .r12d, + .r13d, + }, + .gp64 = &.{ + .rax, + .rcx, + .rdx, + .rsi, + .rdi, + .r8, + .r9, + .r10, + .r11, + .rbx, + .r14, + .r15, + .r12, + .r13, + .rbp, + .rsp, + }, + .gp64_nosp = &.{}, +}); + +// TODO: fix this +const system_v_gp32_argument_registers = [4]Register.Physical{ .edi, .esi, .edx, .ecx }; +const system_v_gp64_argument_registers = [6]Register.Physical{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; +const system_v_xmm_argument_registers = [8]Register.Physical{ .xmm0, .xmm1, .xmm2, .xmm3, .xmm4, .xmm5, .xmm6, .xmm7 }; +const system_v_syscall_registers = [7]Register.Physical{ .rax, .rdi, .rsi, .rdx, .r10, .r8, .r9 }; + +const system_v = CallingConvention{ + .argument_registers = RegisterGroupMap.init(.{ + .not_a_register = &.{}, + .any = &.{}, + .gp32 = &system_v_gp32_argument_registers, + .gp64 = &system_v_gp64_argument_registers, + .gp64_nosp = &.{}, + }), + .syscall_registers = &system_v_syscall_registers, +}; + +const calling_conventions = std.EnumArray(CallingConvention.Id, CallingConvention).init(.{ + .system_v = system_v, +}); + +const ValueType = struct { + size: u16, + element_count: u16, + element_type: u32, + data_type: DataType, + scalarness: Scalarness, + + const DataType = enum(u1) { + integer = 0, + float = 1, + }; + const Scalarness = enum(u1) { + scalar = 0, + vector = 1, + }; + + const Id = enum(u32) { + any = 0, + // other = 1, + // i1 = 2, + // i8 = 3, + // i16 = 4, + i32 = 5, + i64 = 6, + // i128 = 7, + }; +}; + +const value_types = std.EnumArray(ValueType.Id, ValueType).init(.{ + .any = .{ + .size = 0, + .element_count = 1, + .element_type = @intFromEnum(ValueType.Id.any), + 
.data_type = .integer, + .scalarness = .scalar, + }, + .i32 = .{ + .size = @sizeOf(u32), + .element_count = 1, + .element_type = @intFromEnum(ValueType.Id.i32), + .data_type = .integer, + .scalarness = .scalar, + }, + .i64 = .{ + .size = @sizeOf(u64), + .element_count = 1, + .element_type = @intFromEnum(ValueType.Id.i64), + .data_type = .integer, + .scalarness = .scalar, + }, +}); + +const register_classes = std.EnumArray(ValueType.Id, Register.Class).init(.{ + .any = .any, + .i32 = .gp32, + .i64 = .gp64, +}); + +const Memory = struct { + alignment: u64, + // low_level_type: LowLevelType, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; +}; + +const LowLevelType = packed struct(u64) { + u: packed union { + vector: Vector, + scalar: Scalar, + }, + scalar: bool, + pointer: bool, + + const Vector = packed struct(u62) { + foo: u62 = 0, + }; + + const Scalar = packed struct {}; +}; + +const AddressingMode = struct { + base: AddressingMode.Base, + scale: u32 = 1, + displacement: i32 = 0, + index_register: u32 = 0, + const Base = union(enum) { + register_base: u32, + frame_index: u32, + }; +}; + +const StackObject = struct { + size: u64, + alignment: u32, + spill_slot: bool, + ir: ir.Instruction.Index, +}; + +const InstructionSelection = struct { + local_value_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, Register) = .{}, + value_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, Register) = .{}, + block_map: data_structures.AutoHashMap(ir.BasicBlock.Index, BasicBlock.Index) = .{}, + liveins: data_structures.AutoArrayHashMap(Register.Physical, Register.Virtual.Index) = .{}, + memory_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, Memory.Index) = .{}, + stack_map: data_structures.AutoArrayHashMap(ir.Instruction.Index, u32) = .{}, + physical_register_use_or_definition_list: std.EnumArray(Register.Physical, Operand.Index) = std.EnumArray(Register.Physical, 
Operand.Index).initFill(Operand.Index.invalid), + current_block: BasicBlock.Index = BasicBlock.Index.invalid, + stack_objects: ArrayList(StackObject) = .{}, + function: *MIR.Function, + instruction_cache: ArrayList(Instruction.Index) = .{}, + + fn storeRegisterToStackSlot(instruction_selection: *InstructionSelection, mir: *MIR, insert_before_instruction_index: usize, source_register: Register.Physical, kill: bool, frame_index: u32, register_class: Register.Class, virtual_register: Register.Virtual.Index) !void { + _ = virtual_register; + const stack_object = instruction_selection.stack_objects.items[frame_index]; + switch (@divExact(stack_object.size, 8)) { + @sizeOf(u64) => { + switch (register_class) { + .gp64 => { + const instruction_id = Instruction.Id.mov64mr; + const instruction_descriptor = comptime instruction_descriptors.get(instruction_id); + const source_operand_id = instruction_descriptor.operands[1].id; + const addressing_mode = AddressingMode{ + .base = .{ + .frame_index = frame_index, + }, + }; + + const destination_operand_id = instruction_descriptor.operands[0].id; + const destination_operand = Operand{ + .id = destination_operand_id, + .u = .{ + .memory = .{ .addressing_mode = addressing_mode }, + }, + .flags = .{}, + }; + const source_operand = Operand{ + .id = source_operand_id, + .u = .{ + .register = .{ + .index = .{ + .physical = source_register, + }, + }, + }, + .flags = .{ + .dead_or_kill = kill, + }, + }; + + const instruction_index = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + + try mir.blocks.get(instruction_selection.current_block).instructions.insert(mir.allocator, insert_before_instruction_index, instruction_index); + }, + else => |t| @panic(@tagName(t)), + } + }, + else => std.debug.panic("Stack object size: {}", .{stack_object.size}), + } + } + + fn loadRegisterFromStackSlot(instruction_selection: *InstructionSelection, mir: *MIR, 
insert_before_instruction_index: usize, destination_register: Register.Physical, frame_index: u32, register_class: Register.Class, virtual_register: Register.Virtual.Index) !void { + _ = virtual_register; + const stack_object = instruction_selection.stack_objects.items[frame_index]; + switch (@divExact(stack_object.size, 8)) { + @sizeOf(u64) => { + switch (register_class) { + .gp64 => { + const instruction_id = Instruction.Id.mov64rm; + const instruction_descriptor = comptime instruction_descriptors.get(instruction_id); + const source_operand_id = instruction_descriptor.operands[1].id; + const addressing_mode = AddressingMode{ + .base = .{ + .frame_index = frame_index, + }, + }; + const source_operand = Operand{ + .id = source_operand_id, + .u = .{ + .memory = .{ .addressing_mode = addressing_mode }, + }, + .flags = .{}, + }; + const destination_operand = Operand{ + .id = .gp64, + .u = .{ + .register = .{ + .index = .{ + .physical = destination_register, + }, + }, + }, + .flags = .{ .type = .def }, + }; + const instruction_index = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + logln(.codegen, .register_allocation_new_instructions, "Inserting instruction at index {}", .{insert_before_instruction_index}); + try mir.blocks.get(instruction_selection.current_block).instructions.insert(mir.allocator, insert_before_instruction_index, instruction_index); + }, + else => |t| @panic(@tagName(t)), + } + }, + @sizeOf(u32) => switch (register_class) { + .gp32 => { + const instruction_id = Instruction.Id.mov32rm; + const instruction_descriptor = comptime instruction_descriptors.get(instruction_id); + const source_operand_id = instruction_descriptor.operands[1].id; + const addressing_mode = AddressingMode{ + .base = .{ + .frame_index = frame_index, + }, + }; + const source_operand = Operand{ + .id = source_operand_id, + .u = .{ + .memory = .{ .addressing_mode = addressing_mode }, + }, + .flags = .{}, + }; + 
const destination_operand = Operand{ + .id = .gp32, + .u = .{ + .register = .{ + .index = .{ + .physical = destination_register, + }, + }, + }, + .flags = .{ .type = .def }, + }; + const instruction_index = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + logln(.codegen, .register_allocation_new_instructions, "Inserting instruction at index {}", .{insert_before_instruction_index}); + try mir.blocks.get(instruction_selection.current_block).instructions.insert(mir.allocator, insert_before_instruction_index, instruction_index); + }, + else => |t| @panic(@tagName(t)), + }, + else => panic("Stack object size: {} bits", .{stack_object.size}), + } + } + + // TODO: add value map on top of local value map? + fn lookupRegisterForValue(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) !Register { + if (instruction_selection.value_map.get(ir_instruction_index)) |register| { + return register; + } + + const gop = try instruction_selection.local_value_map.getOrPutValue(mir.allocator, ir_instruction_index, Register.invalid); + return gop.value_ptr.*; + } + + fn getRegisterForValue(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) !Register { + const register = try instruction_selection.lookupRegisterForValue(mir, ir_instruction_index); + if (register.isValid()) { + return register; + } + + const instruction = mir.ir.instructions.get(ir_instruction_index); + if (instruction.* != .stack or !instruction_selection.stack_map.contains(ir_instruction_index)) { + const ir_type = getIrType(mir.ir, ir_instruction_index); + const value_type = resolveType(ir_type); + const register_class = register_classes.get(value_type); + const new_register = try mir.createVirtualRegister(register_class); + try instruction_selection.value_map.putNoClobber(mir.allocator, ir_instruction_index, new_register); + return new_register; + } + + 
unreachable; + } + + // Moving an immediate to a register + fn materializeInteger(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) !void { + const destination_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const integer = mir.ir.instructions.get(ir_instruction_index).load_integer; + const value_type = resolveType(integer.type); + // const destination_register_class = register_classes.get(value_type); + // const instruction_id: Instruction.Id = + switch (integer.value.unsigned == 0) { + true => { + const instruction_id: Instruction.Id = switch (value_type) { + // .i8 => unreachable, + // .i16 => unreachable, + .i32 => .mov32r0, + // .i64 => b: { + // if (std.math.cast(u32, integer.value.unsigned)) |_| { + // break :b .mov32ri64; + // } else if (std.math.cast(i32, integer.value.signed)) |_| { + // unreachable; + // } else { + // unreachable; + // } + // }, + else => |t| @panic(@tagName(t)), + }; + const instruction_descriptor = instruction_descriptors.get(instruction_id); + const operand_id = instruction_descriptor.operands[0].id; + // const register_class = register_classes.get(operand_id); + const destination_operand = Operand{ + .id = operand_id, + .u = .{ + .register = destination_register, + }, + .flags = .{ .type = .def }, + }; + + const xor = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + }); + try instruction_selection.instruction_cache.append(mir.allocator, xor); + }, + false => { + const instruction_id: Instruction.Id = switch (value_type) { + .i32 => .mov32ri, + .i64 => b: { + if (std.math.cast(u32, integer.value.unsigned)) |_| { + break :b .mov32ri64; + } else if (std.math.cast(i32, integer.value.signed)) |_| { + unreachable; + } else { + unreachable; + } + }, + else => |t| @panic(@tagName(t)), + }; + + const instruction_descriptor = instruction_descriptors.get(instruction_id); + const operand_id = 
instruction_descriptor.operands[0].id; + + const destination_operand = Operand{ + .id = operand_id, + .u = .{ + .register = destination_register, + }, + .flags = .{ .type = .def }, + }; + + const source_operand = Operand{ + .id = .immediate, + .u = .{ + .immediate = integer.value.unsigned, + }, + .flags = .{}, + }; + + const instr = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, instr); + }, + } + } + + fn getAddressingModeFromIr(instruction_selection: *InstructionSelection, mir: *MIR, ir_instruction_index: ir.Instruction.Index) AddressingMode { + const instruction = mir.ir.instructions.get(ir_instruction_index); + switch (instruction.*) { + .stack => { + const frame_index: u32 = @intCast(instruction_selection.stack_map.getIndex(ir_instruction_index).?); + return AddressingMode{ + .base = .{ + .frame_index = frame_index, + }, + }; + }, + else => |t| @panic(@tagName(t)), + } + } + + fn updateValueMap(instruction_selection: *InstructionSelection, allocator: Allocator, ir_instruction_index: ir.Instruction.Index, register: Register, local: bool) !void { + if (local) { + try instruction_selection.local_value_map.putNoClobber(allocator, ir_instruction_index, register); + } else { + const gop = try instruction_selection.value_map.getOrPutValue(allocator, ir_instruction_index, Register.invalid); + if (!gop.value_ptr.isValid()) { + gop.value_ptr.* = register; + } else if (!std.meta.eql(gop.value_ptr.index, register.index)) { + unreachable; + } + } + } + + fn lowerArguments(instruction_selection: *InstructionSelection, mir: *MIR, ir_function: *ir.Function) !void { + const ir_function_declaration = mir.ir.function_declarations.get(ir_function.declaration); + const ir_arguments = ir_function_declaration.arguments.values(); + const calling_convention = calling_conventions.get(ir_function_declaration.calling_convention); + + try 
instruction_selection.local_value_map.ensureUnusedCapacity(mir.allocator, ir_arguments.len); + + var gp_count: u8 = 0; + + for (ir_arguments) |ir_argument_instruction_index| { + const ir_argument_instruction = mir.ir.instructions.get(ir_argument_instruction_index); + const ir_argument = mir.ir.arguments.get(ir_argument_instruction.argument); + switch (ir_argument.type) { + .i8, .i16, .i32, .i64 => gp_count += 1, + .void, + .noreturn, + => unreachable, + } + } + + if (gp_count >= 8) { + @panic("Cannot lower arguments"); + } + + var gp_i: u8 = 0; + var fp_i: u8 = 0; + _ = fp_i; + + for (ir_arguments) |ir_argument_instruction_index| { + const ir_argument_instruction = mir.ir.instructions.get(ir_argument_instruction_index); + const ir_argument = mir.ir.arguments.get(ir_argument_instruction.argument); + const value_type = resolveType(ir_argument.type); + const register_class = register_classes.get(value_type); + const argument_registers = calling_convention.argument_registers.get(register_class); + const physical_register = argument_registers[gp_i]; + const operand_id: Operand.Id = switch (register_class) { + inline .gp32, + .gp64, + => |gp| blk: { + gp_i += 1; + break :blk switch (gp) { + .gp32 => .gp32, + .gp64 => .gp64, + else => unreachable, + }; + }, + else => unreachable, + }; + + // const operand_register_class = register_class_operand_matcher.get(operand_reference.id); + + const virtual_register_index = try instruction_selection.createLiveIn(mir, physical_register, register_class); + const result_register = try mir.createVirtualRegister(register_class); + try mir.append(instruction_selection, .copy, &.{ + Operand{ + .id = operand_id, + .u = .{ + .register = result_register, + }, + .flags = .{ + .dead_or_kill = true, + .type = .def, + }, + }, + Operand{ + .id = operand_id, + .u = .{ + .register = .{ + .index = .{ + .virtual = virtual_register_index, + }, + }, + }, + .flags = .{}, + }, + }); + + 
mir.blocks.get(instruction_selection.current_block).current_stack_index += 1; + + try instruction_selection.updateValueMap(mir.allocator, ir_argument_instruction_index, result_register, true); + try instruction_selection.value_map.putNoClobber(mir.allocator, ir_argument_instruction_index, result_register); + } + } + + fn addLiveIn(instruction_selection: *InstructionSelection, mir: *MIR, register: Register, register_class: Register.Class.Id) !void { + _ = mir; + _ = register_class; + _ = register; + _ = instruction_selection; + unreachable; + } + + fn addExistingLiveIn(instruction_selection: *InstructionSelection, mir: *MIR, physical_register: Register.Physical.Index, virtual_register: Register) !void { + _ = mir; + _ = virtual_register; + _ = physical_register; + _ = instruction_selection; + unreachable; + } + + fn createLiveIn(instruction_selection: *InstructionSelection, mir: *MIR, physical_register: Register.Physical, register_class: Register.Class) !Register.Virtual.Index { + const virtual_register_index = try mir.createVirtualRegisterIndexed(register_class); + try instruction_selection.liveins.putNoClobber(mir.allocator, physical_register, virtual_register_index); + + return virtual_register_index; + } + + fn emitLiveInCopies(instruction_selection: *InstructionSelection, mir: *MIR, entry_block_index: BasicBlock.Index) !void { + const entry_block = mir.blocks.get(entry_block_index); + for (instruction_selection.liveins.keys(), instruction_selection.liveins.values()) |livein_physical_register, livein_virtual_register| { + const vr = mir.virtual_registers.get(livein_virtual_register); + const destination_operand = Operand{ + .id = switch (vr.register_class) { + .gp32 => .gp32, + .gp64 => .gp64, + else => |t| @panic(@tagName(t)), + }, + .u = .{ + .register = .{ + .index = .{ + .virtual = livein_virtual_register, + }, + }, + }, + .flags = .{ + .type = .def, + }, + }; + const source_operand = Operand{ + .id = destination_operand.id, + .u = .{ + .register = .{ + 
.index = .{ + .physical = livein_physical_register, + }, + }, + }, + .flags = .{}, + }; + + const instruction_index = try mir.buildInstruction(instruction_selection, .copy, &.{ + destination_operand, + source_operand, + }); + + try entry_block.instructions.insert(mir.allocator, 0, instruction_index); + + // TODO: addLiveIn MachineBasicBlock ? unreachable; + } + } +}; + +fn getRegisterClass(register: Register.Physical) Register.Class { + _ = register; +} + +const Instruction = struct { + id: Id, + operands: ArrayList(Operand.Index), + parent: BasicBlock.Index, + + const Id = enum { + call64pcrel32, + copy, + lea64r, + mov32r0, + mov32rm, + mov64rm, + mov32mr, + mov64mr, + mov32ri, + mov32ri64, + mov32rr, + movsx64rm32, + movsx64rr32, + ret, + syscall, + ud2, + xor32rr, + }; + + pub const Descriptor = struct { + operands: []const Operand.Reference = &.{}, + opcode: u16, + format: Format = .pseudo, + flags: Flags = .{}, + + const Flags = packed struct { + implicit_def: bool = false, + two_byte_prefix: bool = false, + }; + + const Format = enum { + pseudo, + no_operands, + add_reg, + mrm_dest_mem, + mrm_source_mem, + mrm_source_reg, + mrm_dest_reg, + }; + }; + + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; + + pub const Iterator = struct { + pub const Arguments = packed struct { + use: bool, + def: bool, + element: Iterator.Element, + }; + pub const Element = enum(u1) { + operand = 0, + instruction = 1, + }; + + fn Get(comptime arguments: Arguments) type { + return struct { + index: Operand.Index, + mir: *MIR, + + const I = @This(); + + fn new(mir: *MIR, index: Operand.Index) I { + var it = I{ + .index = index, + .mir = mir, + }; + + if (!index.invalid) { + const operand = mir.operands.get(index); + if ((!arguments.use and operand.flags.type == .use) or (!arguments.def and operand.flags.type == .def)) { + it.advance(); + } + } + + return it; + } + + const ReturnValue = switch (arguments.element) { + 
.instruction => Instruction, + .operand => Operand, + }; + + fn next(it: *I) ?ReturnValue.Index { + const original_operand_index = it.index; + + switch (it.index.invalid) { + false => switch (arguments.element) { + .instruction => { + const original_operand = it.mir.operands.get(original_operand_index); + const instruction = original_operand.parent; + // const i_desc = it.mir.instructions.get(instruction); + // print("Instruction: {}", .{i_desc.id}); + while (true) { + it.advance(); + + if (it.index.invalid) { + break; + } + + const it_operand = it.mir.operands.get(it.index); + if (!it_operand.parent.eq(instruction)) { + break; + } + } + + return instruction; + }, + .operand => { + it.advance(); + return original_operand_index; + }, + }, + true => return null, + } + } + + fn nextPointer(it: *I) ?*ReturnValue { + if (it.next()) |next_index| { + const result = switch (arguments.element) { + .instruction => it.mir.instructions.get(next_index), + .operand => it.mir.operands.get(next_index), + }; + return result; + } else return null; + } + + fn advance(it: *I) void { + assert(!it.index.invalid); + it.advanceRaw(); + + switch (arguments.use) { + true => { + while (!it.index.invalid) { + const operand = it.mir.operands.get(it.index); + if (!arguments.def and operand.flags.type == .def) { + it.advanceRaw(); + } else { + break; + } + } + }, + false => { + if (!it.index.invalid) { + const operand = it.mir.operands.get(it.index); + if (operand.flags.type == .use) { + it.index = Operand.Index.invalid; + } else { + //TODO: assert that is not debug + } + } + }, + } + } + + fn advanceRaw(it: *I) void { + assert(!it.index.invalid); + const old_index = it.index; + const current_operand = it.mir.operands.get(old_index); + assert(current_operand.u == .register); + const next_index = current_operand.u.register.list.next; + it.index = next_index; + + if (it.index.invalid) { + logln(.codegen, .register_allocation_problematic_hint, "[ITERATOR] O{} -> NULL operand index", 
.{old_index.uniqueInteger()}); + } else { + const operand = it.mir.operands.get(it.index); + logln(.codegen, .register_allocation_problematic_hint, "[ITERATOR] O{} -> O{}: {}", .{ old_index.uniqueInteger(), it.index.uniqueInteger(), operand.flags }); + } + } + }; + } + }; +}; +pub const Operand = struct { + id: Operand.Id, + u: union(enum) { + register: Register, + memory: Operand.Memory, + immediate: Operand.Immediate, + pc_relative: PCRelative, + lea64mem: Lea64Mem, + }, + flags: Flags, + parent: Instruction.Index = Instruction.Index.invalid, + + pub const List = BlockList(@This()); + pub const Index = Operand.List.Index; + pub const Allocation = Operand.List.Allocation; + + fn readsRegister(operand: Operand) bool { + return !operand.flags.undef and !operand.flags.internal_read and (operand.flags.type == .use or operand.flags.subreg); + } + + fn isOnRegisterUseList(operand: *const Operand) bool { + assert(operand.u == .register); + return !operand.u.register.list.previous.invalid; + } + + const Id = enum { + unknown, + i32mem, + i64mem, + gp32, + gp64, + gp64_nosp, + immediate, + i64i32imm_brtarget, + lea64mem, + }; + pub const Type = enum(u1) { + use = 0, + def = 1, + }; + + const Flags = packed struct { + type: Type = .use, + dead_or_kill: bool = false, + undef: bool = false, + early_clobber: bool = false, + internal_read: bool = false, + subreg: bool = false, + renamable: bool = false, + implicit: bool = false, + + fn isDead(flags: Flags) bool { + return flags.dead_or_kill and flags.type == .def; + } + + fn isKill(flags: Flags) bool { + return flags.dead_or_kill and flags.type != .def; + } + }; + + // fn mapOperandIdToPayloadType(comptime id: Operand.Id) type { + // } + fn mapOperandIdToPayloadName(comptime id: Operand.Id) []const u8 { + return switch (id) { + .unknown => @compileError("unsupported"), + .i32mem, + .i64mem, + => "memory", + .gp32, + .gp64, + .gp64_nosp, + => "register", + .immediate => "immediate", + .i64i32imm_brtarget => "pc_relative", + + 
.lea64mem => "lea64mem", + }; + } + + fn operandUnionPayloadType(comptime id: Operand.Id) type { + const dumb_union = @field(@as(Operand, undefined), "u"); + return @TypeOf(@field(dumb_union, mapOperandIdToPayloadName(id))); + } + + const Reference = struct { + id: Operand.Id, + kind: Operand.Kind, + }; + + const Kind = enum { + src, + dst, + }; + + const Memory = struct { + addressing_mode: AddressingMode, + global_offset: i32 = 0, + }; + + const PCRelative = union(enum) { + function_declaration: MIR.Function.Index, + string_literal: ir.StringLiteral.Index, + imm32: i32, + imm8: i8, + + fn function(ir_function_decl_index: ir.Function.Declaration.Index) Operand { + return Operand{ + .i64i32imm_brtarget = PCRelative{ + .function_declaration = ir_function_decl_index, + }, + }; + } + }; + + const Lea64Mem = struct { + gp64: ?Register, // null means RIP, as this register is mandatory + scale: u8, + scale_reg: ?Register, + displacement: PCRelative, + + fn stringLiteral(ir_load_string_literal_index: ir.StringLiteral.Index) Operand { + return Operand{ + .id = .lea64mem, + .u = .{ + .lea64mem = .{ + .gp64 = null, // rip + .scale = 1, + .scale_reg = null, + .displacement = PCRelative{ + .string_literal = ir_load_string_literal_index, + }, + }, + }, + .flags = .{}, + }; + } + }; + + const Immediate = u64; +}; + +const register_class_operand_matcher = std.EnumArray(Operand.Id, Register.Class).init(.{ + .unknown = .any, + .i64i32imm_brtarget = .not_a_register, + .i32mem = .not_a_register, + .i64mem = .not_a_register, + .gp32 = .gp32, + .gp64 = .gp64, + .gp64_nosp = .gp64_nosp, + .immediate = .not_a_register, + .lea64mem = .not_a_register, +}); + +const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descriptor).init(.{ + .call64pcrel32 = .{ + .format = .no_operands, + .opcode = 0xe8, + .operands = &.{ + .{ + .id = .i64i32imm_brtarget, + .kind = .src, + }, + }, + }, + .copy = .{ + .format = .pseudo, + .opcode = 0, + .operands = &.{ + .{ + .id = .unknown, + 
.kind = .dst, + }, + .{ + .id = .unknown, + .kind = .src, + }, + }, + }, + .lea64r = .{ + .format = .mrm_source_mem, + .opcode = 0x8d, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .lea64mem, + .kind = .src, + }, + }, + }, + .mov32r0 = .{ + .format = .pseudo, + .opcode = 0, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + }, + }, + .mov32rm = .{ + .format = .mrm_source_mem, + .opcode = 0x8b, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .i32mem, + .kind = .src, + }, + }, + }, + .mov64rm = .{ + .format = .mrm_source_mem, + .opcode = 0x8b, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .i64mem, + .kind = .src, + }, + }, + }, + .mov32rr = .{ + .format = .mrm_dest_reg, + .opcode = 0x89, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + }, + }, + .mov32mr = .{ + .format = .mrm_dest_mem, + .opcode = 0x89, + .operands = &.{ + .{ + .id = .i32mem, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + }, + }, + .mov64mr = .{ + .format = .mrm_dest_mem, + .opcode = 0x89, + .operands = &.{ + .{ + .id = .i64mem, + .kind = .dst, + }, + .{ + .id = .gp64, + .kind = .src, + }, + }, + }, + .mov32ri = .{ + .format = .add_reg, + .opcode = 0xb8, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .immediate, + .kind = .src, + }, + }, + }, + .mov32ri64 = .{ + .format = .pseudo, + .opcode = 0, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .immediate, + .kind = .src, + }, + }, + }, + .movsx64rm32 = .{ + .format = .mrm_source_mem, + .opcode = 0x63, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .i32mem, + .kind = .src, + }, + }, + }, + .movsx64rr32 = .{ + .format = .mrm_source_reg, + .opcode = 0x63, + .operands = &.{ + .{ + .id = .gp64, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + }, + }, + .ret = .{ + .format = .no_operands, + .opcode = 0xc3, + 
.operands = &.{ + .{ + .id = .unknown, + .kind = .src, + }, + }, + }, + .syscall = .{ + .format = .no_operands, + .opcode = 0x05, + .operands = &.{}, + .flags = .{ + .two_byte_prefix = true, + }, + }, + .ud2 = .{ + .format = .no_operands, + .opcode = 0x0b, + .operands = &.{}, + .flags = .{ + .two_byte_prefix = true, + }, + }, + .xor32rr = .{ + .format = .mrm_dest_reg, + .opcode = 0x31, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + }, + }, +}); + const Size = enum(u2) { one = 0, two = 1, @@ -37,827 +2002,2153 @@ const Size = enum(u2) { .eight => u64, }; } + + fn fromType(t: ir.Type) Size { + return fromByteCount(@intCast(t.getSize())); + } }; -fn Relocation(comptime Target: type) type { - return struct { - target: Target, - instruction_byte_offset: u32, - instruction_length: u8, - source_address_writer_offset: u8, - size: Size, - }; -} - -const LocalRelocation = Relocation(ir.BasicBlock.Index); -const GlobalRelocation = Relocation(u32); - -fn RelocationIndex(comptime relocation_type: RelocationType) type { - return switch (relocation_type) { - .local => ir.BasicBlock.Index, - .global => u32, - }; -} -const RelocationType = enum { - local, - global, +const BasicBlock = struct { + instructions: ArrayList(Instruction.Index) = .{}, + current_stack_index: usize = 0, + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; }; pub const MIR = struct { - functions: ArrayList(Function) = .{}, allocator: Allocator, - const GPRegister = struct { - value: ?x86_64.GPRegister = null, - size: Size, - can_omit_if_present: bool = true, - }; - const Stack = struct { - offset: u64, - }; - const Function = struct { - instructions: ArrayList(MIR.Instruction) = .{}, - blocks: AutoArrayHashMap(ir.BasicBlock.Index, u32) = .{}, - instruction_byte_offset: u32 = 0, - }; - const Instruction = struct { - operands: [4]Operand, - ir: ir.Instruction.Index, - id: Id, - operand_count: u8 = 
0, + ir: *ir.Result, + target: std.Target, + instructions: BlockList(Instruction) = .{}, + functions: BlockList(Function) = .{}, + blocks: BlockList(BasicBlock) = .{}, + operands: BlockList(Operand) = .{}, + instruction_selections: ArrayList(InstructionSelection) = .{}, + virtual_registers: BlockList(Register.Virtual) = .{}, + function_declaration_map: std.AutoHashMapUnmanaged(ir.Function.Declaration.Index, Function.Index) = .{}, + entry_point: u32 = 0, - pub fn getOperands(instruction: *MIR.Instruction) []Operand { - return instruction.operands[0..instruction.operand_count]; - } - - const Id = enum(u16) { - call, - jmp, - mov, - push, - ret, - sub, - syscall, - ud2, - xor, - }; - }; - const Operand = union(enum) { - gp_register: MIR.GPRegister, - fp_register, - memory, - displacement: struct { - source: ir.BasicBlock.Index, - destination: union(enum) { - block: ir.BasicBlock.Index, - function: ir.Function.Index, - }, - }, - immediate: Compilation.Integer, - stack: Stack, - }; - - const RegisterUse = union(enum) { - general, - ret, - param: x86_64.GPRegister, - syscall_param: x86_64.GPRegister, - }; - - fn addInstruction(mir: *MIR, function: *Function, instruction_id: Instruction.Id, ir_instruction: ir.Instruction.Index, operands: []const Operand) !void { - var out_operands: [4]Operand = undefined; - @memset(std.mem.asBytes(&out_operands), 0); - @memcpy(out_operands[0..operands.len], operands); - - const instruction = MIR.Instruction{ - .operands = out_operands, - .ir = ir_instruction, - .id = instruction_id, - .operand_count = @intCast(operands.len), - }; - print("Adding instruction {s}\n", .{@tagName(instruction_id)}); - try function.instructions.append(mir.allocator, instruction); - } - - fn emitMovRegImm(mir: *MIR, function: *Function, integer: Compilation.Integer, instruction_index: ir.Instruction.Index, use: RegisterUse, register_size: Size) !void { - if (integer.type.bit_count <= @bitSizeOf(u64)) { - if (integer.value == 0) { - const operand = .{ - 
.gp_register = .{ - .value = switch (use) { - .general => null, - .ret => .a, - .param => unreachable, - .syscall_param => |register| register, - }, - .size = register_size, - }, - }; - - try mir.addInstruction(function, .xor, instruction_index, &.{ - operand, - operand, - }); - } else if (integer.value <= std.math.maxInt(u32)) { - try mir.addInstruction(function, .mov, instruction_index, &.{ - .{ - .gp_register = .{ - .value = switch (use) { - .general => null, - .ret => .a, - .param => unreachable, - .syscall_param => |register| register, - }, - .size = .four, - }, - }, - .{ - .immediate = .{ - .value = integer.value, - .type = .{ - .signedness = integer.type.signedness, - .bit_count = 32, - }, - }, - }, - }); - } else { - unreachable; - } - } else { - unreachable; - } - } - - fn emitMovRegStack(mir: *MIR, function: *Function, use: RegisterUse, stack_reference: ir.StackReference, instruction_index: ir.Instruction.Index) !void { - if (stack_reference.size <= @sizeOf(u64)) { - switch (stack_reference.size) { - @sizeOf(u8) => unreachable, - @sizeOf(u16) => unreachable, - @sizeOf(u32) => { - try mir.addInstruction(function, .mov, instruction_index, &.{ - .{ - .gp_register = .{ - .value = switch (use) { - .general => null, - .ret => unreachable, - .param => unreachable, - .syscall_param => |syscall_register| syscall_register, - }, - .size = Size.fromByteCount(@intCast(stack_reference.size)), - }, - }, - .{ - .stack = .{ - .offset = stack_reference.offset, - }, - }, - }); - }, - @sizeOf(u64) => unreachable, - else => unreachable, - } - } else { - unreachable; - } - } - - pub fn generate(allocator: Allocator, intermediate: *ir.Result) !MIR { - var mir = MIR{ + pub fn selectInstructions(allocator: Allocator, intermediate: *ir.Result, target: std.Target) !*MIR { + logln(.codegen, .instruction_selection_block, "\n[INSTRUCTION SELECTION]\n", .{}); + const mir = try allocator.create(MIR); + mir.* = .{ .allocator = allocator, + .ir = intermediate, + .target = target, }; - try 
mir.functions.ensureTotalCapacity(allocator, intermediate.functions.len); - var ir_function_it = intermediate.functions.iterator(); - while (ir_function_it.nextPointer()) |ir_function| { - const function = mir.functions.addOneAssumeCapacity(); - function.* = .{}; + try mir.blocks.ensureCapacity(allocator, intermediate.blocks.len); + try mir.functions.ensureCapacity(allocator, intermediate.function_definitions.len); + try mir.instruction_selections.ensureUnusedCapacity(allocator, intermediate.function_definitions.len); + + var ir_function_definition_iterator = intermediate.function_definitions.iterator(); + try mir.function_declaration_map.ensureTotalCapacity(mir.allocator, @intCast(intermediate.function_definitions.len)); + + while (ir_function_definition_iterator.nextPointer()) |ir_function| { + const fn_name = mir.ir.getFunctionName(ir_function.declaration); + + const instruction_selection = mir.instruction_selections.addOneAssumeCapacity(); + const function_allocation = try mir.functions.addOne(mir.allocator); + const function = function_allocation.ptr; + mir.function_declaration_map.putAssumeCapacityNoClobber(ir_function.declaration, function_allocation.index); + function.* = .{ + .mir = mir, + .instruction_selection = instruction_selection, + .name = fn_name, + }; + instruction_selection.* = .{ + .function = function, + }; + } + + var function_iterator = mir.functions.iterator(); + ir_function_definition_iterator = intermediate.function_definitions.iterator(); + + var entry_point: ?u32 = null; + var ir_function_index = ir_function_definition_iterator.getCurrentIndex(); + while (ir_function_definition_iterator.nextPointer()) |ir_function| { + const function_index = function_iterator.getCurrentIndex(); + const function = function_iterator.nextPointer() orelse unreachable; + logln(.codegen, .instruction_selection_ir_function, "Selecting instructions for {}", .{ir_function}); + const instruction_selection = function.instruction_selection; + + if 
(ir_function_index.eq(intermediate.entry_point)) { + entry_point = function_index.uniqueInteger(); + } + + const ir_function_declaration = mir.ir.function_declarations.get(ir_function.declaration); + const calling_convention = calling_conventions.get(ir_function_declaration.calling_convention); + + try instruction_selection.block_map.ensureUnusedCapacity(allocator, @intCast(ir_function.blocks.items.len)); try function.blocks.ensureTotalCapacity(allocator, ir_function.blocks.items.len); - for (ir_function.blocks.items) |block_index| { - function.blocks.putAssumeCapacity(block_index, @intCast(function.instructions.items.len)); - const basic_block = intermediate.blocks.get(block_index); - if (ir_function.current_stack_offset > 0) { - // TODO: switch on ABI - try mir.addInstruction(function, .push, ir.Instruction.Index.invalid, &.{.{ .gp_register = .{ .value = .bp, .size = .eight } }}); + for (ir_function.blocks.items) |block| { + const block_allocation = try mir.blocks.append(allocator, .{}); + instruction_selection.block_map.putAssumeCapacity(block, block_allocation.index); + function.blocks.appendAssumeCapacity(block_allocation.index); + } - try mir.addInstruction(function, .mov, ir.Instruction.Index.invalid, &.{ - .{ .gp_register = .{ .value = .bp, .size = .eight } }, - .{ .gp_register = .{ .value = .sp, .size = .eight } }, - }); + for (mir.ir.blocks.get(ir_function.blocks.items[0]).instructions.items) |ir_instruction_index| { + const ir_instruction = mir.ir.instructions.get(ir_instruction_index); - try mir.addInstruction(function, .sub, ir.Instruction.Index.invalid, &.{ - .{ .gp_register = .{ .value = .sp, .size = .eight } }, - .{ - .immediate = Compilation.Integer{ - .value = ir_function.current_stack_offset, - .type = .{ - .bit_count = 8, - .signedness = .unsigned, - }, - }, - }, - }); + // TODO: take into account exceptions, dynamic allocas? 
+ if (ir_instruction.* == .stack) { + const stack = mir.ir.stack_references.get(ir_instruction.stack); + const ir_type = getIrType(mir.ir, ir_instruction_index); + const value_type = resolveType(ir_type); + const type_info = value_types.get(value_type); + const total_size = type_info.size * stack.count; + const frame_index = try mir.createStackObject(instruction_selection, total_size, @intCast(stack.alignment), ir_instruction_index, false); + try instruction_selection.stack_map.putNoClobber(allocator, ir_instruction_index, frame_index); } - for (basic_block.instructions.items) |instruction_index| { - const instruction = intermediate.instructions.get(instruction_index); - switch (instruction.*) { - .jump => |jump_index| { - const jump = intermediate.jumps.get(jump_index); - try mir.addInstruction(function, .jmp, instruction_index, &.{ - .{ .displacement = .{ - .source = jump.source, - .destination = .{ .block = jump.destination }, - } }, + // TODO: handle stack references outside blocks + } + + instruction_selection.current_block = function.blocks.items[0]; + + try instruction_selection.lowerArguments(mir, ir_function); + + var block_i: usize = function.blocks.items.len; + + while (block_i > 0) { + block_i -= 1; + + const block_index = function.blocks.items[block_i]; + _ = block_index; + const ir_block_index = ir_function.blocks.items[block_i]; + const ir_block = mir.ir.blocks.get(ir_block_index); + + var instruction_i: usize = ir_block.instructions.items.len; + + var folded_load = false; + + while (instruction_i > 0) { + instruction_i -= 1; + + const ir_instruction_index = ir_block.instructions.items[instruction_i]; + const ir_instruction = mir.ir.instructions.get(ir_instruction_index); + + instruction_selection.local_value_map.clearRetainingCapacity(); + + logln(.codegen, .instruction_selection_new_instruction, "Instruction #{}", .{instruction_i}); + + switch (ir_instruction.*) { + .ret => |ir_ret_index| { + const ir_ret = mir.ir.returns.get(ir_ret_index); + const 
value_type = resolveType(getIrType(mir.ir, ir_ret.instruction)); + const source_register = try instruction_selection.getRegisterForValue(mir, ir_ret.instruction); + + const register_class = register_classes.get(value_type); + + const physical_register = Register{ + .index = .{ + .physical = switch (register_class) { + .gp32 => .eax, + .gp64 => .rax, + else => unreachable, + }, + }, + }; + const operand_id: Operand.Id = switch (register_class) { + .gp32 => .gp32, + .gp64 => .gp64, + else => unreachable, + }; + + const copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + Operand{ + .id = operand_id, + .u = .{ + .register = physical_register, + }, + .flags = .{ + .type = .def, + }, + }, + Operand{ + .id = operand_id, + .u = .{ + .register = source_register, + }, + .flags = .{}, + }, }); - }, - .copy => |copy_value_index| { - const copy_value = intermediate.values.get(copy_value_index); - switch (copy_value.*) { - .integer => |integer| try mir.emitMovRegImm(function, integer, instruction_index, .general, Size.fromBitCount(integer.type.bit_count)), - else => |t| @panic(@tagName(t)), - } - }, - .ret => |ret_value_index| { - const ret_value = intermediate.values.get(ret_value_index); - switch (ret_value.*) { - .integer => |integer| try mir.emitMovRegImm(function, integer, instruction_index, .ret, Size.fromBitCount(integer.type.bit_count)), - else => |t| @panic(@tagName(t)), - } - if (ir_function.current_stack_offset > 0) { - unreachable; - } + try instruction_selection.instruction_cache.append(mir.allocator, copy); - try mir.addInstruction(function, .ret, instruction_index, &.{}); - }, - .call => |call_value_index| { - // TODO: args - const call = intermediate.calls.get(call_value_index); - try mir.addInstruction(function, .call, instruction_index, &.{ - .{ - .displacement = .{ - .source = block_index, - .destination = .{ - .function = call.function, - }, + const ret = try mir.buildInstruction(instruction_selection, .ret, &.{ + Operand{ + .id = operand_id, 
+ .u = .{ + .register = physical_register, + }, + .flags = .{ + .implicit = true, }, }, }); + try instruction_selection.instruction_cache.append(mir.allocator, ret); }, - .store => |store_index| { - const store = intermediate.stores.get(store_index); - const source_value = intermediate.values.get(store.source); - const destination_value = intermediate.values.get(store.destination); - switch (destination_value.*) { - .stack_reference => |stack_reference_index| { - const stack_reference = intermediate.stack_references.get(stack_reference_index); - print("stack ref: {}\n", .{stack_reference}); - switch (source_value.*) { - .call => |call_index| try mir.emitStoreForFunctionCallResult(function, intermediate, instruction_index, stack_reference.*, call_index), + .load_integer => try instruction_selection.materializeInteger(mir, ir_instruction_index), + .@"unreachable" => try instruction_selection.instruction_cache.append(mir.allocator, try mir.buildInstruction(instruction_selection, .ud2, &.{})), + .syscall => |ir_syscall_index| { + const ir_syscall = mir.ir.syscalls.get(ir_syscall_index); + const syscall_register_list = calling_convention.syscall_registers[0..ir_syscall.arguments.items.len]; + + for (ir_syscall.arguments.items, syscall_register_list) |ir_argument_index, syscall_register| { + //print("index: {}", .{index}); + const source_register = try instruction_selection.getRegisterForValue(mir, ir_argument_index); + const destination_register = Register{ + .index = .{ + .physical = syscall_register, + }, + }; + + const source_operand = Operand{ + .id = .gp64, + .u = .{ + .register = source_register, + }, + .flags = .{}, + }; + const destination_operand = Operand{ + .id = .gp64, + .u = .{ + .register = destination_register, + }, + .flags = .{ .type = .def }, + }; + + const argument_copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + destination_operand, + source_operand, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, 
argument_copy); + } + + // TODO: handle syscall return value + const syscall = try mir.buildInstruction(instruction_selection, .syscall, &.{}); + try instruction_selection.instruction_cache.append(mir.allocator, syscall); + + const produce_syscall_return_value = switch (instruction_i == ir_block.instructions.items.len - 2) { + true => blk: { + const last_block_instruction = mir.ir.instructions.get(ir_block.instructions.items[ir_block.instructions.items.len - 1]); + break :blk switch (last_block_instruction.*) { + .@"unreachable" => false, else => |t| @panic(@tagName(t)), - } + }; }, - else => |t| @panic(@tagName(t)), + false => true, + }; + + if (produce_syscall_return_value) { + const physical_return_register = Register{ + .index = .{ + .physical = .rax, + }, + }; + const physical_return_operand = Operand{ + .id = .gp64, + .u = .{ + .register = physical_return_register, + }, + .flags = .{ .type = .def }, + }; + + const virtual_return_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const virtual_return_operand = Operand{ + .id = .gp64, + .u = .{ + .register = virtual_return_register, + }, + .flags = .{ .type = .def }, + }; + + const syscall_result_copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + virtual_return_operand, + physical_return_operand, + }); + try instruction_selection.instruction_cache.append(mir.allocator, syscall_result_copy); } }, - .syscall => |syscall_value_index| { - const syscall_value = intermediate.values.get(syscall_value_index); - const syscall = intermediate.syscalls.get(syscall_value.syscall); - for (syscall.arguments.items, syscall_registers[0..syscall.arguments.items.len]) |argument_index, syscall_register| { - const argument = intermediate.values.get(argument_index).*; - switch (argument) { - .integer => |integer| try mir.emitMovRegImm(function, integer, instruction_index, .{ .syscall_param = syscall_register }, Size.eight), - .stack_reference => |stack_reference_index| { - const 
stack_reference = intermediate.stack_references.get(stack_reference_index); - try mir.emitMovRegStack(function, .{ .syscall_param = syscall_register }, stack_reference.*, instruction_index); + .sign_extend => |ir_cast_index| { + const ir_sign_extend = mir.ir.casts.get(ir_cast_index); + assert(!folded_load); + const ir_source_instruction = blk: { + var source = ir_sign_extend.value; + const source_instruction = mir.ir.instructions.get(source); + const result = switch (source_instruction.*) { + .load => b: { + const load = mir.ir.loads.get(source_instruction.load); + folded_load = true; + break :b load.instruction; }, else => |t| @panic(@tagName(t)), - } - } + }; + break :blk result; + }; - try mir.addInstruction(function, .syscall, instruction_index, &.{}); - }, - .@"unreachable" => try mir.addInstruction(function, .ud2, instruction_index, &.{}), - else => |t| @panic(@tagName(t)), - } - } - } - } + const destination_type = resolveType(ir_sign_extend.type); - return mir; - } + const source_type = resolveType(getIrType(mir.ir, ir_source_instruction)); - pub fn allocateRegisters(mir: *MIR, allocator: Allocator, intermediate: *ir.Result) !void { - for (mir.functions.items) |*function| { - var register_allocator = try RegisterAllocator.init(allocator); - var instructions_to_delete = AutoArrayHashMap(u32, void){}; - for (function.instructions.items, 0..) 
|*instruction, instruction_index| { - print("#{} {s}\n", .{ instruction_index, @tagName(instruction.id) }); - var allocated_gp_register: ?x86_64.GPRegister = null; - for (instruction.getOperands()) |*operand| { - switch (operand.*) { - .displacement, .immediate, .stack => {}, - .gp_register => |gp_register| switch (instruction.ir.valid) { - true => operand.gp_register.value = blk: { - const value_index = getValueFromInstruction(intermediate, instruction.ir); + if (destination_type != source_type) { + const instruction_id: Instruction.Id = switch (source_type) { + .i32 => switch (destination_type) { + .i64 => switch (folded_load) { + true => .movsx64rm32, + false => .movsx64rr32, + }, + else => unreachable, + }, + else => |t| @panic(@tagName(t)), + }; - if (gp_register.value) |expected_register| { - if (register_allocator.gp_registers.used.get(expected_register)) |allocated_value| { - switch (value_index.eq(allocated_value)) { - // TODO delete the instruction - true => if (allocated_gp_register == null) unreachable else { - assert(allocated_gp_register.? 
== expected_register); + const instruction_descriptor = instruction_descriptors.getPtrConst(instruction_id); + assert(instruction_descriptor.operands.len == 2); + const destination_operand_index = 0; + const destination_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const destination_operand = mir.constrainOperandRegisterClass(instruction_descriptor, destination_register, destination_operand_index, .{ .type = .def }); + const source_operand_index = 1; + + const source_operand = switch (folded_load) { + true => blk: { + const addressing_mode = instruction_selection.getAddressingModeFromIr(mir, ir_source_instruction); + const memory_id: Operand.Id = switch (source_type) { + .i32 => .i32mem, + .i64 => .i64mem, + else => |t| @panic(@tagName(t)), + }; + const operand = Operand{ + .id = memory_id, + .u = .{ + .memory = .{ + .addressing_mode = addressing_mode, + }, }, - // _ = try instructions_to_delete.getOrPut(allocator, @intCast(instruction_index)), //.append(allocator, @intCast(instruction_index)), - false => unreachable, - } - } else { - if (register_allocator.gp_registers.free.get(expected_register)) |_| { - try register_allocator.gp_registers.allocate(allocator, expected_register, intermediate, instruction.*, value_index); - allocated_gp_register = expected_register; - } else { - unreachable; - } - } + .flags = .{}, + }; + break :blk operand; + }, + false => blk: { + const source_register = try instruction_selection.getRegisterForValue(mir, ir_source_instruction); + break :blk mir.constrainOperandRegisterClass(instruction_descriptor, source_register, source_operand_index, .{}); + }, + }; - break :blk expected_register; - } else { - for (register_allocator.gp_registers.free.keys()) |register| { - try register_allocator.gp_registers.allocate(allocator, register, intermediate, instruction.*, value_index); - break :blk register; - } else { - unreachable; - } - } - }, - false => {}, - }, - else => |t| @panic(@tagName(t)), - } - } 
- } - - if (instructions_to_delete.keys().len > 0) { - var next_instruction_to_delete_index: usize = 0; - print("Instructions to delete: ", .{}); - for (instructions_to_delete.keys()) |instruction| { - print("#{}, ", .{instruction}); - } - print("\n", .{}); - for (function.blocks.keys(), function.blocks.values()) |*block_index, *instruction_offset| { - _ = block_index; - while (instructions_to_delete.keys()[next_instruction_to_delete_index] <= instruction_offset.*) : (next_instruction_to_delete_index += 1) { - unreachable; - } - } - - var removed_instruction_count: usize = 0; - for (instructions_to_delete.keys()) |instruction_to_delete_index| { - _ = function.instructions.orderedRemove(instruction_to_delete_index - removed_instruction_count); - removed_instruction_count += 1; - } - - print("Instructions after deletion\n", .{}); - for (function.instructions.items, 0..) |instruction, index| { - print("#{} {s}\n", .{ index, @tagName(instruction.id) }); - } - print("\n", .{}); - } - } - } - - const RegisterAllocator = struct { - gp_registers: RegisterSet(x86_64.GPRegister) = .{}, - - fn init(allocator: Allocator) !RegisterAllocator { - var register_allocator = RegisterAllocator{}; - try register_allocator.gp_registers.free.ensureTotalCapacity(allocator, @typeInfo(x86_64.GPRegister).Enum.fields.len); - inline for (@typeInfo(x86_64.GPRegister).Enum.fields) |enum_field| { - register_allocator.gp_registers.free.putAssumeCapacity(@field(x86_64.GPRegister, enum_field.name), {}); - } - - return register_allocator; - } - }; - - fn RegisterSet(comptime RegisterEnum: type) type { - return struct { - used: AutoArrayHashMap(RegisterEnum, ir.Value.Index) = .{}, - free: AutoArrayHashMap(RegisterEnum, void) = .{}, - - fn allocate(register_set: *@This(), allocator: Allocator, register: RegisterEnum, intermediate: *ir.Result, instruction: MIR.Instruction, value_index: ir.Value.Index) !void { - switch (intermediate.instructions.get(instruction.ir).*) { - .store => {}, - else => { - 
switch (register_set.free.orderedRemove(register)) { - true => try register_set.used.put(allocator, register, value_index), - false => unreachable, - } - }, - } - } - }; - } - - fn getValueFromInstruction(intermediate: *ir.Result, instruction_index: ir.Instruction.Index) ir.Value.Index { - const instruction = intermediate.instructions.get(instruction_index); - const value_index: ir.Value.Index = switch (instruction.*) { - .copy, .ret, .syscall => |value_index| value_index, - .store => |store_index| blk: { - const store = intermediate.stores.get(store_index); - break :blk store.source; - }, - else => |t| @panic(@tagName(t)), - }; - - return value_index; - } - - fn emitStoreForFunctionCallResult(mir: *MIR, function: *MIR.Function, intermediate: *ir.Result, instruction: ir.Instruction.Index, stack_reference: ir.StackReference, call_index: ir.Call.Index) !void { - _ = call_index; - _ = intermediate; - if (stack_reference.size <= @sizeOf(u64)) { - switch (stack_reference.size) { - @sizeOf(u8) => unreachable, - @sizeOf(u16) => unreachable, - @sizeOf(u32) => try mir.addInstruction(function, .mov, instruction, &.{ - .{ .stack = .{ .offset = stack_reference.offset } }, .{ .gp_register = .{ .value = .a, .size = Size.fromByteCount(@intCast(stack_reference.size)) } }, - }), - @sizeOf(u64) => unreachable, - else => unreachable, - } - } else { - unreachable; - } - } - - pub fn encode(mir: *const MIR, intermediate: *const ir.Result) !emit.Result { - var local_relocations = ArrayList(LocalRelocation){}; - var global_relocations = ArrayList(GlobalRelocation){}; - var block_index: usize = 0; - - var image = try emit.Result.create(); - - for (mir.functions.items) |*function| { - local_relocations.clearRetainingCapacity(); - function.instruction_byte_offset = @intCast(image.sections.text.index); - for (function.instructions.items, 0..) 
|*instruction, instruction_index| { - if (block_index < function.blocks.values().len) { - if (instruction_index == function.blocks.values()[block_index]) { - function.blocks.values()[block_index] = @intCast(image.sections.text.index); - block_index += 1; - } - } - - const operands = instruction.getOperands(); - switch (operands.len) { - 0 => switch (instruction.id) { - .ret => image.appendCodeByte(0xc3), - .syscall => image.appendCode(&.{ 0x0f, 0x05 }), - .ud2 => image.appendCode(&.{ 0x0f, 0x0b }), - else => |t| @panic(@tagName(t)), - }, - 1 => switch (instruction.id) { - .call => { - const operand = operands[0]; - assert(operand == .displacement); - switch (operand.displacement.destination) { - .function => |ir_function_index| { - const function_index = ir_function_index.uniqueInteger(); - const current_function_index = @divExact(@intFromPtr(function) - @intFromPtr(mir.functions.items.ptr), @sizeOf(MIR.Function)); - - if (current_function_index < function_index) { - try mir.encodeRel32InstructionWithRelocation(&image, RelocationType.global, .{ - .relocations = &global_relocations, - .target = function_index, - .opcode = 0xe8, - }); - } else { - try encodeRel32Instruction(&image, .{ - .target = mir.functions.items[function_index].instruction_byte_offset, - .opcode = 0xe8, - }); - } - }, - else => |t| @panic(@tagName(t)), - } - }, - .jmp => { - const operand = operands[0]; - assert(operand == .displacement); - if (operand.displacement.source.uniqueInteger() < operand.displacement.destination.block.uniqueInteger()) { - try mir.encodeRel32InstructionWithRelocation(&image, RelocationType.local, .{ - .relocations = &local_relocations, - .target = operand.displacement.destination.block, - .opcode = 0xe9, + const sign_extend = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, }); - } else if (operand.displacement.source.uniqueInteger() == operand.displacement.destination.block.uniqueInteger()) { - unreachable; + + 
try instruction_selection.instruction_cache.append(mir.allocator, sign_extend); + + try instruction_selection.updateValueMap(mir.allocator, ir_instruction_index, destination_register, false); } else { unreachable; } }, - .push => { - const operand = operands[0]; - switch (operand) { - .gp_register => |gp_register| { - assert(gp_register.size == .eight); - if (Rex.create(.{ .rm = gp_register.value.? })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); + .load => |ir_load_index| { + if (folded_load) { + folded_load = false; + continue; + } + + const ir_load = mir.ir.loads.get(ir_load_index); + const ir_source = ir_load.instruction; + const addressing_mode = instruction_selection.getAddressingModeFromIr(mir, ir_source); + const value_type = resolveType(getIrType(mir.ir, ir_source)); + + switch (value_type) { + inline .i32, + .i64, + => |vt| { + const instruction_id: Instruction.Id = switch (vt) { + .i32 => .mov32rm, + .i64 => .mov64rm, + else => |t| @panic(@tagName(t)), + }; + const memory_id: Operand.Id = switch (vt) { + .i32 => .i32mem, + .i64 => .i64mem, + else => |t| @panic(@tagName(t)), + }; + + const instruction_descriptor = instruction_descriptors.getPtrConst(instruction_id); + + const destination_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const destination_operand_index = 0; + const destination_operand_id = instruction_descriptor.operands[destination_operand_index].id; + const destination_operand = Operand{ + .id = destination_operand_id, + .u = .{ + .register = destination_register, + }, + .flags = .{ .type = .def }, + }; + + const source_operand = Operand{ + .id = memory_id, + .u = .{ + .memory = .{ + .addressing_mode = addressing_mode, + }, + }, + .flags = .{}, + }; + + const load = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + try instruction_selection.instruction_cache.append(mir.allocator, load); + + try 
instruction_selection.updateValueMap(mir.allocator, ir_instruction_index, destination_register, false); + }, + else => |t| @panic(@tagName(t)), + } + }, + .store => |ir_store_index| { + const ir_store = mir.ir.stores.get(ir_store_index); + const ir_source = ir_store.source; + + const ir_destination = ir_store.destination; + const addressing_mode = instruction_selection.getAddressingModeFromIr(mir, ir_destination); + + const source_register = try instruction_selection.getRegisterForValue(mir, ir_source); + + const value_type = resolveType(getIrType(mir.ir, ir_source)); + + switch (value_type) { + inline .i32, .i64 => |vt| { + const instruction_id: Instruction.Id = switch (vt) { + // TODO, non-temporal SSE2 MOVNT + .i32 => .mov32mr, + .i64 => .mov64mr, + else => |t| @panic(@tagName(t)), + }; + + const instruction_descriptor = comptime instruction_descriptors.getPtrConst(instruction_id); + const source_operand_index = instruction_descriptor.operands.len - 1; + const source_operand_id = instruction_descriptor.operands[source_operand_index].id; + const source_operand = Operand{ + .id = source_operand_id, + .u = .{ + .register = source_register, + }, + .flags = .{}, + }; + + const destination_operand_id = instruction_descriptor.operands[0].id; + const destination_operand = Operand{ + .id = destination_operand_id, + .u = .{ + .memory = .{ + .addressing_mode = addressing_mode, + }, + }, + .flags = .{}, + }; + + const store = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + source_operand, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, store); + }, + else => |t| @panic(@tagName(t)), + } + }, + .stack => { + assert(instruction_selection.stack_map.get(ir_instruction_index) != null); + }, + .call => |ir_call_index| { + const ir_call = mir.ir.calls.get(ir_call_index); + for (ir_call.arguments, 0..) 
|ir_argument_index, index| { + // print("index: {}", .{index}); + const source_register = try instruction_selection.getRegisterForValue(mir, ir_argument_index); + const source_value_type = resolveType(getIrType(mir.ir, ir_argument_index)); + const source_register_class = register_classes.get(source_value_type); + const argument_register = calling_convention.argument_registers.get(source_register_class)[index]; + // print("Argument register: {}", .{argument_register}); + + const destination_register = Register{ + .index = .{ + .physical = argument_register, + }, + }; + + const operand_id: Operand.Id = switch (source_register_class) { + .gp32 => .gp32, + .gp64 => .gp64, + else => unreachable, + }; + const source_operand = Operand{ + .id = operand_id, + .u = .{ + .register = source_register, + }, + .flags = .{}, + }; + const destination_operand = Operand{ + .id = operand_id, + .u = .{ + .register = destination_register, + }, + .flags = .{}, + }; + + const copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + destination_operand, + source_operand, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, copy); + } + + const call = try mir.buildInstruction(instruction_selection, .call64pcrel32, &.{ + Operand{ + .id = .i64i32imm_brtarget, + .u = .{ + .pc_relative = .{ + .function_declaration = mir.function_declaration_map.get(ir_call.function).?, + }, + }, + .flags = .{}, + }, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, call); + + const ir_return_type = mir.ir.function_declarations.get(ir_call.function).return_type; + switch (ir_return_type) { + .void, + .noreturn, + => {}, + else => { + const return_type = resolveType(ir_return_type); + switch (return_type) { + inline .i64, .i32 => |rt| { + const register_operand_id = switch (rt) { + .i32 => .gp32, + .i64 => .gp64, + else => unreachable, + }; + const physical_return_register = Register{ + .index = .{ + .physical = switch (rt) { + .i32 => .eax, + .i64 => 
.rax, + else => unreachable, + }, + }, + }; + + const physical_return_operand = Operand{ + .id = register_operand_id, + .u = .{ + .register = physical_return_register, + }, + .flags = .{}, + }; + + const virtual_return_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); + const virtual_return_operand = Operand{ + .id = register_operand_id, + .u = .{ + .register = virtual_return_register, + }, + .flags = .{ .type = .def }, + }; + + const call_result_copy = try mir.buildInstruction(instruction_selection, .copy, &.{ + virtual_return_operand, + physical_return_operand, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, call_result_copy); + }, + else => |t| @panic(@tagName(t)), } - const opcode = @as(u8, 0x50) | @as(u3, @truncate(@intFromEnum(gp_register.value.?))); - image.appendCodeByte(opcode); + }, + } + }, + else => |t| @panic(@tagName(t)), + } + + var i: usize = instruction_selection.instruction_cache.items.len; + const block = mir.blocks.get(instruction_selection.current_block); + + while (i > 0) { + i -= 1; + + const instruction_index = instruction_selection.instruction_cache.items[i]; + const instruction = mir.instructions.get(instruction_index); + logln(.codegen, .instruction_selection_cache_flush, "Inserting instruction #{} ({s}) into index {} (instruction count: {})", .{ instruction_index.uniqueInteger(), @tagName(instruction.id), block.current_stack_index, block.instructions.items.len }); + try block.instructions.insert(mir.allocator, block.current_stack_index, instruction_index); + } + + instruction_selection.instruction_cache.clearRetainingCapacity(); + } + } + + try instruction_selection.emitLiveInCopies(mir, function.blocks.items[0]); + + logln(.codegen, .instruction_selection_ir_function, "Selected instructions for {}", .{function}); + + ir_function_index = ir_function_definition_iterator.getCurrentIndex(); + } + + mir.entry_point = entry_point orelse unreachable; + + return mir; + } + + fn 
getNextInstructionIndex(mir: *MIR, instruction_index: Instruction.Index) usize { + const instruction = mir.instructions.get(instruction_index); + const parent_block = mir.blocks.get(instruction.parent); + const next = for (parent_block.instructions.items, 0..) |index, i| { + if (index.eq(instruction_index)) break i + 1; + } else unreachable; + return next; + } + + fn setPhysicalRegister(mir: *MIR, instruction_selection: *InstructionSelection, operand_index: Operand.Index, register: Register.Physical) bool { + const operand = mir.operands.get(operand_index); + if (!operand.flags.subreg) { + mir.setRegisterInOperand(instruction_selection, operand_index, .{ + .physical = register, + }); + operand.flags.renamable = true; + return false; + } + + unreachable; + } + + fn setRegisterInOperand(mir: *MIR, instruction_selection: *InstructionSelection, operand_index: Operand.Index, register: Register.Index) void { + const operand = mir.operands.get(operand_index); + assert(operand.u == .register); + assert(!std.meta.eql(operand.u.register.index, register)); + operand.flags.renamable = false; + mir.removeRegisterOperandFromUseList(instruction_selection, operand); + operand.u.register.index = register; + mir.addRegisterOperandFromUseList(instruction_selection, operand_index); + } + + fn addRegisterOperandFromUseList(mir: *MIR, instruction_selection: *InstructionSelection, operand_index: Operand.Index) void { + const operand = mir.operands.get(operand_index); + assert(!operand.isOnRegisterUseList()); + const head_index_ptr = mir.getRegisterListHead(instruction_selection, operand.u.register); + const head_index = head_index_ptr.*; + + logln(.codegen, .instruction_selection_register_operand_list, "Adding register list metadata to operand #{}", .{operand_index.uniqueInteger()}); + + switch (head_index.invalid) { + false => { + const head_operand = mir.operands.get(head_index); + assert(std.meta.eql(head_operand.u.register.index, operand.u.register.index)); + + const 
last_operand_index = head_operand.u.register.list.previous; + const last_operand = mir.operands.get(last_operand_index); + assert(std.meta.eql(last_operand.u.register.index, operand.u.register.index)); + + head_operand.u.register.list.previous = operand_index; + operand.u.register.list.previous = last_operand_index; + + switch (operand.flags.type) { + .def => { + operand.u.register.list.next = head_index; + head_index_ptr.* = operand_index; + }, + .use => { + operand.u.register.list.next = Operand.Index.invalid; + last_operand.u.register.list.next = operand_index; + }, + } + }, + true => { + logln(.codegen, .instruction_selection_register_operand_list, "List is empty, adding it to the top of the list", .{}); + + operand.u.register.list.previous = operand_index; + operand.u.register.list.next = Operand.Index.invalid; + head_index_ptr.* = operand_index; + }, + } + } + + fn removeRegisterOperandFromUseList(mir: *MIR, instruction_selection: *InstructionSelection, operand: *Operand) void { + assert(operand.isOnRegisterUseList()); + const head_index_ptr = mir.getRegisterListHead(instruction_selection, operand.u.register); + const head_index = head_index_ptr.*; + assert(!head_index.invalid); + + const operand_previous = operand.u.register.list.previous; + const operand_next = operand.u.register.list.next; + + const head = mir.operands.get(head_index); + if (operand == head) { + head_index_ptr.* = operand_next; + } else { + const previous = mir.operands.get(operand_previous); + previous.u.register.list.next = operand_next; + } + + const next = switch (operand_next.invalid) { + false => mir.operands.get(operand_next), + true => head, + }; + + next.u.register.list.previous = operand_previous; + + operand.u.register.list.previous = Operand.Index.invalid; + operand.u.register.list.next = Operand.Index.invalid; + } + + fn constrainRegisterClass(mir: *MIR, register: Register, old_register_class: Register.Class) ?Register.Class { + const new_register_class = switch 
(register.index) { + .virtual => |virtual_register_index| mir.virtual_registers.get(virtual_register_index).register_class, + else => unreachable, + }; + + // print("Old: {}. New: {}", .{ old_register_class, new_register_class }); + switch (old_register_class == new_register_class) { + true => return new_register_class, + false => unreachable, + } + unreachable; + } + + fn constrainOperandRegisterClass(mir: *MIR, instruction_descriptor: *const Instruction.Descriptor, register: Register, operand_index: usize, flags: Operand.Flags) Operand { + assert(register.index == .virtual); + const operand_reference = instruction_descriptor.operands[operand_index]; + const operand_register_class = register_class_operand_matcher.get(operand_reference.id); + // print("Constraint operand #{} with {} (out of {})", .{ operand_index, operand_register_class, operand_reference.id }); + + // const register_class = op + if (mir.constrainRegisterClass(register, operand_register_class) == null) { + unreachable; + } + + return Operand{ + .id = operand_reference.id, + .u = .{ + .register = register, + }, + .flags = flags, + }; + } + + fn createVirtualRegister(mir: *MIR, register_class: Register.Class) !Register { + const virtual_register_index = try mir.createVirtualRegisterIndexed(register_class); + return Register{ + .index = .{ + .virtual = virtual_register_index, + }, + }; + } + + fn createVirtualRegisterIndexed(mir: *MIR, register_class: Register.Class) !Register.Virtual.Index { + const allocation = try mir.virtual_registers.append(mir.allocator, .{ + .register_class = register_class, + }); + return allocation.index; + } + + const RegisterBitset = std.EnumSet(Register.Physical); + + const RegisterAllocator = struct { + reserved: RegisterBitset = RegisterBitset.initEmpty(), + register_states: std.EnumArray(Register.Physical, Register.State) = std.EnumArray(Register.Physical, Register.State).initFill(.free), + used_in_instruction: RegisterBitset = RegisterBitset.initEmpty(), + 
may_live_across_blocks: std.DynamicBitSetUnmanaged, + live_virtual_registers: std.AutoArrayHashMapUnmanaged(Register.Virtual.Index, LiveRegister) = .{}, + stack_slots: std.AutoHashMapUnmanaged(Register.Virtual.Index, u32) = .{}, + coalesced: ArrayList(Instruction.Index) = .{}, + + fn init(mir: *MIR, instruction_selection: *InstructionSelection) !RegisterAllocator { + var result = RegisterAllocator{ + .may_live_across_blocks = try std.DynamicBitSetUnmanaged.initEmpty(mir.allocator, mir.virtual_registers.len), + }; + + result.reserved.setPresent(.fpcw, true); + result.reserved.setPresent(.fpsw, true); + result.reserved.setPresent(.mxcsr, true); + + for ((try getSubregisters(mir.allocator, .rsp)).keys()) |rsp_subreg| { + result.reserved.setPresent(rsp_subreg, true); + } + + result.reserved.setPresent(.ssp, true); + + for ((try getSubregisters(mir.allocator, .rip)).keys()) |rip_subreg| { + result.reserved.setPresent(rip_subreg, true); + } + + // TODO: complete + const has_frame_pointer = instruction_selection.stack_map.entries.len > 0; + if (has_frame_pointer) { + for ((try getSubregisters(mir.allocator, .rbp)).keys()) |rbp_subreg| { + result.reserved.setPresent(rbp_subreg, true); + } + } + + // TODO: complete + const has_base_pointer = false; + if (has_base_pointer) { + // TODO + } + + result.reserved.setPresent(.cs, true); + result.reserved.setPresent(.ss, true); + result.reserved.setPresent(.ds, true); + result.reserved.setPresent(.es, true); + result.reserved.setPresent(.fs, true); + result.reserved.setPresent(.gs, true); + + inline for ([8]Register.Physical{ .st0, .st1, .st2, .st3, .st4, .st5, .st6, .st7 }) |st_reg| { + result.reserved.setPresent(st_reg, true); + } + + const has_avx512 = false; + if (!has_avx512) { + // TODO xmm alias + } + + // TODO: callee saved registers (CSR) + + // TODO: more setup + + return result; + } + + fn useVirtualRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, 
instruction_index: Instruction.Index, virtual_register: Register.Virtual.Index, instruction_operand_index: u8) !bool { + const instruction = mir.instructions.get(instruction_index); + const operand_index = instruction.operands.items[instruction_operand_index]; + const operand = mir.operands.get(operand_index); + const gop = try register_allocator.live_virtual_registers.getOrPut(mir.allocator, virtual_register); + const live_register = gop.value_ptr; + switch (gop.found_existing) { + true => { + // TODO: asserts + const assert_result = !operand.flags.isKill() or live_register.last_use.eq(instruction_index); + if (assert_result) { + // logln("Existing live register at instruction #{}: {}", .{ instruction_index.uniqueInteger(), live_register }); + // logln("Function until now: {}", .{instruction_selection.function}); + assert(assert_result); + } + }, + false => { + if (!operand.flags.isKill()) { + // TODO some logic + // unreachable; + if (register_allocator.mayLiveOut(mir, instruction_selection, virtual_register)) { + unreachable; + } else { + operand.flags.dead_or_kill = true; + } + } + + live_register.* = .{ + .virtual = virtual_register, + }; + }, + } + + if (live_register.physical == .no_register) { + const hint: ?Register = blk: { + if (instruction.id == .copy) { + const source_operand = mir.operands.get(instruction.operands.items[1]); + assert(source_operand.u == .register); + if (!source_operand.flags.subreg) { + const destination_operand = mir.operands.get(instruction.operands.items[0]); + const hint_register = destination_operand.u.register; + assert(hint_register.index == .physical); + break :blk hint_register; + } + } + break :blk null; + }; + // TODO: handle allocation error here + register_allocator.allocateVirtualRegister(mir, instruction_selection, instruction_index, live_register, hint, false) catch unreachable; + } + + live_register.last_use = instruction_index; + + register_allocator.markUsedRegisterInInstruction(live_register.physical); + return 
mir.setPhysicalRegister(instruction_selection, operand_index, live_register.physical); + } + + fn isRegisterInClass(register: Register.Physical, register_class: Register.Class) bool { + const result = std.mem.indexOfScalar(Register.Physical, registers_by_class.get(register_class), register) != null; + return result; + } + + fn allocateVirtualRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, live_register: *LiveRegister, maybe_hint: ?Register, look_at_physical_register_uses: bool) !void { + assert(live_register.physical == .no_register); + const virtual_register = live_register.virtual; + const register_class = mir.virtual_registers.get(live_register.virtual).register_class; + + if (maybe_hint) |hint_register| { + if (hint_register.index == .physical + // TODO : and isAllocatable + and isRegisterInClass(hint_register.index.physical, register_class) and !register_allocator.isRegisterUsedInInstruction(hint_register.index.physical, look_at_physical_register_uses)) { + if (register_allocator.register_states.get(hint_register.index.physical) == .free) { + register_allocator.assignVirtualToPhysicalRegister(live_register, hint_register.index.physical); + return; + } + } + } + + logln(.codegen, .register_allocation_problematic_hint, "Tracing copies for VR{} in instruction #{}", .{ virtual_register.uniqueInteger(), instruction_index.uniqueInteger() }); + + const maybe_hint2 = register_allocator.traceCopies(mir, instruction_selection, virtual_register); + if (maybe_hint2) |hint| { + // TODO + const allocatable = true; + logln(.codegen, .register_allocation_problematic_hint, "Hint: {}. 
Register class: {s}", .{ hint, @tagName(register_class) }); + + if (hint == .physical and allocatable and isRegisterInClass(hint.physical, register_class) and !register_allocator.isRegisterUsedInInstruction(hint.physical, look_at_physical_register_uses)) { + const physical_register = hint.physical; + + if (register_allocator.register_states.get(physical_register) == .free) { + register_allocator.assignVirtualToPhysicalRegister(live_register, physical_register); + return; + } else { + logln(.codegen, .register_allocation_problematic_hint, "Second hint {s} not free", .{@tagName(physical_register)}); + } + } else { + unreachable; + } + } else { + logln(.codegen, .register_allocation_problematic_hint, "Can't take hint for VR{} for instruction #{}", .{ virtual_register.uniqueInteger(), instruction_index.uniqueInteger() }); + } + + const register_class_members = registers_by_class.get(register_class); + assert(register_class_members.len > 0); + + var best_cost: u32 = SpillCost.impossible; + var best_register = Register.Physical.no_register; + // print("Candidates for {s}: ", .{@tagName(register_class)}); + // for (register_class_members) |candidate_register| { + // print("{s}, ", .{@tagName(candidate_register)}); + // } + // print("", .{}); + for (register_class_members) |candidate_register| { + if (register_allocator.isRegisterUsedInInstruction(candidate_register, look_at_physical_register_uses)) continue; + const spill_cost = register_allocator.computeSpillCost(candidate_register); + + if (spill_cost == 0) { + register_allocator.assignVirtualToPhysicalRegister(live_register, candidate_register); + return; + } + + if (maybe_hint) |hint| { + if (hint.index.physical == candidate_register) { + unreachable; + } + } + + if (maybe_hint2) |hint| { + if (hint.physical == candidate_register) { + unreachable; + } + } + + if (spill_cost < best_cost) { + best_register = candidate_register; + best_cost = spill_cost; + } + } + + assert(best_register != .no_register); + + unreachable; 
+ } + + fn computeSpillCost(register_allocator: *RegisterAllocator, physical_register: Register.Physical) u32 { + const register_state = register_allocator.register_states.get(physical_register); + return switch (register_state) { + .free => 0, + .preassigned => SpillCost.impossible, + .virtual => |virtual_register_index| blk: { + const sure_spill = register_allocator.stack_slots.get(virtual_register_index) != null or register_allocator.live_virtual_registers.get(virtual_register_index).?.live_out; + break :blk if (sure_spill) SpillCost.clean else SpillCost.dirty; + }, + .livein => unreachable, + }; + } + + const SpillCost = struct { + const clean = 50; + const dirty = 100; + const pref_bonus = 20; + const impossible = std.math.maxInt(u32); + }; + + fn isRegisterUsedInInstruction(register_allocator: *RegisterAllocator, physical_register: Register.Physical, look_at_physical_register_uses: bool) bool { + _ = look_at_physical_register_uses; + + // TODO: register masks + // if (register_allocator.used_in_instruction.contains(physical_register)) { + // return true; + // } + // // TODO + // else { + // return false; + // } + + const result = register_allocator.used_in_instruction.contains(physical_register); + logln(.codegen, .register_allocation_problematic_hint, "Register {s} used in instruction: {}", .{ @tagName(physical_register), result }); + return result; + } + + fn traceCopyChain(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, given_register: Register) ?Register.Index { + _ = register_allocator; + const chain_length_limit = 3; + var chain_try_count: u32 = 0; + + var register = given_register; + + while (true) { + switch (register.index) { + .physical => return register.index, + .virtual => |vri| { + logln(.codegen, .register_allocation_problematic_hint, "[traceCopyChain] Operand: VR{}", .{vri.uniqueInteger()}); + + const virtual_head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ + .index = .{ + 
.virtual = vri, + }, + }); + + logln(.codegen, .register_allocation_problematic_hint, "[traceCopyChain] Head operand for VR{}: O{}", .{ vri.uniqueInteger(), virtual_head_index_ptr.uniqueInteger() }); + + var vdef = Instruction.Iterator.Get(.{ + .use = false, + .def = true, + .element = .instruction, + }).new(mir, virtual_head_index_ptr.*); + + const operand_index = vdef.index; + + const vdef_instruction = vdef.next() orelse break; + logln(.codegen, .register_allocation_problematic_hint, "[traceCopyChain] VR{} defined in operand #{} of instruction #{}", .{ vri.uniqueInteger(), operand_index.uniqueInteger(), vdef_instruction.uniqueInteger() }); + + const next_operand = vdef.index; + + if (vdef.next()) |unexpected_next_instruction| { + logln(.codegen, .register_allocation_problematic_hint, "[traceCopyChain] VR{} also defined in operand #{} unexpected next instruction #{}. Breaking...", .{ vri.uniqueInteger(), next_operand.uniqueInteger(), unexpected_next_instruction.uniqueInteger() }); + break; + } + + const instruction = mir.instructions.get(vdef_instruction); + switch (instruction.id) { + .copy => { + const copy_source_operand_index = instruction.operands.items[1]; + const copy_source_operand = mir.operands.get(copy_source_operand_index); + register = copy_source_operand.u.register; + }, + else => |t| @panic(@tagName(t)), + } + }, + } + + chain_try_count += 1; + if (chain_try_count >= chain_length_limit) break; + } + + return null; + } + + fn traceCopies(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register_index: Register.Virtual.Index) ?Register.Index { + const head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ + .index = .{ + .virtual = virtual_register_index, + }, + }); + + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Tracing copies for VR{}. 
Head operand: #{}", .{ virtual_register_index.uniqueInteger(), head_index_ptr.uniqueInteger() }); + + var define_instructions = Instruction.Iterator.Get(.{ + .use = false, + .def = true, + .element = .instruction, + }).new(mir, head_index_ptr.*); + + if (!define_instructions.index.invalid) { + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Next operand before loop: #{}", .{define_instructions.index.uniqueInteger()}); + } + + const definition_limit = 3; + var try_count: u32 = 0; + while (define_instructions.next()) |instruction_index| { + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Current instruction: #{}", .{instruction_index.uniqueInteger()}); + if (!define_instructions.index.invalid) { + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Next operand: #{}", .{define_instructions.index.uniqueInteger()}); + } else { + // logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Next operand: invalid", .{}); + } + + const instruction = mir.instructions.get(instruction_index); + switch (instruction.id) { + .mov32rm => {}, + .mov32r0 => {}, + .copy => { + const operand_index = instruction.operands.items[1]; + const operand = mir.operands.get(operand_index); + + if (register_allocator.traceCopyChain(mir, instruction_selection, operand.u.register)) |register| { + return register; + } + + logln(.codegen, .register_allocation_problematic_hint, "[traceCopies] Missed oportunity for register allocation tracing copy chain for VR{}", .{virtual_register_index.uniqueInteger()}); + }, + else => |t| @panic(@tagName(t)), + } + + try_count += 1; + if (try_count >= definition_limit) break; + } + + return null; + } + + fn assignVirtualToPhysicalRegister(register_allocator: *RegisterAllocator, live_register: *LiveRegister, register: Register.Physical) void { + const virtual_register = live_register.virtual; + assert(live_register.physical == .no_register); + assert(register != .no_register); + 
live_register.physical = register; + register_allocator.register_states.set(register, .{ + .virtual = virtual_register, + }); + + logln(.codegen, .register_allocation_assignment, "Assigning V{} to {s}", .{ virtual_register.uniqueInteger(), @tagName(register) }); + // TODO: debug info + } + + fn usePhysicalRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, physical_register: Register.Physical) !bool { + const displaced_any = try register_allocator.displacePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + register_allocator.register_states.set(physical_register, .preassigned); + register_allocator.markUsedRegisterInInstruction(physical_register); + return displaced_any; + } + + fn displacePhysicalRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, physical_register: Register.Physical) !bool { + const state = register_allocator.register_states.getPtr(physical_register); + // print("Trying to displace register {s} with state {s}", .{ @tagName(physical_register), @tagName(state.*) }); + return switch (state.*) { + .free => false, + .preassigned => blk: { + state.* = .free; + break :blk true; + }, + .virtual => |virtual_register| blk: { + const live_reg = register_allocator.live_virtual_registers.getPtr(virtual_register).?; + const before = mir.getNextInstructionIndex(instruction_index); + try register_allocator.reload(mir, instruction_selection, before, virtual_register, physical_register); + state.* = .free; + live_reg.physical = .no_register; + live_reg.reloaded = true; + break :blk true; + }, + .livein => unreachable, + }; + } + + fn reload(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, before_index: usize, virtual_register: Register.Virtual.Index, physical_register: Register.Physical) !void { + const 
frame_index = try register_allocator.getStackSpaceFor(mir, instruction_selection, virtual_register); + const register_class = mir.virtual_registers.get(virtual_register).register_class; + logln(.codegen, .register_allocation_reload, "Frame index: {}", .{frame_index}); + + try instruction_selection.loadRegisterFromStackSlot(mir, before_index, physical_register, frame_index, register_class, virtual_register); + } + + fn getStackSpaceFor(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register: Register.Virtual.Index) !u32 { + if (register_allocator.stack_slots.get(virtual_register)) |frame_index| { + return frame_index; + } else { + const register_class = mir.virtual_registers.get(virtual_register).register_class; + const register_class_descriptor = register_class_descriptors.get(register_class); + assert(register_class_descriptor.spill_size > 0); + assert(register_class_descriptor.spill_alignment > 0); + const frame_index = try mir.createSpillStackObject(instruction_selection, register_class_descriptor.spill_size, register_class_descriptor.spill_alignment); + + try register_allocator.stack_slots.put(mir.allocator, virtual_register, frame_index); + return frame_index; + } + } + + fn freePhysicalRegister(register_allocator: *RegisterAllocator, physical_register: Register.Physical) void { + const state = register_allocator.register_states.getPtr(physical_register); + switch (state.*) { + .free => unreachable, + .preassigned => state.* = .free, + .virtual => |virtual_register_index| { + const live_register = register_allocator.live_virtual_registers.getPtr(virtual_register_index).?; + assert(live_register.physical == physical_register); + register_allocator.register_states.set(physical_register, .free); + live_register.physical = .no_register; + }, + .livein => unreachable, + } + } + + fn markUsedRegisterInInstruction(register_allocator: *RegisterAllocator, physical_register: Register.Physical) void { + 
register_allocator.used_in_instruction.setPresent(physical_register, true); + } + + fn unmarkUsedRegisterInInstruction(register_allocator: *RegisterAllocator, physical_register: Register.Physical) void { + register_allocator.used_in_instruction.setPresent(physical_register, false); + } + + fn definePhysicalRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, physical_register: Register.Physical) !bool { + const displaced_any = try register_allocator.displacePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + register_allocator.register_states.set(physical_register, .preassigned); + return displaced_any; + } + + fn defineVirtualRegister(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, instruction_index: Instruction.Index, operand_index: Operand.Index, virtual_register: Register.Virtual.Index, look_at_physical_register_uses: bool) !bool { + const instruction = mir.instructions.get(instruction_index); + const operand = mir.operands.get(operand_index); + const gop = try register_allocator.live_virtual_registers.getOrPut(mir.allocator, virtual_register); + if (!gop.found_existing) { + gop.value_ptr.* = .{ + .virtual = virtual_register, + }; + if (!operand.flags.dead_or_kill) { + var live_out = false; + if (live_out) { + // TODO + } else { + operand.flags.dead_or_kill = true; + } + } + } + const live_register = gop.value_ptr; + if (live_register.physical == .no_register) { + try register_allocator.allocateVirtualRegister(mir, instruction_selection, instruction_index, live_register, null, look_at_physical_register_uses); + } else { + assert(!register_allocator.isRegisterUsedInInstruction(live_register.physical, look_at_physical_register_uses)); + } + + const physical_register = live_register.physical; + assert(physical_register != .no_register); + if (live_register.reloaded or live_register.live_out) { 
+ const instruction_descriptor = instruction_descriptors.get(instruction.id); + if (!instruction_descriptor.flags.implicit_def) { + const spill_before = mir.getNextInstructionIndex(instruction_index); + const kill = live_register.last_use.invalid; + try register_allocator.spill(mir, instruction_selection, spill_before, virtual_register, physical_register, kill, live_register.live_out); + + live_register.last_use = Instruction.Index.invalid; + } + + live_register.live_out = false; + live_register.reloaded = false; + } + + // bundle? + + register_allocator.markUsedRegisterInInstruction(physical_register); + return mir.setPhysicalRegister(instruction_selection, operand_index, physical_register); + } + + fn spill(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, spill_before: usize, virtual_register: Register.Virtual.Index, physical_register: Register.Physical, kill: bool, live_out: bool) !void { + _ = live_out; + const frame_index = try register_allocator.getStackSpaceFor(mir, instruction_selection, virtual_register); + const register_class = mir.virtual_registers.get(virtual_register).register_class; + try instruction_selection.storeRegisterToStackSlot(mir, spill_before, physical_register, kill, frame_index, register_class, virtual_register); + // TODO: debug operands + } + + fn mayLiveIn(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register_index: Register.Virtual.Index) bool { + if (register_allocator.may_live_across_blocks.isSet(virtual_register_index.uniqueInteger())) { + unreachable; + } else { + const head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ + .index = .{ + .virtual = virtual_register_index, + }, + }); + + // TODO: setup iterator + var define_instructions = Instruction.Iterator.Get(.{ + .use = false, + .def = true, + .element = .instruction, + }).new(mir, head_index_ptr.*); + while (define_instructions.next()) |_| { + unreachable; 
+ } + + return false; + } + } + + fn mayLiveOut(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, virtual_register_index: Register.Virtual.Index) bool { + if (register_allocator.may_live_across_blocks.isSet(virtual_register_index.uniqueInteger())) { + unreachable; + } else { + if (false) { + // TODO: FIXME if block loops + } + + const head_index_ptr = mir.getRegisterListHead(instruction_selection, .{ + .index = .{ + .virtual = virtual_register_index, + }, + }); + var iterator = Instruction.Iterator.Get(.{ + .use = true, + .def = false, + .element = .instruction, + }).new(mir, head_index_ptr.*); + + const limit = 8; + var count: u32 = 0; + while (iterator.nextPointer()) |use_instruction| { + if (!use_instruction.parent.eq(instruction_selection.current_block)) { + register_allocator.may_live_across_blocks.set(virtual_register_index.uniqueInteger()); + // TODO: return !basic_block.successorsEmpty() + return false; + } + + count += 1; + if (count >= limit) { + register_allocator.may_live_across_blocks.set(virtual_register_index.uniqueInteger()); + // TODO: return !basic_block.successorsEmpty() + return false; + } + + // self loop def + if (false) { + unreachable; + } + } + + return false; + } + } + + fn reloadAtBegin(register_allocator: *RegisterAllocator, mir: *MIR, instruction_selection: *InstructionSelection, basic_block: BasicBlock.Index) !void { + _ = instruction_selection; + _ = mir; + _ = register_allocator; + _ = basic_block; + // if (register_allocator.live_virtual_registers.entries.len > 0) { + // // TODO: basic block liveins (regmasks?) 
+ // + // const live_registers = register_allocator.live_virtual_registers.values(); + // print("Live register count: {}", .{live_registers.len}); + // + // for (live_registers) |live_register| { + // const physical_register = live_register.physical; + // if (physical_register == .no_register) { + // continue; + // } + // + // if (register_allocator.register_states.get(physical_register) == .livein) { + // unreachable; + // } + // + // // assert? + // + // const virtual_register = live_register.virtual; + // if (false) { + // unreachable; + // } else { + // try register_allocator.reload(mir, instruction_selection, 0, virtual_register, physical_register); + // } + // } + // unreachable; + // } + } + }; + + fn getRegisters(operand: *const Operand, register_buffer: []Register) []const Register { + var registers: []Register = register_buffer; + registers.len = 0; + switch (operand.u) { + .register => |register| { + registers.len += 1; + registers[registers.len - 1] = register; + }, + .lea64mem => |lea64mem| { + if (lea64mem.gp64) |register| { + registers.len += 1; + registers[registers.len - 1] = register; + } + + if (lea64mem.scale_reg) |register| { + registers.len += 1; + registers[registers.len - 1] = register; + } + }, + .memory, + .immediate, + .pc_relative, + => {}, + // else => |t| @panic(@tagName(t)), + } + + return registers; + } + + pub fn allocateRegisters(mir: *MIR) !void { + logln(.codegen, .register_allocation_block, "\n[REGISTER ALLOCATION]\n", .{}); + const function_count = mir.functions.len; + var function_iterator = mir.functions.iterator(); + const register_count = @typeInfo(Register.Physical).Enum.fields.len; + _ = register_count; + const register_unit_count = 173; + _ = register_unit_count; + + for (0..function_count) |function_index| { + const function = function_iterator.nextPointer().?; + const instruction_selection = &mir.instruction_selections.items[function_index]; + logln(.codegen, .register_allocation_function_before, "Allocating registers 
for {}", .{function}); + + var block_i: usize = function.blocks.items.len; + var register_allocator = try RegisterAllocator.init(mir, instruction_selection); + + while (block_i > 0) { + block_i -= 1; + + const block_index = function.blocks.items[block_i]; + const block = mir.blocks.get(block_index); + + var instruction_i: usize = block.instructions.items.len; + + while (instruction_i > 0) { + instruction_i -= 1; + + const instruction_index = block.instructions.items[instruction_i]; + const instruction = mir.instructions.get(instruction_index); + logln(.codegen, .register_allocation_new_instruction, "===============\nInstruction {} (#{})", .{ instruction_i, instruction_index.uniqueInteger() }); + logln(.codegen, .register_allocation_new_instruction_function_before, "{}", .{function}); + + register_allocator.used_in_instruction = RegisterBitset.initEmpty(); + + var physical_register_use = false; + var register_mask = false; + var virtual_register_definition = false; + var register_definition = false; + var early_clobber = false; + var assign_live_throughs = false; + + for (instruction.operands.items, 0..) 
|operand_index, operand_i| { + _ = operand_i; + const operand = mir.operands.get(operand_index); + switch (operand.u) { + .register => |register| switch (register.index) { + .virtual => { + if (operand.flags.type == .def) { + register_definition = true; + virtual_register_definition = true; + if (operand.flags.early_clobber) { + early_clobber = true; + assign_live_throughs = true; + } + + // TODO + } + }, + .physical => |physical_register| { + if (!register_allocator.reserved.contains(physical_register)) { + if (operand.flags.type == .def) { + register_definition = true; + + const displaced_any = try register_allocator.definePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + if (operand.flags.early_clobber) { + early_clobber = true; + } + if (!displaced_any) { + operand.flags.dead_or_kill = true; + } + } + + if (operand.readsRegister()) { + physical_register_use = true; + } + } + }, + }, + else => {}, + } + } + + if (register_definition) { + if (virtual_register_definition) { + var rearranged_implicit_operands = true; + if (assign_live_throughs) { + unreachable; + } else { + while (rearranged_implicit_operands) { + rearranged_implicit_operands = false; + + for (instruction.operands.items) |operand_index| { + const operand = mir.operands.get(operand_index); + switch (operand.u) { + .register => |register| switch (operand.flags.type) { + .def => switch (register.index) { + .virtual => |virtual_register| { + rearranged_implicit_operands = try register_allocator.defineVirtualRegister(mir, instruction_selection, instruction_index, operand_index, virtual_register, false); + if (rearranged_implicit_operands) { + break; + } + }, + .physical => {}, + }, + else => {}, + }, + .lea64mem => |lea64mem| { + assert(lea64mem.gp64 == null); + assert(lea64mem.scale_reg == null); + }, + else => {}, + } + } + } + } + } + + var operand_i = instruction.operands.items.len; + while (operand_i > 0) { + operand_i -= 1; + + const operand_index = 
instruction.operands.items[operand_i]; + const operand = mir.operands.get(operand_index); + switch (operand.u) { + .register => |register| switch (operand.flags.type) { + .def => switch (register.index) { + .physical => |physical_register| { + register_allocator.freePhysicalRegister(physical_register); + register_allocator.unmarkUsedRegisterInInstruction(physical_register); + }, + .virtual => {}, + }, + .use => {}, + }, + else => {}, + } + } + } + + if (register_mask) { + unreachable; + } + + // Physical register use + if (physical_register_use) { + for (instruction.operands.items) |operand_index| { + const operand = mir.operands.get(operand_index); + + switch (operand.flags.type) { + .def => {}, + .use => switch (operand.u) { + .register => |register| switch (register.index) { + .physical => |physical_register| { + if (!register_allocator.reserved.contains(physical_register)) { + const displaced_any = try register_allocator.usePhysicalRegister(mir, instruction_selection, instruction_index, physical_register); + if (!displaced_any) { + operand.flags.dead_or_kill = true; + } + } + }, + .virtual => {}, + }, + else => {}, + }, + } + } + } + + var undef_use = false; + _ = undef_use; + var rearranged_implicit_operands = true; + while (rearranged_implicit_operands) { + rearranged_implicit_operands = false; + for (instruction.operands.items, 0..) 
|operand_index, operand_i| { + const operand = mir.operands.get(operand_index); + switch (operand.u) { + .register => |register| switch (operand.flags.type) { + .def => {}, + .use => switch (register.index) { + .physical => {}, + .virtual => |virtual_register_index| { + if (operand.flags.undef) { + unreachable; + } + rearranged_implicit_operands = try register_allocator.useVirtualRegister(mir, instruction_selection, instruction_index, virtual_register_index, @intCast(operand_i)); + if (rearranged_implicit_operands) break; + }, + }, + }, + else => {}, + } + } + } + + if (instruction.id == .copy and instruction.operands.items.len == 2) { + const dst_register = mir.operands.get(instruction.operands.items[0]).u.register.index; + const src_register = mir.operands.get(instruction.operands.items[1]).u.register.index; + + if (std.meta.eql(dst_register, src_register)) { + try register_allocator.coalesced.append(mir.allocator, instruction_index); + logln(.codegen, .register_allocation_instruction_avoid_copy, "Avoiding copy...", .{}); + } + } + } + + for (register_allocator.coalesced.items) |coalesced| { + for (block.instructions.items, 0..) 
|instruction_index, i| { + if (coalesced.eq(instruction_index)) { + const result = block.instructions.orderedRemove(i); + assert(result.eq(coalesced)); + break; + } + } else unreachable; + } + + logln(.codegen, .register_allocation_function_after, "Allocated registers for {}\n============", .{function}); + } + } + + const clear_virtual_registers = true; + if (clear_virtual_registers) { + mir.clearVirtualRegisters(); + } + } + + fn clearVirtualRegisters(mir: *MIR) void { + var vr_it = mir.virtual_registers.iterator(); + var vr_index = vr_it.getCurrentIndex(); + var verified_virtual_register_count: usize = 0; + var skipped: usize = 0; + while (vr_it.nextPointer()) |vr| { + verified_virtual_register_count += 1; + if (vr.use_def_list_head.invalid) { + skipped += 1; + continue; + } + + mir.verifyUseList(vr.use_def_list_head, vr_index); + vr_index = vr_it.getCurrentIndex(); + } + + logln(.codegen, .register_allocation_operand_list_verification, "Verified {} virtual registers ({} skipped)", .{ verified_virtual_register_count, skipped }); + } + + fn verifyUseList(mir: *MIR, start_operand_index: Operand.Index, register: Register.Virtual.Index) void { + var iterator = Instruction.Iterator.Get(.{ + .use = true, + .def = true, + .element = .operand, + }).new(mir, start_operand_index); + + while (iterator.nextPointer()) |operand| { + const instruction_index = operand.parent; + assert(!instruction_index.invalid); + const instruction = mir.instructions.get(instruction_index); + logln(.codegen, .register_allocation_operand_list_verification, "Verifying instruction #{}, operand #{}", .{ instruction_index.uniqueInteger(), mir.operands.indexOf(operand).uniqueInteger() }); + _ = instruction; + assert(operand.u == .register); + assert(operand.u.register.index == .virtual and operand.u.register.index.virtual.eq(register)); + } + + unreachable; + } + + fn getGP32Encoding(operand: Operand) Encoding.GP32 { + assert(operand.id == .gp32); + const physical_register = 
operand.u.register.index.physical; + const gp_register_encoding: Encoding.GP32 = switch (physical_register) { + .eax => .a, + .edi => .di, + else => |t| @panic(@tagName(t)), + }; + + return gp_register_encoding; + } + + fn getGP64Encoding(operand: Operand) Encoding.GP64 { + assert(operand.id == .gp64); + const physical_register = operand.u.register.index.physical; + const gp_register_encoding: Encoding.GP64 = switch (physical_register) { + .rax => .a, + .rdi => .di, + else => |t| @panic(@tagName(t)), + }; + + return gp_register_encoding; + } + + pub fn encode(mir: *MIR) !*emit.Result { + const image = try mir.allocator.create(emit.Result); + image.* = try emit.Result.create(mir.allocator, mir.target, mir.entry_point); + + var function_iterator = mir.functions.iterator(); + + var function_offsets = std.AutoArrayHashMapUnmanaged(Function.Index, u32){}; + try function_offsets.ensureTotalCapacity(mir.allocator, mir.functions.len); + try image.sections.items[0].symbol_table.ensureTotalCapacity(mir.allocator, mir.functions.len); + + while (function_iterator.nextPointer()) |function| { + const function_index = mir.functions.indexOf(function); + logln(.codegen, .encoding, "\n{s}:", .{function.name}); + + const function_offset: u32 = @intCast(image.getTextSection().index); + + function_offsets.putAssumeCapacityNoClobber(function_index, function_offset); + image.sections.items[0].symbol_table.putAssumeCapacityNoClobber(function.name, function_offset); + + const stack_size = blk: { + var result: u32 = 0; + + for (function.instruction_selection.stack_objects.items) |stack_object| { + assert(std.mem.isAligned(result, stack_object.alignment)); + result += @intCast(stack_object.size); + } + + break :blk result; + }; + + if (stack_size != 0) { + image.appendCodeByte(0x55); // push rbp + image.appendCode(&.{ 0x48, 0x89, 0xe5 }); // mov rbp, rsp + + // sub rsp, stack_offset + if (std.math.cast(u8, stack_size)) |stack_size_u8| { + image.appendCode(&.{ 0x48, 0x83, 0xec, stack_size_u8 
}); + } else { + unreachable; + } + } + + for (function.blocks.items) |block_index| { + const block = mir.blocks.get(block_index); + for (block.instructions.items) |instruction_index| { + const instruction = mir.instructions.get(instruction_index); + + const instruction_offset = image.getTextSection().index; + + switch (instruction.id) { + .mov32r0 => { + assert(instruction.operands.items.len == 1); + const operand = mir.operands.get(instruction.operands.items[0]); + const gp_register_encoding = getGP32Encoding(operand.*); + const new_instruction_id = Instruction.Id.xor32rr; + const instruction_descriptor = instruction_descriptors.get(new_instruction_id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + const direct = true; + const modrm = ModRm{ + .rm = @intCast(@intFromEnum(gp_register_encoding)), + .reg = @intCast(@intFromEnum(gp_register_encoding)), + .mod = @as(u2, @intFromBool(direct)) << 1 | @intFromBool(direct), + }; + image.appendCodeByte(@bitCast(modrm)); + }, + .ret => {}, + .mov32mr => { + assert(instruction.operands.items.len == 2); + const source_operand = mir.operands.get(instruction.operands.items[1]); + const source_gp32 = getGP32Encoding(source_operand.*); + + const destination_operand = mir.operands.get(instruction.operands.items[0]); + assert(destination_operand.u == .memory); + const memory = destination_operand.u.memory; + const instruction_descriptor = instruction_descriptors.get(instruction.id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + + const modrm = ModRm{ + .rm = @intFromEnum(Encoding.GP32.bp), + .reg = @intCast(@intFromEnum(source_gp32)), + .mod = @as(u2, @intFromBool(false)) << 1 | @intFromBool(true), + }; + image.appendCodeByte(@bitCast(modrm)); + + switch (memory.addressing_mode.base) { + .frame_index => |frame_index| { + const stack_offset = blk: { + var computed_stack_offset: usize = 0; + for 
(function.instruction_selection.stack_objects.items[0 .. frame_index + 1]) |stack_object| { + assert(std.mem.isAligned(computed_stack_offset, stack_object.alignment)); + computed_stack_offset += stack_object.size; + } + + break :blk -@as(i64, @intCast(computed_stack_offset)); + }; + const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; + + const stack_bytes = std.mem.asBytes(&stack_offset)[0..displacement_bytes]; + image.appendCode(stack_bytes); + }, + else => |t| @panic(@tagName(t)), + } + }, + .mov32rm => { + assert(instruction.operands.items.len == 2); + + const instruction_descriptor = instruction_descriptors.get(instruction.id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + + const destination_operand = mir.operands.get(instruction.operands.items[0]); + const destination_gp32 = getGP32Encoding(destination_operand.*); + + const source_operand = mir.operands.get(instruction.operands.items[1]); + assert(source_operand.u == .memory); + const source_memory = source_operand.u.memory; + + const modrm = ModRm{ + .rm = @intFromEnum(Encoding.GP32.bp), + .reg = @intCast(@intFromEnum(destination_gp32)), + .mod = @as(u2, @intFromBool(false)) << 1 | @intFromBool(true), + }; + image.appendCodeByte(@bitCast(modrm)); + + switch (source_memory.addressing_mode.base) { + .frame_index => |frame_index| { + const stack_offset = blk: { + var computed_stack_offset: usize = 0; + for (function.instruction_selection.stack_objects.items[0 .. 
frame_index + 1]) |stack_object| { + assert(std.mem.isAligned(computed_stack_offset, stack_object.alignment)); + computed_stack_offset += stack_object.size; + } + + break :blk -@as(i64, @intCast(computed_stack_offset)); + }; + const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; + + const stack_bytes = std.mem.asBytes(&stack_offset)[0..displacement_bytes]; + image.appendCode(stack_bytes); + }, + else => |t| @panic(@tagName(t)), + } + }, + .mov32ri64 => { + assert(instruction.operands.items.len == 2); + const source_operand = mir.operands.get(instruction.operands.items[1]); + const source_immediate: u32 = @intCast(source_operand.u.immediate); + + const destination_operand = mir.operands.get(instruction.operands.items[0]); + const destination_gp64 = getGP64Encoding(destination_operand.*); + const destination_gp32 = switch (destination_gp64) { + inline else => |gp64| @field(Encoding.GP32, @tagName(gp64)), + }; + + const opcode = @as(u8, 0xb8) | @as(u3, @intCast(@intFromEnum(destination_gp32))); + image.appendCodeByte(opcode); + + image.appendCode(std.mem.asBytes(&source_immediate)); + }, + .movsx64rm32 => { + assert(instruction.operands.items.len == 2); + + const destination_operand = mir.operands.get(instruction.operands.items[0]); + const destination_register = getGP64Encoding(destination_operand.*); + + const source_operand = mir.operands.get(instruction.operands.items[1]); + const source_memory = source_operand.u.memory; + + const rex = Rex{ + .b = false, + .x = false, + .r = false, + .w = true, + }; + image.appendCodeByte(@bitCast(rex)); + + const instruction_descriptor = instruction_descriptors.get(instruction.id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + + const modrm = ModRm{ + .rm = @intFromEnum(Encoding.GP32.bp), + .reg = @intCast(@intFromEnum(destination_register)), + .mod = @as(u2, 
@intFromBool(false)) << 1 | @intFromBool(true), + }; + image.appendCodeByte(@bitCast(modrm)); + + switch (source_memory.addressing_mode.base) { + .frame_index => |frame_index| { + const stack_offset = blk: { + var computed_stack_offset: usize = 0; + for (function.instruction_selection.stack_objects.items[0 .. frame_index + 1]) |stack_object| { + assert(std.mem.isAligned(computed_stack_offset, stack_object.alignment)); + computed_stack_offset += stack_object.size; + } + + break :blk -@as(i64, @intCast(computed_stack_offset)); + }; + const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; + + const stack_bytes = std.mem.asBytes(&stack_offset)[0..displacement_bytes]; + image.appendCode(stack_bytes); + }, + else => |t| @panic(@tagName(t)), + } + }, + .syscall => image.appendCode(&.{ 0x0f, 0x05 }), + .ud2 => image.appendCode(&.{ 0x0f, 0x0b }), + .call64pcrel32 => { + // TODO: emit relocation + assert(instruction.operands.items.len == 1); + const operand = mir.operands.get(instruction.operands.items[0]); + const instruction_descriptor = instruction_descriptors.get(instruction.id); + const opcode: u8 = @intCast(instruction_descriptor.opcode); + image.appendCodeByte(opcode); + + switch (operand.u) { + .pc_relative => |pc_relative| { + // TODO: fix + const callee = pc_relative.function_declaration; + const caller = function_index; + + const instruction_len = 5; + + if (callee.uniqueInteger() <= caller.uniqueInteger()) { + const callee_offset: i64 = @intCast(function_offsets.get(callee).?); + const caller_offset: i64 = @intCast(instruction_offset + instruction_len); + const offset: i32 = @intCast(callee_offset - caller_offset); + image.appendCode(std.mem.asBytes(&offset)); + } else { + image.appendCode(&.{ 0, 0, 0, 0 }); + unreachable; + } + }, + else => |t| @panic(@tagName(t)), + } + }, + .copy => { + assert(instruction.operands.items.len == 2); + const 
destination_operand = mir.operands.get(instruction.operands.items[0]); + const source_operand = mir.operands.get(instruction.operands.items[1]); + assert(destination_operand.id == source_operand.id); + + // const destination_physical_register = destination_operand.u.register.index.physical; + // _ = destination_physical_register; + // const source_physical_register = source_operand.u.register.index.physical; + switch (destination_operand.id) { + .gp32 => { + image.appendCodeByte(0x89); + + const destination_register = getGP32Encoding(destination_operand.*); + const source_register = getGP32Encoding(source_operand.*); + const modrm = ModRm{ + .rm = @intCast(@intFromEnum(destination_register)), + .reg = @intCast(@intFromEnum(source_register)), + .mod = @as(u2, @intFromBool(true)) << 1 | @intFromBool(true), + }; + image.appendCodeByte(@bitCast(modrm)); }, else => |t| @panic(@tagName(t)), } }, else => |t| @panic(@tagName(t)), - }, - 2 => switch (operands[0]) { - .gp_register => |dst_gp_register| switch (operands[1]) { - .gp_register => |src_gp_register| { - assert(dst_gp_register.size == src_gp_register.size); - const direct = true; - const rm = dst_gp_register.value.?; - const reg = src_gp_register.value.?; + } - if (Rex.create(.{ - .rm = rm, - .reg = reg, - .rm_size = dst_gp_register.size, - })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); - } - - const opcode_option: [2]u8 = switch (instruction.id) { - .mov => .{ 0x88, 0x89 }, - .xor => .{ 0x30, 0x31 }, - else => |t| @panic(@tagName(t)), - }; - - image.appendCodeByte(switch (dst_gp_register.size) { - .one => opcode_option[0], - else => opcode_option[1], - }); - - const modrm = ModRm{ - .rm = @truncate(@intFromEnum(rm)), - .reg = @truncate(@intFromEnum(reg)), - .mod = @as(u2, @intFromBool(direct)) << 1 | @intFromBool(direct), - }; - image.appendCodeByte(@bitCast(modrm)); - }, - .immediate => |src_immediate| { - assert(src_immediate.type.bit_count % @bitSizeOf(u8) == 0); - print("DST GP register: {}. 
SRC immediate: {}\n", .{ dst_gp_register, src_immediate }); - switch (instruction.id) { - .mov => switch (@intFromEnum(dst_gp_register.value.?) > std.math.maxInt(u3)) { - true => unreachable, // Use RM encoding - false => { - const opcode: u8 = switch (dst_gp_register.size) { - .one => 0xb0, - else => 0xb8, - }; - const opcode_byte = opcode | @intFromEnum(dst_gp_register.value.?); - image.appendCodeByte(opcode_byte); - const immediate_byte_count = @as(usize, 1) << @intFromEnum(dst_gp_register.size); - print("Immediate byte count: {}\n", .{immediate_byte_count}); - for (std.mem.asBytes(&src_immediate.value)[0..immediate_byte_count]) |immediate_byte| { - image.appendCodeByte(immediate_byte); - } - }, - }, - else => { - const immediate8_different_than_register = src_immediate.type.bit_count == 8 and dst_gp_register.size != .one; - switch (dst_gp_register.value.? == .a and !immediate8_different_than_register) { - true => unreachable, - false => { - const reg: x86_64.GPRegister = @enumFromInt(@as(u3, switch (instruction.id) { - .sub => 5, - else => |t| @panic(@tagName(t)), - })); - if (Rex.create(.{ .reg = reg, .rm = dst_gp_register.value.?, .rm_size = dst_gp_register.size })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); - } - const opcode: u8 = switch (immediate8_different_than_register) { - true => switch (instruction.id) { - .sub => 0x83, - else => |t| @panic(@tagName(t)), - }, - false => unreachable, - }; - image.appendCodeByte(opcode); - - const rm = dst_gp_register.value.?; - const direct = true; - const modrm = ModRm{ - .rm = @truncate(@intFromEnum(rm)), - .reg = @truncate(@intFromEnum(reg)), - .mod = @as(u2, @intFromBool(direct)) << 1 | @intFromBool(direct), - }; - image.appendCodeByte(@bitCast(modrm)); - - switch (Size.fromBitCount(src_immediate.type.bit_count)) { - inline else => |size| image.appendCode(std.mem.asBytes(&@as(size.toInteger(), @intCast(src_immediate.value)))), - } - }, - } - }, - } - }, - .stack => |src_stack| { - const stack_offset 
= -@as(i64, @intCast(src_stack.offset)); - for (std.mem.asBytes(&stack_offset)) |stack_byte| { - print("0x{x} ", .{stack_byte}); - } - print("\n", .{}); - const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; - - const reg = dst_gp_register.value.?; - if (Rex.create(.{ .reg = reg, .rm_size = dst_gp_register.size })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); - } - const opcode_option: [2]u8 = switch (instruction.id) { - .mov => .{ 0x8a, 0x8b }, - else => |t| @panic(@tagName(t)), - }; - - image.appendCodeByte(switch (dst_gp_register.size) { - .one => opcode_option[0], - else => opcode_option[1], - }); - - const rm = x86_64.GPRegister.bp; - const modrm = ModRm{ - .rm = @truncate(@intFromEnum(rm)), - .reg = @truncate(@intFromEnum(reg)), - .mod = 0b01, - }; - image.appendCodeByte(@bitCast(modrm)); - - image.appendCode(std.mem.asBytes(&stack_offset)[0..displacement_bytes]); - }, - else => |t| @panic(@tagName(t)), - }, - .stack => |dst_stack| switch (operands[1]) { - .gp_register => |src_gp_register| switch (instruction.id) { - .mov => { - const stack_offset = -@as(i64, @intCast(dst_stack.offset)); - for (std.mem.asBytes(&stack_offset)) |stack_byte| { - print("0x{x} ", .{stack_byte}); - } - print("\n", .{}); - const displacement_bytes: u3 = if (std.math.cast(i8, stack_offset)) |_| @sizeOf(i8) else if (std.math.cast(i32, stack_offset)) |_| @sizeOf(i32) else unreachable; - - const reg = src_gp_register.value.?; - if (Rex.create(.{ .reg = reg, .rm_size = src_gp_register.size })) |rex_byte| { - image.appendCodeByte(@bitCast(rex_byte)); - } - const opcode_option: [2]u8 = switch (instruction.id) { - .mov => .{ 0x88, 0x89 }, - else => |t| @panic(@tagName(t)), - }; - - image.appendCodeByte(switch (src_gp_register.size) { - .one => opcode_option[0], - else => opcode_option[1], - }); - - const rm = x86_64.GPRegister.bp; - const modrm = ModRm{ - .rm = 
@truncate(@intFromEnum(rm)), - .reg = @truncate(@intFromEnum(reg)), - .mod = 0b01, - }; - image.appendCodeByte(@bitCast(modrm)); - - image.appendCode(std.mem.asBytes(&stack_offset)[0..displacement_bytes]); - }, - else => |t| @panic(@tagName(t)), - }, - else => |t| @panic(@tagName(t)), - }, - else => |t| @panic(@tagName(t)), - }, - 3 => switch (instruction.id) { - else => |t| @panic(@tagName(t)), - }, - 4 => switch (instruction.id) { - else => |t| @panic(@tagName(t)), - }, - else => unreachable, + if (instruction_offset != image.getTextSection().index) { + const print_tags = true; + if (print_tags) { + var offset = @tagName(instruction.id).len + 2; + log(.codegen, .encoding, "{s}: ", .{@tagName(instruction.id)}); + const margin = 16; + while (offset < margin) : (offset += 1) { + log(.codegen, .encoding, " ", .{}); + } + } + for (image.getTextSection().content[instruction_offset..image.getTextSection().index]) |byte| { + log(.codegen, .encoding, "0x{x:0>2} ", .{byte}); + } + log(.codegen, .encoding, "\n", .{}); + } } } - for (local_relocations.items) |relocation| { - const source_offset: i64 = relocation.instruction_byte_offset + relocation.instruction_length; - const destination_offset: i64 = function.blocks.get(relocation.target).?; - print("Source: {}. 
Destination: {}\n", .{ source_offset, destination_offset }); - const displacement_offset = destination_offset - source_offset; - const address_to_address = @intFromPtr(&image.sections.text.content[relocation.instruction_byte_offset + relocation.source_address_writer_offset]); - switch (relocation.size) { - inline .one, .four => |relocation_size| { - const RelocationInteger = switch (relocation_size) { - .one => i8, - .four => i32, - else => @compileError("Unreachable"), - }; - const ptr: *align(1) RelocationInteger = @ptrFromInt(address_to_address); - ptr.* = @intCast(displacement_offset); - }, - else => unreachable, + const last_block_index = function.blocks.items[function.blocks.items.len - 1]; + const last_block = mir.blocks.get(last_block_index); + const last_block_last_instruction_index = last_block.instructions.items[last_block.instructions.items.len - 1]; + const last_block_last_instruction = mir.instructions.get(last_block_last_instruction_index); + + if (last_block_last_instruction.id == .ret) { + if (stack_size != 0) { + // add rsp, stack_offset + if (std.math.cast(u8, stack_size)) |stack_size_u8| { + image.appendCode(&.{ 0x48, 0x83, 0xc4, stack_size_u8 }); + } else { + unreachable; + } + + image.appendCodeByte(0x5d); // pop rbp } + + image.appendCodeByte(0xc3); } - - print("Function code:\n", .{}); - for (image.sections.text.content[function.instruction_byte_offset..][0..image.sections.text.index]) |code_byte| { - print("0x{x:0>2} ", .{code_byte}); - } - print("\n", .{}); } - for (global_relocations.items) |global_relocation| { - _ = global_relocation; - unreachable; - } - - image.entry_point = mir.functions.items[intermediate.entry_point].instruction_byte_offset; - return image; } - fn encodeRel32Instruction(image: *emit.Result, arguments: struct { - target: u32, - opcode: u8, - }) !void { - const instruction_byte_offset: u32 = @intCast(image.sections.text.index); - const instruction_length = 5; - - const source_offset: i64 = instruction_byte_offset + 
instruction_length; - const destination_offset: i64 = arguments.target; - const offset: i32 = @intCast(destination_offset - source_offset); - - image.appendCodeByte(arguments.opcode); - image.appendCode(std.mem.asBytes(&offset)); + fn getRegisterListHead(mir: *MIR, instruction_selection: *InstructionSelection, register: Register) *Operand.Index { + switch (register.index) { + .physical => |physical| { + const operand_index = instruction_selection.physical_register_use_or_definition_list.getPtr(physical); + return operand_index; + }, + .virtual => |virtual_register_index| { + const virtual_register = mir.virtual_registers.get(virtual_register_index); + return &virtual_register.use_def_list_head; + }, + } } - fn encodeRel32InstructionWithRelocation(mir: *const MIR, image: *emit.Result, comptime relocation_type: RelocationType, arguments: struct { - relocations: *ArrayList(Relocation(RelocationIndex(relocation_type))), - target: RelocationIndex(relocation_type), - opcode: u8, - }) !void { - const instruction_byte_offset = image.sections.text.index; - const source_address_writer_offset = 1; - const instruction_length = 5; - const size = .four; + const Function = struct { + blocks: ArrayList(BasicBlock.Index) = .{}, + instruction_selection: *InstructionSelection, + mir: *MIR, + name: []const u8, - image.appendCodeByte(arguments.opcode); - image.appendCode(&(.{0} ** 4)); + pub const List = BlockList(@This()); + pub const Index = List.Index; + pub const Allocation = List.Allocation; - try arguments.relocations.append(mir.allocator, .{ - .instruction_byte_offset = @intCast(instruction_byte_offset), - .source_address_writer_offset = source_address_writer_offset, - .instruction_length = instruction_length, - .target = arguments.target, + pub fn format(function: *const Function, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + const function_name = function.name; + try writer.print("{s}:\n", .{function_name}); + for (function.blocks.items, 
0..) |block_index, function_block_index| { + try writer.print("#{}: ({})\n", .{ function_block_index, block_index.uniqueInteger() }); + const block = function.mir.blocks.get(block_index); + for (block.instructions.items, 0..) |instruction_index, block_instruction_index| { + try writer.print("%{} (${}): ", .{ block_instruction_index, instruction_index.uniqueInteger() }); + const instruction = function.mir.instructions.get(instruction_index).*; + try writer.print("{s}", .{@tagName(instruction.id)}); + for (instruction.operands.items, 0..) |operand_index, i| { + const operand = function.mir.operands.get(operand_index); + try writer.print(" O{} ", .{operand_index.uniqueInteger()}); + switch (operand.u) { + .register => |register| { + switch (register.index) { + .physical => |physical| try writer.writeAll(@tagName(physical)), + .virtual => |virtual| try writer.print("VR{}", .{virtual.uniqueInteger()}), + } + }, + .memory => |memory| { + const base = memory.addressing_mode.base; + switch (base) { + .register_base => unreachable, + .frame_index => |frame_index| try writer.print("SF{}", .{frame_index}), + } + }, + else => try writer.writeAll(@tagName(operand.u)), + } + + if (i < instruction.operands.items.len - 1) { + try writer.writeByte(','); + } + } + + try writer.writeByte('\n'); + } + + try writer.writeByte('\n'); + } + _ = options; + _ = fmt; + } + }; + + fn buildInstruction(mir: *MIR, instruction_selection: *InstructionSelection, instruction: Instruction.Id, operands: []const Operand) !Instruction.Index { + // Some sanity check + { + if (instruction != .copy) { + const descriptor = instruction_descriptors.getPtrConst(instruction); + if (descriptor.operands.len != operands.len) unreachable; + for (descriptor.operands, operands) |descriptor_operand, operand| { + switch (descriptor_operand.id) { + .unknown => {}, + else => if (descriptor_operand.id != operand.id) unreachable, + } + } + } + } + + var list = try ArrayList(Operand.Index).initCapacity(mir.allocator, 
operands.len); + const instruction_allocation = try mir.instructions.addOne(mir.allocator); + // TODO: MachineRegisterInfo::addRegOperandToUseList + for (operands) |operand_value| { + const operand_allocation = try mir.operands.append(mir.allocator, operand_value); + list.appendAssumeCapacity(operand_allocation.index); + const operand = operand_allocation.ptr; + const operand_index = operand_allocation.index; + operand_allocation.ptr.parent = instruction_allocation.index; + + switch (operand.u) { + .register => mir.addRegisterOperandFromUseList(instruction_selection, operand_index), + .lea64mem => |lea64mem| { + // TODO + assert(lea64mem.gp64 == null); + assert(lea64mem.scale_reg == null); + }, + .memory, + .immediate, + .pc_relative, + => {}, + } + } + + instruction_allocation.ptr.* = .{ + .id = instruction, + .operands = list, + .parent = instruction_selection.current_block, + }; + + if (instruction == .copy) { + const i = instruction_allocation.ptr.*; + _ = i; + // print("Built copy: DST: {}. 
SRC: {}", .{ mir.operands.get(i.operands.items[0]).u.register.index, mir.operands.get(i.operands.items[1]).u.register.index }); + } + + return instruction_allocation.index; + } + + fn append(mir: *MIR, instruction_selection: *InstructionSelection, id: Instruction.Id, operands: []const Operand) !void { + const instruction = try mir.buildInstruction(instruction_selection, id, operands); + const current_block = mir.blocks.get(instruction_selection.current_block); + try current_block.instructions.append(mir.allocator, instruction); + } + + fn createSpillStackObject(mir: *MIR, instruction_selection: *InstructionSelection, spill_size: u32, spill_alignment: u32) !u32 { + const frame_index = try mir.createStackObject(instruction_selection, spill_size, spill_alignment, ir.Instruction.Index.invalid, true); + return frame_index; + } + + fn createStackObject(mir: *MIR, instruction_selection: *InstructionSelection, size: u64, asked_alignment: u32, ir_instruction: ir.Instruction.Index, is_spill_slot: bool) !u32 { + const stack_realignable = false; + const alignment = clampStackAlignment(!stack_realignable, asked_alignment, 16); + const index: u32 = @intCast(instruction_selection.stack_objects.items.len); + try instruction_selection.stack_objects.append(mir.allocator, .{ .size = size, + .alignment = alignment, + .spill_slot = is_spill_slot, + .ir = ir_instruction, }); + return index; + } + + fn clampStackAlignment(clamp: bool, alignment: u32, stack_alignment: u32) u32 { + if (!clamp or alignment <= stack_alignment) return alignment; + return stack_alignment; } }; -const RegisterImmediate = struct { - immediate: ir.Value.Index, - register: GPRegister, - register_size: Size, - immediate_size: Size, -}; - -const RegisterMemoryRegister = struct { - destination: GPRegister, - source: GPRegister, - size: Size, - direct: bool, -}; - -const Displacement = struct { - instruction_index: u16, - size: Size, - source: u16, - destination: u16, -}; - -const RmResult = struct { - rex: Rex, - 
mod_rm: ModRm, -}; - -const RmAndRexArguments = packed struct { - rm: GPRegister, - reg: GPRegister, - direct: bool, - bit64: bool, - sib: bool, -}; const ModRm = packed struct(u8) { rm: u3, reg: u3, @@ -871,44 +4162,130 @@ const Rex = packed struct(u8) { w: bool, fixed: u4 = 0b0100, - fn create(args: struct { - rm: ?GPRegister = null, - reg: ?GPRegister = null, - sib: bool = false, - rm_size: ?Size = null, - }) ?Rex { - const rex_byte = Rex{ - .b = if (args.rm) |rm| @intFromEnum(rm) > std.math.maxInt(u3) else false, - .x = args.sib, - .r = if (args.reg) |reg| @intFromEnum(reg) > std.math.maxInt(u3) else false, - .w = if (args.rm_size) |rm_size| rm_size == .eight else false, - }; + // fn create32RR(args: struct { + // rm: Encoding.GP32, + // reg: Encoding.GP32, + // sib: bool = false, + // }) ?Rex { + // if (args.sib) { + // unreachable; + // } else { + // } + // } - if (@as(u4, @truncate(@as(u8, @bitCast(rex_byte)))) != 0) { - return rex_byte; - } else { - return null; + // fn create(args: struct { + // rm: ?GPRegister = null, + // reg: ?GPRegister = null, + // sib: bool = false, + // rm_size: ?Size = null, + // }) ?Rex { + // const rex_byte = Rex{ + // .b = if (args.rm) |rm| @intFromEnum(rm) > std.math.maxInt(u3) else false, + // .x = args.sib, + // .r = if (args.reg) |reg| @intFromEnum(reg) > std.math.maxInt(u3) else false, + // .w = if (args.rm_size) |rm_size| rm_size == .eight else false, + // }; + // + // if (@as(u4, @truncate(@as(u8, @bitCast(rex_byte)))) != 0) { + // return rex_byte; + // } else { + // return null; + // } + // } +}; + +fn getIrType(intermediate: *ir.Result, ir_instruction_index: ir.Instruction.Index) ir.Type { + const ir_instruction = intermediate.instructions.get(ir_instruction_index); + return switch (ir_instruction.*) { + .argument => |argument_index| intermediate.arguments.get(argument_index).type, + .stack => |stack_index| intermediate.stack_references.get(stack_index).type, + .load => |load_index| getIrType(intermediate, 
intermediate.loads.get(load_index).instruction), + .syscall => |_| .i64, + .load_integer => |integer| integer.type, + .load_string_literal => .i64, + .call => |call_index| intermediate.function_declarations.get(intermediate.calls.get(call_index).function).return_type, + .sign_extend => |cast_index| intermediate.casts.get(cast_index).type, + else => |t| @panic(@tagName(t)), + }; +} + +fn resolveType(ir_type: ir.Type) ValueType.Id { + return switch (ir_type) { + inline //.i8, + //.i16, + .i32, + .i64, + => |ir_type_ct| @field(ValueType.Id, @typeInfo(ir.Type).Enum.fields[@intFromEnum(ir_type_ct)].name), + .i8, .i16 => unreachable, + .void, + .noreturn, + => unreachable, + }; +} + +const RegisterSet = AutoArrayHashMap(Register.Physical, void); + +fn getSubregisters(allocator: Allocator, reg: Register.Physical) !RegisterSet { + var result = RegisterSet{}; + + try getSubregistersRecursive(allocator, &result, reg); + + return result; +} + +fn getSubregistersRecursive(allocator: Allocator, set: *RegisterSet, reg: Register.Physical) !void { + if (set.get(reg) == null) { + try set.putNoClobber(allocator, reg, {}); + const register_descriptor = register_descriptors.getPtrConst(reg); + for (register_descriptor.subregisters) |subreg| { + try getSubregistersRecursive(allocator, set, subreg); } } +} + +const Encoding = struct { + const GP32 = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, + }; + const GP64 = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, + }; }; -const GPRegister = enum(u4) { - a = 0, - c = 1, - d = 2, - b = 3, - sp = 4, - bp = 5, - si = 6, - di = 7, - r8 = 8, - r9 = 9, - r10 = 10, - r11 = 11, - r12 = 12, - r13 = 13, - r14 = 14, - r15 = 15, +const LiveRegister = struct { + last_use: Instruction.Index = 
Instruction.Index.invalid, + virtual: Register.Virtual.Index, + physical: Register.Physical = Register.Physical.no_register, + live_out: bool = false, + reloaded: bool = false, }; - -const syscall_registers = [7]GPRegister{ .a, .di, .si, .d, .r10, .r8, .r9 }; diff --git a/src/data_structures.zig b/src/data_structures.zig index 39b0df9..85ea8d0 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -40,10 +40,10 @@ pub fn BlockList(comptime T: type) type { index: u6, block: u24, _reserved: bool = false, - valid: bool = true, + invalid: bool = false, pub const invalid = Index{ - .valid = false, + .invalid = true, .index = 0, .block = 0, }; @@ -53,32 +53,45 @@ pub fn BlockList(comptime T: type) type { } pub fn uniqueInteger(index: Index) u32 { - assert(index.valid); + assert(!index.invalid); return @as(u30, @truncate(@as(u32, @bitCast(index)))); } + + pub fn fromInteger(usize_index: usize) Index { + const index: u32 = @intCast(usize_index); + const block: u24 = @intCast(index / item_count); + const i: u6 = @intCast(index % item_count); + return .{ + .index = i, + .block = block, + }; + } }; pub const Iterator = struct { - block_index: u26, - element_index: u7, + block_index: u24, + element_index: u6, list: *const List, + pub fn getCurrentIndex(i: *const Iterator) Index { + return .{ + .block = i.block_index, + .index = @intCast(i.element_index), + }; + } + pub fn next(i: *Iterator) ?T { return if (i.nextPointer()) |ptr| ptr.* else null; } pub fn nextPointer(i: *Iterator) ?*T { - if (i.element_index >= item_count) { - i.block_index += 1; - i.element_index = 0; - } - - while (i.block_index < i.list.blocks.items.len) : (i.block_index += 1) { - while (i.element_index < item_count) : (i.element_index += 1) { - if (i.list.blocks.items[i.block_index].bitset.isSet(i.element_index)) { - const index = i.element_index; - i.element_index += 1; - return &i.list.blocks.items[i.block_index].items[index]; + for (i.block_index..i.list.blocks.items.len) |block_index| { + 
for (@as(u8, i.element_index)..item_count) |element_index| { + if (i.list.blocks.items[i.block_index].bitset.isSet(element_index)) { + i.element_index = @intCast(element_index); + i.element_index +%= 1; + i.block_index = @as(u24, @intCast(block_index)) + @intFromBool(i.element_index < element_index); + return &i.list.blocks.items[block_index].items[element_index]; } } } @@ -101,7 +114,7 @@ pub fn BlockList(comptime T: type) type { } pub fn get(list: *List, index: Index) *T { - assert(index.valid); + assert(!index.invalid); return &list.blocks.items[index.block].items[index.index]; } @@ -136,6 +149,7 @@ pub fn BlockList(comptime T: type) type { new_block.* = .{}; const index = new_block.allocateIndex() catch unreachable; const ptr = &new_block.items[index]; + list.first_block += @intFromBool(block_index != 0); break :blk Allocation{ .ptr = ptr, .index = .{ @@ -159,13 +173,11 @@ pub fn BlockList(comptime T: type) type { } } - pub fn indexOf(list: *List, elem: *T) Index { + pub fn indexOf(list: *const List, elem: *const T) Index { const address = @intFromPtr(elem); - std.debug.print("Items: {}. Block count: {}\n", .{ list.len, list.blocks.items.len }); for (list.blocks.items, 0..) |*block, block_index| { const base = @intFromPtr(&block.items[0]); const top = base + @sizeOf(T) * item_count; - std.debug.print("Bitset: {}. address: 0x{x}. Base: 0x{x}. 
Top: 0x{x}\n", .{ block.bitset, address, base, top }); if (address >= base and address < top) { return .{ .block = @intCast(block_index), @@ -199,3 +211,113 @@ pub fn enumFromString(comptime E: type, string: []const u8) ?E { } } else null; } + +pub fn StringKeyMap(comptime Value: type) type { + return struct { + list: std.MultiArrayList(Data) = .{}, + const Key = u32; + const Data = struct { + key: Key, + value: Value, + }; + + pub fn length(string_map: *@This()) usize { + return string_map.list.len; + } + + fn hash(string: []const u8) Key { + const string_key: Key = @truncate(std.hash.Wyhash.hash(0, string)); + return string_key; + } + + pub fn getKey(string_map: *const @This(), string: []const u8) ?Key { + return if (string_map.getKeyPtr(string)) |key_ptr| key_ptr.* else null; + } + + pub fn getKeyPtr(string_map: *const @This(), string_key: Key) ?*const Key { + for (string_map.list.items(.key)) |*key_ptr| { + if (key_ptr.* == string_key) { + return key_ptr; + } + } else { + return null; + } + } + + pub fn getValue(string_map: *const @This(), key: Key) ?Value { + if (string_map.getKeyPtr(key)) |key_ptr| { + const index = string_map.indexOfKey(key_ptr); + return string_map.list.items(.value)[index]; + } else { + return null; + } + } + + pub fn indexOfKey(string_map: *const @This(), key_ptr: *const Key) usize { + return @divExact(@intFromPtr(key_ptr) - @intFromPtr(string_map.list.items(.key).ptr), @sizeOf(Key)); + } + + const GOP = struct { + key: Key, + found_existing: bool, + }; + + pub fn getOrPut(string_map: *@This(), allocator: Allocator, string: []const u8, value: Value) !GOP { + const string_key: Key = @truncate(std.hash.Wyhash.hash(0, string)); + for (string_map.list.items(.key)) |key| { + if (key == string_key) return .{ + .key = string_key, + .found_existing = true, + }; + } else { + try string_map.list.append(allocator, .{ + .key = string_key, + .value = value, + }); + + return .{ + .key = string_key, + .found_existing = false, + }; + } + } + }; +} + 
+const page_size = std.mem.page_size; +extern fn pthread_jit_write_protect_np(enabled: bool) void; + +pub fn mmap(size: usize, flags: packed struct { + executable: bool = false, +}) ![]align(page_size) u8 { + return switch (@import("builtin").os.tag) { + .windows => blk: { + const windows = std.os.windows; + break :blk @as([*]align(page_size) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size]; + }, + .linux, .macos => |os_tag| blk: { + const jit = switch (os_tag) { + .macos => 0x800, + .linux => 0, + else => unreachable, + }; + const execute_flag: switch (os_tag) { + .linux => u32, + .macos => c_int, + else => unreachable, + } = if (flags.executable) std.os.PROT.EXEC else 0; + const protection_flags: u32 = @intCast(std.os.PROT.READ | std.os.PROT.WRITE | execute_flag); + const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE | jit; + + const result = try std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0); + if (@import("builtin").cpu.arch == .aarch64 and @import("builtin").os.tag == .macos) { + if (flags.executable) { + pthread_jit_write_protect_np(false); + } + } + + break :blk result; + }, + else => @compileError("OS not supported"), + }; +} diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig index fc63727..45fbc47 100644 --- a/src/frontend/lexical_analyzer.zig +++ b/src/frontend/lexical_analyzer.zig @@ -10,6 +10,8 @@ const ArrayList = data_structures.ArrayList; const enumFromString = data_structures.enumFromString; const Compilation = @import("../Compilation.zig"); +const File = Compilation.File; +const logln = Compilation.logln; const fs = @import("../fs.zig"); pub const Token = packed struct(u64) { @@ -35,6 +37,15 @@ pub const Token = packed struct(u64) { fixed_keyword_fn = 0x0e, fixed_keyword_unreachable = 0x0f, fixed_keyword_return = 0x10, + fixed_keyword_ssize = 0x11, + fixed_keyword_usize = 0x12, + 
fixed_keyword_switch = 0x13, + fixed_keyword_if = 0x14, + fixed_keyword_else = 0x15, + fixed_keyword_struct = 0x16, + fixed_keyword_enum = 0x17, + fixed_keyword_union = 0x18, + fixed_keyword_extern = 0x19, keyword_unsigned_integer = 0x1f, keyword_signed_integer = 0x20, bang = '!', // 0x21 @@ -86,6 +97,15 @@ pub const FixedKeyword = enum { @"fn", @"unreachable", @"return", + ssize, + usize, + @"switch", + @"if", + @"else", + @"struct", + @"enum", + @"union", + @"extern", }; pub const Result = struct { @@ -93,7 +113,14 @@ pub const Result = struct { time: u64, }; -pub fn analyze(allocator: Allocator, text: []const u8) !Result { +pub const Logger = enum { + main, + + pub var bitset = std.EnumSet(Logger).initEmpty(); +}; + +pub fn analyze(allocator: Allocator, text: []const u8, file_index: File.Index) !Result { + _ = file_index; const time_start = std.time.Instant.now() catch unreachable; var tokens = try ArrayList(Token).initCapacity(allocator, text.len / 8); var index: usize = 0; @@ -114,8 +141,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { } // const identifier = text[start_index..][0 .. index - start_index]; - // _ = identifier; - // std.debug.print("Identifier: {s}\n", .{identifier}); + // logln("Identifier: {s}", .{identifier}); if (start_character == 'u' or start_character == 's') { var index_integer = start_index + 1; @@ -138,7 +164,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)), } else .identifier; }, - '(', ')', '{', '}', '-', '=', ';', '#', '@', ',', '.' => |operator| blk: { + '(', ')', '{', '}', '[', ']', '-', '=', ';', '#', '@', ',', '.', ':', '>', '<', '*', '!' => |operator| blk: { index += 1; break :blk @enumFromInt(operator); }, @@ -185,11 +211,8 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { }); } - const should_log = true; - if (should_log) { - for (tokens.items, 0..) 
|token, i| { - std.debug.print("#{} {s}\n", .{ i, @tagName(token.id) }); - } + for (tokens.items, 0..) |token, i| { + logln(.lexer, .main, "#{} {s}\n", .{ i, @tagName(token.id) }); } const time_end = std.time.Instant.now() catch unreachable; diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index ce75292..82ed488 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -2,6 +2,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; const equal = std.mem.eql; +const panic = std.debug.panic; const Compilation = @import("../Compilation.zig"); const File = Compilation.File; const Module = Compilation.Module; @@ -10,9 +11,12 @@ const Package = Compilation.Package; const ArgumentList = Compilation.ArgumentList; const Assignment = Compilation.Assignment; const Block = Compilation.Block; +const Call = Compilation.Call; const Declaration = Compilation.Declaration; +const Enum = Compilation.Enum; const Field = Compilation.Field; const Function = Compilation.Function; +const Intrinsic = Compilation.Intrinsic; const Loop = Compilation.Loop; const Scope = Compilation.Scope; const ScopeType = Compilation.ScopeType; @@ -20,6 +24,23 @@ const Struct = Compilation.Struct; const Type = Compilation.Type; const Value = Compilation.Value; +const log = Compilation.log; +const logln = Compilation.logln; + +pub const Logger = enum { + type, + identifier, + symbol_declaration, + scope_node, + node, + typecheck, + @"switch", + block, + call, + + pub var bitset = std.EnumSet(Logger).initEmpty(); +}; + const lexical_analyzer = @import("lexical_analyzer.zig"); const Token = lexical_analyzer.Token; @@ -32,27 +53,26 @@ const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const HashMap = data_structures.AutoHashMap; -const print = std.debug.print; - const Analyzer = struct { allocator: Allocator, module: *Module, current_file: 
File.Index, - fn getSourceFile(analyzer: *Analyzer, scope_index: Scope.Index) []const u8 { + fn getScopeSourceFile(analyzer: *Analyzer, scope_index: Scope.Index) []const u8 { const scope = analyzer.module.scopes.get(scope_index); const file = analyzer.module.files.get(scope.file); return file.source_code; } - fn getNode(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) Node { + fn getScopeNode(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) Node { const scope = analyzer.module.scopes.get(scope_index); const file = analyzer.module.files.get(scope.file); - const result = file.syntactic_analyzer_result.nodes.items[node_index.unwrap()]; - return result; + const result = &file.syntactic_analyzer_result.nodes.items[node_index.unwrap()]; + logln(.sema, .scope_node, "Fetching node #{} (0x{x}) from scope #{} from file #{} with id: {s}\n", .{ node_index.uniqueInteger(), @intFromPtr(result), scope_index.uniqueInteger(), scope.file.uniqueInteger(), @tagName(result.id) }); + return result.*; } - fn getToken(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) Token { + fn getScopeToken(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) Token { const scope = analyzer.module.scopes.get(scope_index); const file = analyzer.module.files.get(scope.file); const result = file.lexical_analyzer_result.tokens.items[token_index]; @@ -60,14 +80,32 @@ const Analyzer = struct { return result; } - fn getNodeList(analyzer: *Analyzer, scope_index: Scope.Index, list_index: u32) ArrayList(Node.Index) { + fn getScopeNodeList(analyzer: *Analyzer, scope_index: Scope.Index, node: Node) ArrayList(Node.Index) { const scope = analyzer.module.scopes.get(scope_index); - const file = analyzer.module.files.get(scope.file); - return file.syntactic_analyzer_result.node_lists.items[list_index]; + return getFileNodeList(analyzer, scope.file, node); + } + + fn getFileNodeList(analyzer: *Analyzer, file_index: File.Index, node: 
Node) ArrayList(Node.Index) { + assert(node.id == .node_list); + const file = analyzer.module.files.get(file_index); + const list_index = node.left; + return file.syntactic_analyzer_result.node_lists.items[list_index.uniqueInteger()]; + } + + fn getFileToken(analyzer: *Analyzer, file_index: File.Index, token: Token.Index) Token { + const file = analyzer.module.files.get(file_index); + const result = file.lexical_analyzer_result.tokens.items[token]; + return result; + } + + fn getFileNode(analyzer: *Analyzer, file_index: File.Index, node_index: Node.Index) Node { + const file = analyzer.module.files.get(file_index); + const result = file.syntactic_analyzer_result.nodes.items[node_index.unwrap()]; + return result; } fn comptimeBlock(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Value.Index { - const comptime_node = analyzer.getNode(scope_index, node_index); + const comptime_node = analyzer.getScopeNode(scope_index, node_index); const comptime_block = try analyzer.block(scope_index, .{ .none = {} }, comptime_node.left); const value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ @@ -94,8 +132,9 @@ const Analyzer = struct { } fn block(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index { + logln(.sema, .block, "Resolving block from scope #{} in file #{}\n", .{ scope_index.uniqueInteger(), analyzer.module.scopes.get(scope_index).file.uniqueInteger() }); var reaches_end = true; - const block_node = analyzer.getNode(scope_index, node_index); + const block_node = analyzer.getScopeNode(scope_index, node_index); var statement_nodes = ArrayList(Node.Index){}; switch (block_node.id) { .block_one, .comptime_block_one => { @@ -106,7 +145,7 @@ const Analyzer = struct { try statement_nodes.append(analyzer.allocator, block_node.left); try statement_nodes.append(analyzer.allocator, block_node.right); }, - .block, .comptime_block => statement_nodes = 
analyzer.getNodeList(scope_index, block_node.left.unwrap()), + .block, .comptime_block => unreachable, //statement_nodes = analyzer.getNodeList(scope_index, block_node.left.unwrap()), else => |t| @panic(@tagName(t)), } @@ -115,7 +154,8 @@ const Analyzer = struct { .block, .block_zero, .block_one, .block_two => false, else => |t| @panic(@tagName(t)), }; - print("Is comptime: {}\n", .{is_comptime}); + + logln(.sema, .block, "Is comptime: {}\n", .{is_comptime}); var statements = ArrayList(Value.Index){}; @@ -124,105 +164,56 @@ const Analyzer = struct { unreachable; } - const statement_node = analyzer.getNode(scope_index, statement_node_index); + const statement_node = analyzer.getScopeNode(scope_index, statement_node_index); const statement_value = switch (statement_node.id) { - inline .assign, .simple_while => |statement_id| blk: { - const specific_value_index = switch (statement_id) { - .assign => { - print("Assign: #{}\n", .{node_index.value}); - assert(statement_node.id == .assign); - switch (statement_node.left.valid) { - // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` - false => { - const right_value_allocation = try analyzer.module.values.addOne(analyzer.allocator); - right_value_allocation.ptr.* = .{ - .unresolved = .{ - .node_index = statement_node.right, - }, - }; - try analyzer.resolveNode(right_value_allocation.ptr, scope_index, ExpectType.none, statement_node.right); - // switch (right_value_allocation.ptr.*) { - // else => |t| std.debug.print("\n\n\n\n\nASSIGN RIGHT: {s}\n\n\n\n", .{@tagName(t)}), - // } - try statements.append(analyzer.allocator, right_value_allocation.index); - continue; - }, - true => { - // const id = analyzer.tokenIdentifier(.token); - // print("id: {s}\n", .{id}); - // const left = try analyzer.expression(scope_index, ExpectType.none, statement_node.left); + .assign => (try analyzer.module.values.append(analyzer.allocator, try analyzer.processAssignment(scope_index, 
statement_node_index))).index, + .simple_while => blk: { + const loop_allocation = try analyzer.module.loops.append(analyzer.allocator, .{ + .condition = Value.Index.invalid, + .body = Value.Index.invalid, + .breaks = false, + }); + loop_allocation.ptr.condition = (try analyzer.unresolvedAllocate(scope_index, ExpectType.boolean, statement_node.left)).index; + loop_allocation.ptr.body = (try analyzer.unresolvedAllocate(scope_index, ExpectType.none, statement_node.right)).index; - // if (analyzer.module.values.get(left).isComptime() and analyzer.module.values.get(right).isComptime()) { - // unreachable; - // } else { - // const assignment_index = try analyzer.module.assignments.append(analyzer.allocator, .{ - // .store = result.left, - // .load = result.right, - // }); - // return assignment_index; - // } - unreachable; - }, - } - }, - .simple_while => statement: { - const loop_allocation = try analyzer.module.loops.append(analyzer.allocator, .{ - .condition = Value.Index.invalid, - .body = Value.Index.invalid, - .breaks = false, - }); - loop_allocation.ptr.condition = (try analyzer.unresolvedAllocate(scope_index, ExpectType.boolean, statement_node.left)).index; - loop_allocation.ptr.body = (try analyzer.unresolvedAllocate(scope_index, ExpectType.none, statement_node.right)).index; + // TODO: bool true + reaches_end = loop_allocation.ptr.breaks or unreachable; - // TODO: bool true - reaches_end = loop_allocation.ptr.breaks or unreachable; - - break :statement loop_allocation.index; - }, - else => unreachable, - }; - const value = @unionInit(Value, switch (statement_id) { - .assign => "assign", - .simple_while => "loop", - else => unreachable, - }, specific_value_index); - const value_allocation = try analyzer.module.values.append(analyzer.allocator, value); + const value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ + .loop = loop_allocation.index, + }); break :blk value_allocation.index; }, .@"unreachable" => blk: { reaches_end = false; - 
break :blk Values.@"unreachable".getIndex(); + break :blk Compilation.Values.@"unreachable".getIndex(); + }, + .simple_symbol_declaration => blk: { + const declaration_index = try analyzer.symbolDeclaration(scope_index, statement_node_index, .local); + const declaration = analyzer.module.declarations.get(declaration_index); + const init_value = analyzer.module.values.get(declaration.init_value); + switch (init_value.isComptime() and declaration.mutability == .@"const") { + // Dont add comptime declaration statements + true => continue, + false => { + const statement_value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ + .declaration = declaration_index, + }); + break :blk statement_value_allocation.index; + }, + } }, - .simple_variable_declaration => (try analyzer.module.values.append(analyzer.allocator, .{ - .declaration = try analyzer.symbolDeclaration(scope_index, statement_node_index, .local), - })).index, .@"return" => blk: { reaches_end = false; - const return_expression: Value.Index = switch (statement_node_index.valid) { - // TODO: expect type - true => ret: { - const return_value_allocation = try analyzer.module.values.addOne(analyzer.allocator); - return_value_allocation.ptr.* = .{ - .unresolved = .{ - .node_index = statement_node.left, - }, - }; - try analyzer.resolveNode(return_value_allocation.ptr, scope_index, expect_type, statement_node.left); - break :ret return_value_allocation.index; - }, - false => @panic("TODO: ret void"), - }; - const return_value_allocation = try analyzer.module.returns.append(analyzer.allocator, .{ - .value = return_expression, - }); + const return_value_allocation = try analyzer.module.values.append(analyzer.allocator, try analyzer.processReturn(scope_index, expect_type, statement_node_index)); - const return_expression_value_allocation = try analyzer.module.values.append(analyzer.allocator, .{ - .@"return" = return_value_allocation.index, - }); - - break :blk return_expression_value_allocation.index; 
+ break :blk return_value_allocation.index; }, + .call_two, .call => (try analyzer.module.values.append(analyzer.allocator, .{ + .call = try analyzer.processCall(scope_index, statement_node_index), + })).index, + .@"switch" => (try analyzer.module.values.append(analyzer.allocator, try analyzer.processSwitch(scope_index, statement_node_index))).index, else => |t| @panic(@tagName(t)), }; @@ -237,41 +228,420 @@ const Analyzer = struct { return block_allocation.index; } - fn doIdentifier(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_token: Token.Index, node_scope_index: Scope.Index) !Value.Index { - const identifier_hash = try analyzer.identifierFromToken(node_scope_index, node_token); - const scope = analyzer.module.scopes.get(scope_index); - // TODO: search in upper scopes too - const identifier_scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_hash); - if (identifier_scope_lookup.found_existing) { - const declaration_index = identifier_scope_lookup.value_ptr.*; - const declaration = analyzer.module.declarations.get(declaration_index); - const init_value = analyzer.module.values.get(declaration.init_value); - print("Declaration found: {}\n", .{init_value}); - switch (init_value.*) { - .unresolved => |ur| try analyzer.resolveNode(init_value, scope_index, expect_type, ur.node_index), - else => {}, - } - if (init_value.isComptime() and declaration.mutability == .@"const") { - return declaration.init_value; - } else { - const ref_allocation = try analyzer.module.values.append(analyzer.allocator, .{ - .declaration_reference = declaration_index, - }); - return ref_allocation.index; + fn processCall(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Call.Index { + const node = analyzer.getScopeNode(scope_index, node_index); + logln(.sema, .call, "Node index: {}. 
Left index: {}\n", .{ node_index.uniqueInteger(), node.left.uniqueInteger() }); + assert(!node.left.invalid); + const left_value_index = switch (!node.left.invalid) { + true => blk: { + const member_or_namespace_node_index = node.left; + assert(!member_or_namespace_node_index.invalid); + const this_value_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, member_or_namespace_node_index); + break :blk this_value_allocation.index; + }, + false => unreachable, //Value.Index.invalid, + }; + + const left_type = switch (left_value_index.invalid) { + false => switch (analyzer.module.values.get(left_value_index).*) { + .function => |function_index| analyzer.module.function_prototypes.get(analyzer.module.types.get(analyzer.module.functions.get(function_index).prototype).function).return_type, + else => |t| @panic(@tagName(t)), + }, + true => Type.Index.invalid, + }; + const arguments_index = switch (node.id) { + .call, .call_two => |call_tag| (try analyzer.module.argument_lists.append(analyzer.allocator, .{ + .array = b: { + const argument_list_node_index = node.right; + const call_argument_node_list = switch (call_tag) { + .call => analyzer.getScopeNodeList(scope_index, analyzer.getScopeNode(scope_index, argument_list_node_index)).items, + .call_two => &.{argument_list_node_index}, + else => unreachable, + }; + + switch (analyzer.module.values.get(left_value_index).*) { + .function => |function_index| { + const function = analyzer.module.functions.get(function_index); + const function_prototype = analyzer.module.function_prototypes.get(analyzer.module.types.get(function.prototype).function); + const argument_declarations = function_prototype.arguments.?; + logln(.sema, .call, "Argument declaration count: {}. 
Argument node list count: {}\n", .{ argument_declarations.len, call_argument_node_list.len }); + var argument_array = ArrayList(Value.Index){}; + if (argument_declarations.len == call_argument_node_list.len) { + for (argument_declarations, call_argument_node_list) |argument_declaration_index, argument_node_index| { + const argument_declaration = analyzer.module.declarations.get(argument_declaration_index); + // const argument_declaration_type = analyzer.module.types.get(argument_declaration.type); + // assert(argument_declaration.type.valid); + const call_argument_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType{ + .type_index = argument_declaration.type, + }, argument_node_index); + try call_argument_allocation.ptr.typeCheck(analyzer.module, argument_declaration.type); + // const call_argument_type_index = call_argument_allocation.ptr.getType(analyzer.module); + // const call_argument_type = analyzer.module.types.get(call_argument_type_index); + // if (call_argument_type_index != argument_declaration.type) { + // if (std.meta.activeTag(call_argument_type.*) == std.meta.activeTag(argument_declaration_type.*)) { + // if (!call_argument_type.equalTypeCanCoerce(argument_declaration_type)) { + // unreachable; + // } + // } else { + // try call_argument_type.promote(argument_declaration_type); + // call_argument_allocation.ptr.setType(argument_declaration.type); + // } + // } + + try argument_array.append(analyzer.allocator, call_argument_allocation.index); + } + + break :b argument_array; + } else { + panic("Function call has argument count mismatch: call has {}, function declaration has {}\n", .{ call_argument_node_list.len, argument_declarations.len }); + } + }, + else => |t| @panic(@tagName(t)), + } + }, + })).index, + .call_one => ArgumentList.Index.invalid, + else => |t| @panic(@tagName(t)), + }; + const call_allocation = try analyzer.module.calls.append(analyzer.allocator, .{ + .value = left_value_index, + .arguments = arguments_index, + + 
.type = left_type, + }); + + return call_allocation.index; + } + + fn typeCheckEnumLiteral(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index, enum_type: *const Enum) !?u32 { + const enum_name = tokenBytes(analyzer.getScopeToken(scope_index, token_index), analyzer.getScopeSourceFile(scope_index)); + const enum_name_hash = try analyzer.processIdentifier(enum_name); + + for (enum_type.fields.items) |enum_field_index| { + const enum_field = analyzer.module.enum_fields.get(enum_field_index); + const existing = analyzer.module.getName(enum_field.name).?; + if (enum_field.name == enum_name_hash) { + return enum_name_hash; } + + logln(.sema, .typecheck, "Existing enum field \"{s}\" != enum literal \"{s}\"\n", .{ existing, enum_name }); } else { - std.debug.panic("Identifier not found in scope #{} of file #{} referenced by scope #{} of file #{}: {s}", .{ scope_index.uniqueInteger(), scope.file.uniqueInteger(), node_scope_index.uniqueInteger(), analyzer.module.scopes.get(node_scope_index).file.uniqueInteger(), tokenBytes(analyzer.getToken(scope_index, node_token), analyzer.getSourceFile(scope_index)) }); + return null; + } + } + + fn processSwitch(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Value { + const node = analyzer.getScopeNode(scope_index, node_index); + assert(node.id == .@"switch"); + + analyzer.debugNode(scope_index, node_index); + + const switch_expr = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, node.left); + const switch_case_list_node = analyzer.getScopeNode(scope_index, node.right); + const switch_case_node_list = switch (switch_case_list_node.id) { + .node_list => analyzer.getScopeNodeList(scope_index, switch_case_list_node).items, + else => |t| @panic(@tagName(t)), + }; + + switch (switch_expr.ptr.*) { + .enum_field => |e_field_index| { + const e_field = analyzer.module.enum_fields.get(e_field_index); + const enum_type = analyzer.module.enums.get(e_field.parent); + const enum_field_name = 
analyzer.module.getName(e_field.name); + _ = enum_field_name; + + var else_case_index: ?usize = null; + _ = else_case_index; + var existing_enums = ArrayList(u32){}; + var switch_case_groups = try ArrayList(ArrayList(u32)).initCapacity(analyzer.allocator, switch_case_node_list.len); + + for (switch_case_node_list, 0..) |switch_case_node_index, index| { + _ = index; + const switch_case_node = analyzer.getScopeNode(scope_index, switch_case_node_index); + switch (switch_case_node.left.invalid) { + false => { + const switch_case_condition_node = analyzer.getScopeNode(scope_index, switch_case_node.left); + var switch_case_group = ArrayList(u32){}; + switch (switch_case_condition_node.id) { + .enum_literal => { + if (try typeCheckEnumLiteral(analyzer, scope_index, switch_case_condition_node.token + 1, enum_type)) |enum_name_hash| { + for (existing_enums.items) |existing| { + if (enum_name_hash == existing) { + // Duplicate case + unreachable; + } + } + + try switch_case_group.append(analyzer.allocator, enum_name_hash); + try existing_enums.append(analyzer.allocator, enum_name_hash); + } else { + unreachable; + } + }, + .node_list => { + const node_list = analyzer.getScopeNodeList(scope_index, switch_case_condition_node); + try switch_case_group.ensureTotalCapacity(analyzer.allocator, node_list.items.len); + for (node_list.items) |case_condition_node_index| { + const case_condition_node = analyzer.getScopeNode(scope_index, case_condition_node_index); + switch (case_condition_node.id) { + .enum_literal => { + if (try typeCheckEnumLiteral(analyzer, scope_index, case_condition_node.token + 1, enum_type)) |enum_name_hash| { + for (existing_enums.items) |existing| { + if (enum_name_hash == existing) { + // Duplicate case + unreachable; + } + } + + try existing_enums.append(analyzer.allocator, enum_name_hash); + switch_case_group.appendAssumeCapacity(enum_name_hash); + } else { + unreachable; + } + }, + else => |t| @panic(@tagName(t)), + } + } + }, + else => |t| 
@panic(@tagName(t)), + } + + switch_case_groups.appendAssumeCapacity(switch_case_group); + }, + true => { + unreachable; + // if (existing_enums.items.len == enum_type.fields.items.len) { + // unreachable; + // } + // + // else_case_index = index; + }, + } + } + + const group_index = for (switch_case_groups.items, 0..) |switch_case_group, switch_case_group_index| { + break for (switch_case_group.items) |case_name| { + if (e_field.name == case_name) { + break switch_case_group_index; + } + } else continue; + } else { + unreachable; + }; + + logln(.sema, .@"switch", "Index: {}\n", .{group_index}); + + const true_switch_case_node = analyzer.getScopeNode(scope_index, switch_case_node_list[group_index]); + var result = Value{ + .unresolved = .{ + .node_index = true_switch_case_node.right, + }, + }; + + try analyzer.resolveNode(&result, scope_index, ExpectType.none, true_switch_case_node.right); + + return result; + }, + else => |t| @panic(@tagName(t)), + } + + unreachable; + } + + fn processAssignment(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Value { + const node = analyzer.getScopeNode(scope_index, node_index); + assert(node.id == .assign); + const assignment = switch (node.left.invalid) { + // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` + true => { + var result = Value{ + .unresolved = .{ + .node_index = node.right, + }, + }; + + try analyzer.resolveNode(&result, scope_index, ExpectType.none, node.right); + + return result; + }, + false => { + // const id = analyzer.tokenIdentifier(.token); + // logln("id: {s}\n", .{id}); + // const left = try analyzer.expression(scope_index, ExpectType.none, statement_node.left); + + // if (analyzer.module.values.get(left).isComptime() and analyzer.module.values.get(right).isComptime()) { + // unreachable; + // } else { + // const assignment_index = try analyzer.module.assignments.append(analyzer.allocator, .{ + // .store = result.left, + // .load = 
result.right, + // }); + // return assignment_index; + // } + unreachable; + }, + }; + _ = assignment; + + unreachable; + } + + fn processReturn(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) !Value { + const node = analyzer.getScopeNode(scope_index, node_index); + const return_expression: Value.Index = switch (node_index.invalid) { + // TODO: expect type + false => ret: { + const return_value_allocation = try analyzer.module.values.addOne(analyzer.allocator); + return_value_allocation.ptr.* = .{ + .unresolved = .{ + .node_index = node.left, + }, + }; + try analyzer.resolveNode(return_value_allocation.ptr, scope_index, expect_type, node.left); + break :ret return_value_allocation.index; + }, + true => @panic("TODO: ret void"), + }; + + const return_value_allocation = try analyzer.module.returns.append(analyzer.allocator, .{ + .value = return_expression, + }); + + return .{ + .@"return" = return_value_allocation.index, + }; + } + + const DeclarationLookup = struct { + declaration: Declaration.Index, + scope: Scope.Index, + }; + + fn lookupDeclarationInCurrentAndParentScopes(analyzer: *Analyzer, scope_index: Scope.Index, identifier_hash: u32) ?DeclarationLookup { + var scope_iterator = scope_index; + while (!scope_iterator.invalid) { + const scope = analyzer.module.scopes.get(scope_iterator); + if (scope.declarations.get(identifier_hash)) |declaration_index| { + return .{ + .declaration = declaration_index, + .scope = scope_iterator, + }; + } + + scope_iterator = scope.parent; + } + + return null; + } + + fn doIdentifier(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_token: Token.Index, node_scope_index: Scope.Index) !Value.Index { + const identifier = analyzer.tokenIdentifier(node_scope_index, node_token); + logln(.sema, .identifier, "Referencing identifier: \"{s}\"\n", .{identifier}); + const identifier_hash = try analyzer.processIdentifier(identifier); + + if 
(analyzer.lookupDeclarationInCurrentAndParentScopes(scope_index, identifier_hash)) |lookup| { + const declaration_index = lookup.declaration; + const declaration = analyzer.module.declarations.get(declaration_index); + + // Up until now, only arguments have no initialization value + const typecheck_result = switch (declaration.init_value.invalid) { + false => blk: { + const init_value = analyzer.module.values.get(declaration.init_value); + logln(.sema, .identifier, "Declaration found: {}\n", .{init_value}); + const is_unresolved = init_value.* == .unresolved; + switch (is_unresolved) { + true => { + try analyzer.resolveNode(init_value, lookup.scope, expect_type, init_value.unresolved.node_index); + declaration.type = init_value.getType(analyzer.module); + switch (init_value.*) { + .function => |function_index| { + try analyzer.module.function_name_map.put(analyzer.allocator, function_index, declaration.name); + }, + else => {}, + } + }, + false => {}, + } + + logln(.sema, .identifier, "Declaration resolved as: {}\n", .{init_value}); + logln(.sema, .identifier, "Declaration mutability: {s}. 
Is comptime: {}\n", .{ @tagName(declaration.mutability), init_value.isComptime() }); + + const typecheck_result = try analyzer.typeCheck(expect_type, declaration.type); + + if (init_value.isComptime() and declaration.mutability == .@"const") { + assert(!declaration.init_value.invalid); + assert(typecheck_result == .success); + return declaration.init_value; + } + + break :blk typecheck_result; + }, + true => try analyzer.typeCheck(expect_type, declaration.type), + }; + + const ref_allocation = try analyzer.module.values.append(analyzer.allocator, .{ + .declaration_reference = .{ + .value = declaration_index, + .type = switch (expect_type) { + .none => declaration.type, + .type_index => switch (typecheck_result) { + .success => expect_type.type_index, + else => declaration.type, + }, + .flexible_integer => blk: { + assert(!declaration.type.invalid); + break :blk declaration.type; + }, + }, + }, + }); + + return switch (typecheck_result) { + .success => ref_allocation.index, + inline .zero_extend, .sign_extend => |extend| blk: { + const cast_allocation = try analyzer.module.casts.append(analyzer.allocator, .{ + .value = ref_allocation.index, + .type = switch (expect_type) { + .flexible_integer => |flexible_integer| t: { + const cast_type = Type.Integer.getIndex(.{ + .signedness = switch (extend) { + .zero_extend => .unsigned, + .sign_extend => .signed, + else => unreachable, + }, + .bit_count = flexible_integer.byte_count << 3, + }); + break :t cast_type; + }, + else => |t| @panic(@tagName(t)), + }, + }); + const value_allocation = try analyzer.module.values.append(analyzer.allocator, @unionInit(Value, @tagName(extend), cast_allocation.index)); + break :blk value_allocation.index; + }, + }; + } else { + const scope = analyzer.module.scopes.get(scope_index); + panic("Identifier \"{s}\" not found in scope #{} of file #{} referenced by scope #{} of file #{}: {s}", .{ identifier, scope_index.uniqueInteger(), scope.file.uniqueInteger(), node_scope_index.uniqueInteger(), 
analyzer.module.scopes.get(node_scope_index).file.uniqueInteger(), tokenBytes(analyzer.getScopeToken(scope_index, node_token), analyzer.getScopeSourceFile(scope_index)) }); } } fn getArguments(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !ArrayList(Node.Index) { var arguments = ArrayList(Node.Index){}; - const node = analyzer.getNode(scope_index, node_index); + const node = analyzer.getScopeNode(scope_index, node_index); switch (node.id) { .compiler_intrinsic_two => { try arguments.append(analyzer.allocator, node.left); try arguments.append(analyzer.allocator, node.right); }, + .compiler_intrinsic => { + const argument_list_node_index = node.left; + assert(!argument_list_node_index.invalid); + const node_list_node = analyzer.getScopeNode(scope_index, argument_list_node_index); + const node_list = analyzer.getScopeNodeList(scope_index, node_list_node); + + return node_list; + }, else => |t| @panic(@tagName(t)), } @@ -279,8 +649,8 @@ const Analyzer = struct { } fn resolveNode(analyzer: *Analyzer, value: *Value, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!void { - const node = analyzer.getNode(scope_index, node_index); - print("Resolving node #{}: {}\n", .{ node_index.uniqueInteger(), node }); + const node = analyzer.getScopeNode(scope_index, node_index); + logln(.sema, .node, "Resolving node #{} in scope #{} from file #{}: {}\n", .{ node_index.uniqueInteger(), scope_index.uniqueInteger(), analyzer.module.scopes.get(scope_index).file.uniqueInteger(), node }); assert(value.* == .unresolved); @@ -294,7 +664,7 @@ const Analyzer = struct { switch (expect_type) { .none => {}, .type_index => |expected_type| { - if (@as(u32, @bitCast(type_boolean)) != @as(u32, @bitCast(expected_type))) { + if (@as(u32, @bitCast(Type.boolean)) != @as(u32, @bitCast(expected_type))) { @panic("TODO: compile error"); } }, @@ -306,47 +676,57 @@ const Analyzer = struct { // break :blk Values.getIndex(.bool_true); }, - 
.compiler_intrinsic_one, .compiler_intrinsic_two => blk: { + .compiler_intrinsic_one, .compiler_intrinsic_two, .compiler_intrinsic => blk: { const intrinsic_name = analyzer.tokenIdentifier(scope_index, node.token + 1); - const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable; - print("Intrinsic: {s}\n", .{@tagName(intrinsic)}); + logln(.sema, .node, "Intrinsic: {s}\n", .{intrinsic_name}); + const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse panic("Unknown intrinsic: {s}\n", .{intrinsic_name}); switch (intrinsic) { .import => { assert(node.id == .compiler_intrinsic_one); - const import_argument = analyzer.getNode(scope_index, node.left); + const import_argument = analyzer.getScopeNode(scope_index, node.left); switch (import_argument.id) { .string_literal => { const import_name = analyzer.tokenStringLiteral(scope_index, import_argument.token); const import_file = try analyzer.module.importFile(analyzer.allocator, analyzer.current_file, import_name); + logln(.sema, .node, "Importing \"{s}\"...\n", .{import_name}); - if (import_file.file.is_new) { - // TODO: fix error - try analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, import_file.file.ptr); - } else { - unreachable; - } - - break :blk .{ - .type = try analyzeFile(value, analyzer.allocator, analyzer.module, import_file.file.ptr, import_file.file.index), + const result = .{ + .type = switch (import_file.file.is_new) { + true => true_block: { + const new_file_index = import_file.file.index; + try analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, new_file_index); + const analyze_result = try analyzeFile(value, analyzer.allocator, analyzer.module, new_file_index); + logln(.sema, .node, "Done analyzing {s}!\n", .{import_name}); + break :true_block analyze_result; + }, + false => false_block: { + const file_type = import_file.file.ptr.type; + assert(!file_type.invalid); + break :false_block file_type; + }, + }, 
}; + + break :blk result; }, else => unreachable, } }, .syscall => { var argument_nodes = try analyzer.getArguments(scope_index, node_index); - print("Argument count: {}\n", .{argument_nodes.items.len}); + logln(.sema, .node, "Argument count: {}\n", .{argument_nodes.items.len}); if (argument_nodes.items.len > 0 and argument_nodes.items.len <= 6 + 1) { - const number_allocation = try analyzer.unresolvedAllocate(scope_index, .{ + const argument_expect_type = .{ .flexible_integer = .{ .byte_count = 8, }, - }, argument_nodes.items[0]); + }; + const number_allocation = try analyzer.unresolvedAllocate(scope_index, argument_expect_type, argument_nodes.items[0]); const number = number_allocation.index; - assert(number.valid); + assert(!number.invalid); var arguments = std.mem.zeroes([6]Value.Index); for (argument_nodes.items[1..], 0..) |argument_node_index, argument_index| { - const argument_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, argument_node_index); + const argument_allocation = try analyzer.unresolvedAllocate(scope_index, argument_expect_type, argument_node_index); arguments[argument_index] = argument_allocation.index; } @@ -366,24 +746,65 @@ const Analyzer = struct { unreachable; } }, + .@"error" => { + assert(node.id == .compiler_intrinsic_one); + const message_node = analyzer.getScopeNode(scope_index, node.left); + switch (message_node.id) { + .string_literal => panic("error: {s}", .{analyzer.tokenStringLiteral(scope_index, message_node.token)}), + else => |t| @panic(@tagName(t)), + } + unreachable; + }, } unreachable; }, .function_definition => blk: { - const function_prototype_index = try analyzer.functionPrototype(scope_index, node.left); + const function_scope_allocation = try analyzer.allocateScope(.{ + .parent = scope_index, + .file = analyzer.module.scopes.get(scope_index).file, + }); - const function_body = try analyzer.block(scope_index, .{ + const function_prototype_index = try 
analyzer.functionPrototype(function_scope_allocation.index, node.left); + + const function_body = try analyzer.block(function_scope_allocation.index, .{ .type_index = analyzer.functionPrototypeReturnType(function_prototype_index), }, node.right); - const function_allocation = try analyzer.module.functions.append(analyzer.allocator, .{ - .prototype = function_prototype_index, - .body = function_body, + const prototype_type = try analyzer.module.types.append(analyzer.allocator, .{ + .function = function_prototype_index, }); + + const function_allocation = try analyzer.module.functions.append(analyzer.allocator, .{ + .prototype = prototype_type.index, + .body = function_body, + .scope = function_scope_allocation.index, + }); + break :blk .{ .function = function_allocation.index, }; }, + .function_prototype => blk: { + const function_prototype_index = try analyzer.functionPrototype(scope_index, node_index); + const function_prototype = analyzer.module.function_prototypes.get(function_prototype_index); + + break :blk switch (function_prototype.attributes.@"extern") { + true => b: { + const prototype_type = try analyzer.module.types.append(analyzer.allocator, .{ + .function = function_prototype_index, + }); + const function_allocation = try analyzer.module.functions.append(analyzer.allocator, .{ + .prototype = prototype_type.index, + .body = Block.Index.invalid, + .scope = Scope.Index.invalid, + }); + break :b .{ + .function = function_allocation.index, + }; + }, + false => unreachable, + }; + }, .simple_while => unreachable, .block_zero, .block_one => blk: { const block_index = try analyzer.block(scope_index, expect_type, node_index); @@ -392,126 +813,321 @@ const Analyzer = struct { }; }, .number_literal => switch (std.zig.parseNumberLiteral(analyzer.numberBytes(scope_index, node.token))) { - .int => |integer| blk: { - assert(expect_type != .none); - const int_type = switch (expect_type) { - .flexible_integer => |flexible_integer_type| Compilation.Type.Integer{ - 
.bit_count = flexible_integer_type.byte_count << 3, - .signedness = .unsigned, + .int => |integer| .{ + .integer = .{ + .value = integer, + .type = switch (expect_type) { + .none => Type.comptime_int, + .flexible_integer, .type_index => Type.Integer.getIndex(switch (expect_type) { + .flexible_integer => |flexible_integer_type| Compilation.Type.Integer{ + .bit_count = flexible_integer_type.byte_count << 3, + .signedness = .unsigned, + }, + .type_index => |type_index| a: { + const type_info = analyzer.module.types.get(type_index); + break :a switch (type_info.*) { + .integer => |int| int, + else => |t| @panic(@tagName(t)), + }; + }, + else => unreachable, + }), }, - .type_index => |type_index| a: { - const type_info = analyzer.module.types.get(type_index); - break :a switch (type_info.*) { - .integer => |int| int, - else => |t| @panic(@tagName(t)), - }; - }, - else => |t| @panic(@tagName(t)), - }; - break :blk .{ - .integer = .{ - .value = integer, - .type = int_type, - }, - }; + .signedness = .unsigned, + }, }, else => |t| @panic(@tagName(t)), }, - .call_one => blk: { - const this_value_node_index = node.left; - const this_value_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, this_value_node_index); - const value_type = switch (this_value_allocation.ptr.*) { - .function => |function_index| analyzer.module.function_prototypes.get(analyzer.module.functions.get(function_index).prototype).return_type, - else => |t| @panic(@tagName(t)), - }; - - const call_allocation = try analyzer.module.calls.append(analyzer.allocator, .{ - .value = this_value_allocation.index, - .arguments = ArgumentList.Index.invalid, - .type = value_type, - }); - break :blk .{ - .call = call_allocation.index, - }; + .call, .call_one, .call_two => .{ + .call = try analyzer.processCall(scope_index, node_index), }, .field_access => blk: { - const left_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, node.left); + logln(.sema, .node, "left 
alocation...\n", .{}); const identifier = analyzer.tokenIdentifier(scope_index, node.right.value); - _ = identifier; + logln(.sema, .node, "Field access identifier for RHS: \"{s}\"\n", .{identifier}); + analyzer.debugNode(scope_index, node_index); + const left_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, node.left); + switch (left_allocation.ptr.*) { .type => |type_index| { - const left_type = analyzer.module.types.get(type_index); - switch (left_type.*) { - .@"struct" => |struct_index| { - const struct_type = analyzer.module.structs.get(struct_index); - const right_index = try analyzer.doIdentifier(struct_type.scope, ExpectType.none, node.right.value, scope_index); - const right_value = analyzer.module.values.get(right_index); - switch (right_value.*) { - .function => break :blk right_value.*, - else => unreachable, - } - print("Right: {}\n", .{right_value}); - // struct_scope.declarations.get(identifier); + if (!type_index.invalid) { + const left_type = analyzer.module.types.get(type_index); + switch (left_type.*) { + .@"struct" => |struct_index| { + const struct_type = analyzer.module.structs.get(struct_index); + const right_index = try analyzer.doIdentifier(struct_type.scope, ExpectType.none, node.right.value, scope_index); + const right_value = analyzer.module.values.get(right_index); + switch (right_value.*) { + .function, .type, .enum_field => break :blk right_value.*, + .declaration_reference => |declaration_reference| { + const declaration = analyzer.module.declarations.get(declaration_reference.value); + const declaration_name = analyzer.module.getName(declaration.name).?; + logln(.sema, .node, "Decl ref: {s}\n", .{declaration_name}); + logln(.sema, .node, "TODO: maybe this should not be runtime", .{}); + unreachable; + }, + else => |t| @panic(@tagName(t)), + } + logln(.sema, .node, "Right: {}\n", .{right_value}); + // struct_scope.declarations.get(identifier); - unreachable; + unreachable; + }, + .@"enum" => |enum_index| { + 
const enum_type = analyzer.module.enums.get(enum_index); + const identifier_hash = try analyzer.processIdentifier(identifier); + + const result = for (enum_type.fields.items) |enum_field_index| { + const enum_field = analyzer.module.enum_fields.get(enum_field_index); + if (enum_field.name == identifier_hash) { + break enum_field_index; + } + } else { + @panic("No enum found"); + }; + const enum_field = analyzer.module.enum_fields.get(result); + const enum_field_name = analyzer.module.getName(enum_field.name).?; + logln(.sema, .node, "Enum field name resolution: {s}\n", .{enum_field_name}); + break :blk .{ + .enum_field = result, + }; + }, + else => |t| @panic(@tagName(t)), + } + unreachable; + } else { + panic("Identifier \"{s}\" not found. Type empty", .{identifier}); + } + }, + .declaration_reference => |declaration_reference| { + switch (left_allocation.ptr.*) { + .declaration_reference => |reference| { + const declaration = analyzer.module.declarations.get(reference.value); + const declaration_type_index = declaration.type; + const declaration_type = analyzer.module.types.get(declaration_type_index); + switch (declaration_type.*) { + .slice => unreachable, + else => |t| @panic(@tagName(t)), + } }, else => |t| @panic(@tagName(t)), } + _ = declaration_reference; unreachable; }, else => |t| @panic(@tagName(t)), } unreachable; }, + .string_literal => .{ + .string_literal = try analyzer.processStringLiteral(scope_index, node_index), + }, + .@"switch" => try analyzer.processSwitch(scope_index, node_index), + .enum_type => blk: { + const list_node = analyzer.getScopeNode(scope_index, node.left); + const field_node_list = switch (list_node.id) { + .node_list => analyzer.getScopeNodeList(scope_index, list_node), + else => |t| @panic(@tagName(t)), + }; + + var field_list = try ArrayList(Enum.Field.Index).initCapacity(analyzer.allocator, field_node_list.items.len); + const enum_allocation = try analyzer.module.enums.addOne(analyzer.allocator); + const type_allocation = 
try analyzer.module.types.append(analyzer.allocator, .{ + .@"enum" = enum_allocation.index, + }); + + for (field_node_list.items) |field_node_index| { + const field_node = analyzer.getScopeNode(scope_index, field_node_index); + const identifier = analyzer.tokenIdentifier(scope_index, field_node.token); + logln(.sema, .node, "Enum field: {s}\n", .{identifier}); + assert(field_node.left.invalid); + + const enum_hash_name = try analyzer.processIdentifier(identifier); + + const enum_field_allocation = try analyzer.module.enum_fields.append(analyzer.allocator, .{ + .name = enum_hash_name, + .value = Value.Index.invalid, + .parent = enum_allocation.index, + }); + + field_list.appendAssumeCapacity(enum_field_allocation.index); + } + + enum_allocation.ptr.* = .{ + .scope = Scope.Index.invalid, + .fields = field_list, + .type = type_allocation.index, + }; + + break :blk .{ + .type = type_allocation.index, + }; + }, + .assign => try analyzer.processAssignment(scope_index, node_index), + .signed_integer_type, .unsigned_integer_type => .{ + .type = try analyzer.resolveType(scope_index, node_index), + }, + .@"return" => try analyzer.processReturn(scope_index, expect_type, node_index), else => |t| @panic(@tagName(t)), }; } + fn debugNode(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) void { + const node = analyzer.getScopeNode(scope_index, node_index); + const source_file = analyzer.getScopeSourceFile(scope_index); + const token = analyzer.getScopeToken(scope_index, node.token); + logln(.sema, .node, "Debugging node {s}:\n\n```\n{s}\n```\n", .{ @tagName(node.id), source_file[token.start..] 
}); + } + + fn processStringLiteral(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !u32 { + const string_literal_node = analyzer.getScopeNode(scope_index, node_index); + assert(string_literal_node.id == .string_literal); + const string_literal = analyzer.tokenStringLiteral(scope_index, string_literal_node.token); + const string_key = try analyzer.module.addStringLiteral(analyzer.allocator, string_literal); + return string_key; + } + fn functionPrototypeReturnType(analyzer: *Analyzer, function_prototype_index: Function.Prototype.Index) Type.Index { const function_prototype = analyzer.module.function_prototypes.get(function_prototype_index); return function_prototype.return_type; } - fn functionPrototype(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Function.Prototype.Index { - const node = analyzer.getNode(scope_index, node_index); - switch (node.id) { - .simple_function_prototype => { - const arguments: ?[]const Field.Index = blk: { - if (node.left.get() == null) break :blk null; - const argument_node = analyzer.getNode(scope_index, node.left); - switch (argument_node.id) { - else => |t| @panic(@tagName(t)), - } + fn resolveType(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Type.Index { + const type_node = analyzer.getScopeNode(scope_index, node_index); + const type_index: Type.Index = switch (type_node.id) { + .identifier => blk: { + const token = analyzer.getScopeToken(scope_index, type_node.token); + const source_file = analyzer.getScopeSourceFile(scope_index); + const identifier = tokenBytes(token, source_file); + logln(.sema, .type, "Identifier: \"{s}\"", .{identifier}); + const resolved_value_index = try analyzer.doIdentifier(scope_index, ExpectType.type, type_node.token, scope_index); + const resolved_value = analyzer.module.values.get(resolved_value_index); + break :blk switch (resolved_value.*) { + .type => |type_index| type_index, + else => |t| @panic(@tagName(t)), }; - const 
return_type_node = analyzer.getNode(scope_index, node.right); - const return_type: Type.Index = switch (return_type_node.id) { - .identifier => { - unreachable; + }, + .keyword_noreturn => Type.noreturn, + inline .signed_integer_type, .unsigned_integer_type => |int_type_signedness| blk: { + const bit_count: u16 = @intCast(type_node.left.value); + logln(.sema, .type, "Bit count: {}", .{bit_count}); + break :blk switch (bit_count) { + inline 8, 16, 32, 64 => |hardware_bit_count| Type.Integer.getIndex(.{ + .bit_count = hardware_bit_count, + .signedness = switch (int_type_signedness) { + .signed_integer_type => .signed, + .unsigned_integer_type => .unsigned, + else => @compileError("OOO"), + }, + }), + else => unreachable, + }; + }, + .many_pointer_type => blk: { + const type_allocation = try analyzer.module.types.append(analyzer.allocator, .{ + .pointer = .{ + .element_type = try resolveType(analyzer, scope_index, type_node.left), + .many = true, + .@"const" = switch (analyzer.getScopeToken(scope_index, type_node.token + 3).id) { + .fixed_keyword_const => true, + .fixed_keyword_var => false, + else => |t| @panic(@tagName(t)), + }, }, - .keyword_noreturn => .{ .block = 0, .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.noreturn) }, - inline .signed_integer_type, .unsigned_integer_type => |int_type_signedness| blk: { - const bit_count: u16 = @intCast(return_type_node.left.value); - print("Bit count: {}\n", .{bit_count}); - break :blk switch (bit_count) { - inline 8, 16, 32, 64 => |hardware_bit_count| Type.Index{ - .block = 0, - .index = @ctz(hardware_bit_count) - @ctz(@as(u8, 8)) + switch (int_type_signedness) { - .signed_integer_type => HardwareSignedIntegerType, - .unsigned_integer_type => HardwareUnsignedIntegerType, - else => unreachable, - }.offset, - }, - else => unreachable, - }; + }); + break :blk type_allocation.index; + }, + .slice_type => blk: { + const type_allocation = try analyzer.module.types.append(analyzer.allocator, .{ + .slice = .{ + 
.element_type = try resolveType(analyzer, scope_index, type_node.right), }, + }); + break :blk type_allocation.index; + }, + .void_type => Type.void, + .ssize_type => Type.ssize, + .usize_type => Type.usize, + else => |t| @panic(@tagName(t)), + }; + return type_index; + } + + fn processSimpleFunctionPrototype(analyzer: *Analyzer, scope_index: Scope.Index, simple_function_prototype_node_index: Node.Index) !Function.Prototype { + const simple_function_prototype_node = analyzer.getScopeNode(scope_index, simple_function_prototype_node_index); + assert(simple_function_prototype_node.id == .simple_function_prototype); + const arguments_node_index = simple_function_prototype_node.left; + const return_type_node_index = simple_function_prototype_node.right; + + const arguments: ?[]const Declaration.Index = switch (arguments_node_index.invalid) { + true => null, + false => blk: { + const argument_list_node = analyzer.getScopeNode(scope_index, arguments_node_index); + // logln("Function prototype argument list node: {}\n", .{function_prototype_node.left.uniqueInteger()}); + const argument_node_list = switch (argument_list_node.id) { + .node_list => analyzer.getScopeNodeList(scope_index, argument_list_node), else => |t| @panic(@tagName(t)), }; - const function_prototype_allocation = try analyzer.module.function_prototypes.append(analyzer.allocator, .{ - .arguments = arguments, - .return_type = return_type, - }); + assert(argument_node_list.items.len > 0); + if (argument_node_list.items.len > 0) { + var arguments = try ArrayList(Declaration.Index).initCapacity(analyzer.allocator, argument_node_list.items.len); + const scope = analyzer.module.scopes.get(scope_index); + _ = scope; + for (argument_node_list.items, 0..) 
|argument_node_index, index| { + const argument_node = analyzer.getScopeNode(scope_index, argument_node_index); + switch (argument_node.id) { + .argument_declaration => { + const argument_type = try analyzer.resolveType(scope_index, argument_node.left); + const argument_declaration = try analyzer.declarationCommon(scope_index, .local, .@"const", argument_node.token, argument_type, Value.Index.invalid, @intCast(index)); + arguments.appendAssumeCapacity(argument_declaration); + }, + else => |t| @panic(@tagName(t)), + } + } + + break :blk arguments.items; + } else { + break :blk null; + } + }, + }; + + const return_type = try analyzer.resolveType(scope_index, return_type_node_index); + + return .{ + .arguments = arguments, + .return_type = return_type, + }; + } + + fn functionPrototype(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index) !Function.Prototype.Index { + const function_prototype_node = analyzer.getScopeNode(scope_index, node_index); + switch (function_prototype_node.id) { + .simple_function_prototype => { + const function_prototype_allocation = try analyzer.module.function_prototypes.append(analyzer.allocator, try analyzer.processSimpleFunctionPrototype(scope_index, node_index)); + + return function_prototype_allocation.index; + }, + .function_prototype => { + var function_prototype = try analyzer.processSimpleFunctionPrototype(scope_index, function_prototype_node.left); + const function_prototype_attribute_list_node = analyzer.getScopeNode(scope_index, function_prototype_node.right); + const attribute_node_list = analyzer.getScopeNodeList(scope_index, function_prototype_attribute_list_node); + var calling_convention: ?Compilation.CallingConvention = null; + + for (attribute_node_list.items) |attribute_node_index| { + const attribute_node = analyzer.getScopeNode(scope_index, attribute_node_index); + + switch (attribute_node.id) { + .extern_qualifier => function_prototype.attributes.@"extern" = true, + else => |t| @panic(@tagName(t)), + } 
+ } + + function_prototype.attributes.calling_convention = calling_convention orelse Compilation.CallingConvention.system_v; + + const function_prototype_allocation = try analyzer.module.function_prototypes.append(analyzer.allocator, function_prototype); return function_prototype_allocation.index; }, else => |t| @panic(@tagName(t)), @@ -533,6 +1149,17 @@ const Analyzer = struct { node_buffer[1] = node.right; break :blk &node_buffer; }, + .main => blk: { + const node_list_node = analyzer.getFileNode(file_index, node.left); + const node_list = switch (node_list_node.id) { + .node_list => analyzer.getFileNodeList(file_index, node_list_node), + else => |t| @panic(@tagName(t)), + }; + break :blk node_list.items; + // const node_list = file.syntactic_analyzer_result.node_lists.items[node.left.unwrap()]; + // break :blk node_list.items; + }, + .main_zero => &.{}, else => |t| @panic(@tagName(t)), }; @@ -544,7 +1171,7 @@ const Analyzer = struct { const scope = new_scope.ptr; const scope_index = new_scope.index; - const is_file = !parent_scope_index.valid; + const is_file = parent_scope_index.invalid; assert(is_file); const struct_allocation = try analyzer.module.structs.append(analyzer.allocator, .{ @@ -553,6 +1180,11 @@ const Analyzer = struct { const type_allocation = try analyzer.module.types.append(analyzer.allocator, .{ .@"struct" = struct_allocation.index, }); + + if (parent_scope_index.invalid) { + file.type = type_allocation.index; + } + scope.type = type_allocation.index; value.* = .{ .type = type_allocation.index, @@ -564,7 +1196,7 @@ const Analyzer = struct { declarations: u32 = 0, } = .{}; for (nodes) |member_index| { - const member = analyzer.getNode(scope_index, member_index); + const member = analyzer.getFileNode(file_index, member_index); const member_type = getContainerMemberType(member.id); switch (member_type) { @@ -579,7 +1211,7 @@ const Analyzer = struct { var field_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.fields); for 
(nodes) |member_index| { - const member = analyzer.getNode(scope_index, member_index); + const member = analyzer.getFileNode(file_index, member_index); const member_type = getContainerMemberType(member.id); const array_list = switch (member_type) { .declaration => &declaration_nodes, @@ -589,26 +1221,26 @@ const Analyzer = struct { } for (declaration_nodes.items) |declaration_node_index| { - const declaration_node = analyzer.getNode(scope_index, declaration_node_index); + const declaration_node = analyzer.getFileNode(file_index, declaration_node_index); switch (declaration_node.id) { .@"comptime" => {}, - .simple_variable_declaration => _ = try analyzer.symbolDeclaration(scope_index, declaration_node_index, .global), + .simple_symbol_declaration => _ = try analyzer.symbolDeclaration(scope_index, declaration_node_index, .global), else => unreachable, } } // TODO: consider iterating over scope declarations instead? for (declaration_nodes.items) |declaration_node_index| { - const declaration_node = analyzer.getNode(scope_index, declaration_node_index); + const declaration_node = analyzer.getFileNode(file_index, declaration_node_index); switch (declaration_node.id) { .@"comptime" => _ = try analyzer.comptimeBlock(scope_index, declaration_node_index), - .simple_variable_declaration => {}, + .simple_symbol_declaration => {}, else => |t| @panic(@tagName(t)), } } for (field_nodes.items) |field_index| { - const field_node = analyzer.getNode(scope_index, field_index); + const field_node = analyzer.getFileNode(file_index, field_index); _ = field_node; @panic("TODO: fields"); @@ -620,55 +1252,78 @@ const Analyzer = struct { } } + fn declarationCommon(analyzer: *Analyzer, scope_index: Scope.Index, scope_type: ScopeType, mutability: Compilation.Mutability, identifier_token: Token.Index, type_index: Type.Index, init_value: Value.Index, argument_index: ?u32) !Declaration.Index { + const identifier = analyzer.tokenIdentifier(scope_index, identifier_token); + const identifier_index 
= try analyzer.processIdentifier(identifier); + + if (analyzer.lookupDeclarationInCurrentAndParentScopes(scope_index, identifier_index)) |lookup| { + const declaration_name = analyzer.tokenIdentifier(lookup.scope, identifier_token); + panic("Existing name in lookup: {s}", .{declaration_name}); + } + + // Check if the symbol name is already occupied in the same scope + const scope = analyzer.module.scopes.get(scope_index); + const declaration_allocation = try analyzer.module.declarations.append(analyzer.allocator, .{ + .name = identifier_index, + .scope_type = scope_type, + .mutability = mutability, + .init_value = init_value, + .type = type_index, + .argument_index = argument_index, + }); + + try scope.declarations.put(analyzer.allocator, identifier_index, declaration_allocation.index); + + return declaration_allocation.index; + } + fn symbolDeclaration(analyzer: *Analyzer, scope_index: Scope.Index, node_index: Node.Index, scope_type: ScopeType) !Declaration.Index { - const declaration_node = analyzer.getNode(scope_index, node_index); - assert(declaration_node.id == .simple_variable_declaration); - assert(!declaration_node.left.valid); - const mutability: Compilation.Mutability = switch (analyzer.getToken(scope_index, declaration_node.token).id) { + const declaration_node = analyzer.getScopeNode(scope_index, node_index); + assert(declaration_node.id == .simple_symbol_declaration); + const expect_type = switch (declaration_node.left.invalid) { + false => switch (scope_type) { + .local => ExpectType{ + .type_index = try analyzer.resolveType(scope_index, declaration_node.left), + }, + .global => ExpectType.none, + }, + true => ExpectType.none, + }; + const mutability: Compilation.Mutability = switch (analyzer.getScopeToken(scope_index, declaration_node.token).id) { .fixed_keyword_const => .@"const", .fixed_keyword_var => .@"var", else => |t| @panic(@tagName(t)), }; const expected_identifier_token_index = declaration_node.token + 1; - const expected_identifier_token = 
analyzer.getToken(scope_index, expected_identifier_token_index); + const expected_identifier_token = analyzer.getScopeToken(scope_index, expected_identifier_token_index); if (expected_identifier_token.id != .identifier) { - print("Error: found: {}", .{expected_identifier_token.id}); + logln(.sema, .symbol_declaration, "Error: found: {}", .{expected_identifier_token.id}); @panic("Expected identifier"); } // TODO: Check if it is a keyword - const identifier_index = try analyzer.identifierFromToken(scope_index, expected_identifier_token_index); + assert(!declaration_node.right.invalid); - const declaration_name = analyzer.tokenIdentifier(scope_index, expected_identifier_token_index); - // Check if the symbol name is already occupied in the same scope - const scope = analyzer.module.scopes.get(scope_index); - const scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_index); - if (scope_lookup.found_existing) { - std.debug.panic("Existing name in lookup: {s}", .{declaration_name}); - } - - // Check if the symbol name is already occupied in parent scopes - var upper_scope_index = scope.parent; - - while (upper_scope_index.valid) { - @panic("TODO: upper scope"); - } - assert(declaration_node.right.valid); - - const declaration_allocation = try analyzer.module.declarations.append(analyzer.allocator, .{ - .name = declaration_name, - .scope_type = scope_type, - .mutability = mutability, - .init_value = (try analyzer.module.values.append(analyzer.allocator, .{ + const argument = null; + assert(argument == null); + const init_value_allocation = switch (scope_type) { + .local => try analyzer.unresolvedAllocate(scope_index, expect_type, declaration_node.right), + .global => try analyzer.module.values.append(analyzer.allocator, .{ .unresolved = .{ .node_index = declaration_node.right, }, - })).index, - }); + }), + }; - scope_lookup.value_ptr.* = declaration_allocation.index; + assert(argument == null); + const type_index = switch (scope_type) { + .local 
=> init_value_allocation.ptr.getType(analyzer.module), + .global => Type.Index.invalid, + }; - return declaration_allocation.index; + const result = try analyzer.declarationCommon(scope_index, scope_type, mutability, expected_identifier_token_index, type_index, init_value_allocation.index, argument); + + return result; } const MemberType = enum { @@ -679,28 +1334,19 @@ const Analyzer = struct { fn getContainerMemberType(member_id: Node.Id) MemberType { return switch (member_id) { .@"comptime" => .declaration, - .simple_variable_declaration => .declaration, + .simple_symbol_declaration => .declaration, else => unreachable, }; } - fn identifierFromToken(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) !u32 { - const identifier = analyzer.tokenIdentifier(scope_index, token_index); - const key: u32 = @truncate(std.hash.Wyhash.hash(0, identifier)); - - const lookup_result = try analyzer.module.string_table.getOrPut(analyzer.allocator, key); - - if (lookup_result.found_existing) { - return lookup_result.key_ptr.*; - } else { - return key; - } + fn processIdentifier(analyzer: *Analyzer, string: []const u8) !u32 { + return analyzer.module.addName(analyzer.allocator, string); } fn tokenIdentifier(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) []const u8 { - const token = analyzer.getToken(scope_index, token_index); + const token = analyzer.getScopeToken(scope_index, token_index); assert(token.id == .identifier); - const source_file = analyzer.getSourceFile(scope_index); + const source_file = analyzer.getScopeSourceFile(scope_index); const identifier = tokenBytes(token, source_file); return identifier; @@ -711,18 +1357,18 @@ const Analyzer = struct { } fn numberBytes(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) []const u8 { - const token = analyzer.getToken(scope_index, token_index); + const token = analyzer.getScopeToken(scope_index, token_index); assert(token.id == .number_literal); - const 
source_file = analyzer.getSourceFile(scope_index); + const source_file = analyzer.getScopeSourceFile(scope_index); const bytes = tokenBytes(token, source_file); return bytes; } fn tokenStringLiteral(analyzer: *Analyzer, scope_index: Scope.Index, token_index: Token.Index) []const u8 { - const token = analyzer.getToken(scope_index, token_index); + const token = analyzer.getScopeToken(scope_index, token_index); assert(token.id == .string_literal); - const source_file = analyzer.getSourceFile(scope_index); + const source_file = analyzer.getScopeSourceFile(scope_index); // Eat double quotes const string_literal = tokenBytes(token, source_file)[1..][0 .. token.len - 2]; @@ -732,6 +1378,87 @@ const Analyzer = struct { fn allocateScope(analyzer: *Analyzer, scope_value: Scope) !Scope.Allocation { return analyzer.module.scopes.append(analyzer.allocator, scope_value); } + + const TypeCheckResult = enum { + success, + zero_extend, + sign_extend, + }; + + fn typeCheck(analyzer: *Analyzer, expect_type: ExpectType, source: Type.Index) !TypeCheckResult { + return switch (expect_type) { + .none => TypeCheckResult.success, + .type_index => |expected_type_index| { + if (expected_type_index.eq(source)) { + return TypeCheckResult.success; + } + + const destination_type = analyzer.module.types.get(expected_type_index); + const source_type = analyzer.module.types.get(source); + + switch (destination_type.*) { + .type => switch (source_type.* == .type) { + true => return TypeCheckResult.success, + false => unreachable, + }, + .integer => |destination_int| switch (source_type.*) { + .integer => |source_int| { + if (destination_int.getSize() < source_int.getSize()) { + @panic("Destination integer type is smaller than sourcE"); + } else if (destination_int.getSize() > source_int.getSize()) { + unreachable; + } else { + unreachable; + } + }, + .comptime_int => return TypeCheckResult.success, + else => |t| @panic(@tagName(t)), + }, + // TODO: type safety + .pointer => |destination_pointer| 
switch (source_type.*) { + .pointer => |source_pointer| { + switch (source_pointer.many == destination_pointer.many and source_pointer.element_type.eq(destination_pointer.element_type)) { + true => return TypeCheckResult.success, + false => unreachable, + } + }, + else => |t| @panic(@tagName(t)), + }, + else => |t| @panic(@tagName(t)), + } + }, + .flexible_integer => |expected_flexible_integer| { + const source_type = analyzer.module.types.get(source); + switch (source_type.*) { + .integer => |source_integer| { + const source_size = source_integer.getSize(); + if (expected_flexible_integer.byte_count < source_size) { + unreachable; + } else if (expected_flexible_integer.byte_count > source_size) { + return switch (source_integer.signedness) { + .signed => .sign_extend, + .unsigned => .zero_extend, + }; + } else { + return TypeCheckResult.success; + } + }, + // TODO: add type safety + .pointer => |pointer| { + _ = pointer; + switch (expected_flexible_integer.byte_count == 8) { + true => return TypeCheckResult.success, + false => unreachable, + } + }, + else => |t| @panic(@tagName(t)), + } + // if (expected_flexible_integer.byte_count < + // _ = expected_flexible_integer; + }, + // else => |t| @panic(@tagName(t)), + }; + } }; const ExpectType = union(enum) { @@ -743,7 +1470,11 @@ const ExpectType = union(enum) { .none = {}, }; pub const boolean = ExpectType{ - .type_index = type_boolean, + .type_index = Type.boolean, + }; + + pub const @"type" = ExpectType{ + .type_index = Type.type, }; const FlexibleInteger = struct { @@ -752,123 +1483,17 @@ const ExpectType = union(enum) { }; }; -const type_boolean = Type.Index{ - .block = 0, - .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.bool), -}; - -// Each time an enum is added here, a corresponding insertion in the initialization must be made -const Values = enum { - bool_false, - bool_true, - @"unreachable", - - fn getIndex(value: Values) Value.Index { - const absolute: u32 = @intFromEnum(value); - const 
foo = @as(Value.Index, undefined); - const ElementT = @TypeOf(@field(foo, "index")); - const BlockT = @TypeOf(@field(foo, "block")); - const divider = std.math.maxInt(ElementT); - const element_index: ElementT = @intCast(absolute % divider); - const block_index: BlockT = @intCast(absolute / divider); - return .{ - .index = element_index, - .block = block_index, - }; - } -}; - -const Intrinsic = enum { - import, - syscall, -}; - -const FixedTypeKeyword = enum { - void, - noreturn, - bool, - - const offset = 0; -}; - -const HardwareUnsignedIntegerType = enum { - u8, - u16, - u32, - u64, - - const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len; -}; - -const HardwareSignedIntegerType = enum { - s8, - s16, - s32, - s64, - - const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len; -}; - -pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, file_index: File.Index) !Type.Index { - _ = file_index; - inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| { - _ = try module.types.append(compilation.base_allocator, @unionInit(Type, enum_field.name, {})); - } - - inline for (@typeInfo(HardwareUnsignedIntegerType).Enum.fields) |enum_field| { - _ = try module.types.append(compilation.base_allocator, .{ - .integer = .{ - .signedness = .unsigned, - .bit_count = switch (@field(HardwareUnsignedIntegerType, enum_field.name)) { - .u8 => 8, - .u16 => 16, - .u32 => 32, - .u64 => 64, - }, - }, - }); - } - - inline for (@typeInfo(HardwareSignedIntegerType).Enum.fields) |enum_field| { - _ = try module.types.append(compilation.base_allocator, .{ - .integer = .{ - .signedness = .signed, - .bit_count = switch (@field(HardwareSignedIntegerType, enum_field.name)) { - .s8 => 8, - .s16 => 16, - .s32 => 32, - .s64 => 64, - }, - }, - }); - } - - _ = try module.values.append(compilation.base_allocator, .{ - .bool = false, - }); - - _ = try module.values.append(compilation.base_allocator, .{ - .bool = true, 
- }); - - _ = try module.values.append(compilation.base_allocator, .{ - .@"unreachable" = {}, - }); - - const value_allocation = try module.values.append(compilation.base_allocator, .{ - .unresolved = .{ - .node_index = .{ .value = 0 }, - }, - }); - - const result = analyzeExistingPackage(value_allocation.ptr, compilation, module, package); +pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_value: *Value) !void { + _ = try analyzeExistingPackage(main_value, compilation, module, package); var decl_iterator = module.declarations.iterator(); while (decl_iterator.nextPointer()) |decl| { - if (equal(u8, decl.name, "_start")) { + const declaration_name = module.getName(decl.name).?; + if (equal(u8, declaration_name, "_start")) { const value = module.values.get(decl.init_value); module.entry_point = switch (value.*) { - .function => |function_index| function_index.uniqueInteger(), + .function => |function_index| function_index, + .unresolved => panic("Unresolved declaration: {s}\n", .{declaration_name}), else => |t| @panic(@tagName(t)), }; break; @@ -876,20 +1501,18 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, } else { @panic("Entry point not found"); } - - return result; } pub fn analyzeExistingPackage(value: *Value, compilation: *Compilation, module: *Module, package: *Package) !Type.Index { const package_import = try module.importPackage(compilation.base_allocator, package); assert(!package_import.file.is_new); - const package_file = package_import.file.ptr; const file_index = package_import.file.index; - return try analyzeFile(value, compilation.base_allocator, module, package_file, file_index); + return try analyzeFile(value, compilation.base_allocator, module, file_index); } -pub fn analyzeFile(value: *Value, allocator: Allocator, module: *Module, file: *File, file_index: File.Index) !Type.Index { +pub fn analyzeFile(value: *Value, allocator: Allocator, module: *Module, file_index: 
File.Index) !Type.Index { + const file = module.files.get(file_index); assert(value.* == .unresolved); assert(file.status == .parsed); @@ -899,12 +1522,6 @@ pub fn analyzeFile(value: *Value, allocator: Allocator, module: *Module, file: * .module = module, }; - var buffer = [2]Node.Index{ - Node.Index.invalid, - Node.Index.invalid, - }; - _ = buffer; - const result = try analyzer.structType(value, Scope.Index.invalid, .{ .value = 0 }, file_index); return result; } diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig index 5efc621..1d16353 100644 --- a/src/frontend/syntactic_analyzer.zig +++ b/src/frontend/syntactic_analyzer.zig @@ -2,7 +2,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; const equal = std.mem.eql; -const log = std.log; const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; @@ -12,6 +11,11 @@ const HashMap = data_structures.HashMap; const lexical_analyzer = @import("lexical_analyzer.zig"); const Token = lexical_analyzer.Token; +const Compilation = @import("../Compilation.zig"); +const File = Compilation.File; +const log = Compilation.log; +const logln = Compilation.logln; + pub const Result = struct { nodes: ArrayList(Node), node_lists: ArrayList(Node.List), @@ -22,6 +26,21 @@ pub const Options = packed struct { is_comptime: bool, }; +pub const Logger = enum { + token_errors, + symbol_declaration, + node_creation, + main_node, + container_members, + block, + assign, + suffix, + precedence, + @"switch", + + pub var bitset = std.EnumSet(Logger).initEmpty(); +}; + // TODO: pack it to be more efficient pub const Node = packed struct(u128) { token: u32, @@ -33,24 +52,24 @@ pub const Node = packed struct(u128) { pub const Index = packed struct(u32) { value: u31, - valid: bool = true, + invalid: bool = false, pub const invalid = Index{ .value = 0, - .valid = false, + .invalid = true, }; pub fn get(index: Index) ?u32 { - 
return if (index.valid) index.value else null; + return if (index.invalid) null else index.value; } pub fn unwrap(index: Index) u32 { - assert(index.valid); + assert(!index.invalid); return index.value; } pub fn uniqueInteger(index: Index) u32 { - assert(index.valid); + assert(!index.invalid); return index.value; } }; @@ -70,7 +89,7 @@ pub const Node = packed struct(u128) { container_declaration = 6, string_literal = 7, compiler_intrinsic_one = 8, - simple_variable_declaration = 9, + simple_symbol_declaration = 9, assign = 10, @"comptime" = 11, node_list = 12, @@ -96,6 +115,32 @@ main_one = 32, main_two = 33, main_zero = 34, + call_two = 35, + slice_type = 36, + argument_declaration = 37, + compiler_intrinsic = 38, + ssize_type = 39, + usize_type = 40, + void_type = 41, + call = 42, + many_pointer_type = 43, + enum_literal = 44, + address_of = 45, + keyword_false = 46, + compare_equal = 47, + compare_not_equal = 48, + compare_less_than = 49, + compare_greater_than = 50, + compare_less_or_equal = 51, + compare_greater_or_equal = 52, + @"if" = 53, + if_else = 54, + @"switch" = 55, + switch_case = 56, + enum_type = 57, + enum_field = 58, + extern_qualifier = 59, + function_prototype = 60, }; }; @@ -109,52 +154,105 @@ const Analyzer = struct { tokens: []const Token, token_i: u32 = 0, nodes: ArrayList(Node) = .{}, - file: []const u8, + source_file: []const u8, + file_index: File.Index, allocator: Allocator, temporal_node_heap: ArrayList(Node.Index) = .{}, node_lists: ArrayList(Node.List) = .{}, fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 { - if (analyzer.tokens[analyzer.token_i].id == token_id) { - const result = analyzer.token_i; + const token_i = analyzer.token_i; + const token = analyzer.tokens[token_i]; + const is_expected_token = token.id == token_id; + if (is_expected_token) { analyzer.token_i += 1; + const result = token_i; return result; } else { + logln(.parser, .token_errors, "Unexpected token {s} when 
expected {s}\n", .{ @tagName(token.id), @tagName(token_id) }); return error.unexpected_token; } } fn bytes(analyzer: *const Analyzer, token_index: Token.Index) []const u8 { const token = analyzer.tokens[token_index]; - return analyzer.file[token.start..][0..token.len]; + return analyzer.source_file[token.start..][0..token.len]; } - fn symbolDeclaration(analyzer: *Analyzer) !Node.Index { + fn symbolDeclaration(analyzer: *Analyzer) anyerror!Node.Index { const first = analyzer.token_i; assert(analyzer.tokens[first].id == .fixed_keyword_var or analyzer.tokens[first].id == .fixed_keyword_const); analyzer.token_i += 1; - _ = try analyzer.expectToken(.identifier); + const declaration_name_token = try analyzer.expectToken(.identifier); + const declaration_name = analyzer.bytes(declaration_name_token); + logln(.parser, .symbol_declaration, "Starting parsing declaration \"{s}\"", .{declaration_name}); - // TODO: type - _ = try analyzer.expectToken(.equal); + logln(.parser, .symbol_declaration, "Current token: {}", .{analyzer.tokens[analyzer.token_i].id}); - const init_node = try analyzer.expression(); - - _ = try analyzer.expectToken(.semicolon); - - // TODO: - const type_node = Node.Index.invalid; - const declaration = Node{ - .id = .simple_variable_declaration, - .token = first, - .left = type_node, - .right = init_node, + const type_node_index = switch (analyzer.tokens[analyzer.token_i].id) { + .colon => blk: { + analyzer.token_i += 1; + break :blk try analyzer.typeExpression(); + }, + else => Node.Index.invalid, }; - const declaration_init_node = analyzer.nodes.items[init_node.unwrap()]; - std.debug.print("Declaration init node: {}\n", .{declaration_init_node}); + _ = try analyzer.expectToken(.equal); - return analyzer.addNode(declaration); + const init_node_index = try analyzer.expression(); + + const init_node = analyzer.nodes.items[init_node_index.unwrap()]; + switch (init_node.id) { + .function_definition => {}, + else => _ = try analyzer.expectToken(.semicolon), + } 
+ + // TODO: + const declaration = Node{ + .id = .simple_symbol_declaration, + .token = first, + .left = type_node_index, + .right = init_node_index, + }; + + logln(.parser, .symbol_declaration, "Adding declaration \"{s}\" with init node of type: {s}", .{ declaration_name, @tagName(init_node.id) }); + // if (analyzer.token_i < analyzer.tokens.len) { + // const first_token = analyzer.tokens[first]; + // const last_token = analyzer.tokens[analyzer.token_i]; + // const declaration_source_start = first_token.start; + // const declaration_source_end = last_token.start; + // + // logln("[ALL]\n", .{}); + // logln("Source file ({} bytes) :\n```\n{s}\n```\n", .{ analyzer.source_file.len, analyzer.source_file }); + // + // logln("[BEFORE]\n", .{}); + // + // logln("Tokens before the declaration: ", .{}); + // for (analyzer.tokens[0..first]) |t| { + // logln("{s} ", .{@tagName(t.id)}); + // } + // logln("\n", .{}); + // logln("Source before the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[0..analyzer.tokens[first].start]}); + // logln("[DECLARATION]\n", .{}); + // + // logln("First token: {}\n", .{first_token}); + // logln("Last token: {}\n", .{last_token}); + // + // logln("Tokens including declaration ([{}-{}])", .{ first, analyzer.token_i }); + // for (analyzer.tokens[first..][0 .. analyzer.token_i - first]) |t| { + // logln("{s} ", .{@tagName(t.id)}); + // } + // logln("\n", .{}); + // + // logln("Source for the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[declaration_source_start..declaration_source_end]}); + // logln("[AFTER]\n", .{}); + // + // // TODO + // // print("Tokens for file #{}\n", .{analyzer. 
+ // // for (analyzer.tokens[ + // } + + return try analyzer.addNode(declaration); } fn containerMembers(analyzer: *Analyzer) !Members { @@ -163,6 +261,7 @@ const Analyzer = struct { while (analyzer.token_i < analyzer.tokens.len) { const first = analyzer.token_i; + logln(.parser, .container_members, "First token for container member: {s}", .{@tagName(analyzer.tokens[first].id)}); const member_node_index: Node.Index = switch (analyzer.tokens[first].id) { .fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) { .left_brace => blk: { @@ -182,6 +281,8 @@ const Analyzer = struct { else => |t| @panic(@tagName(t)), }; + logln(.parser, .container_members, "Container member {s}", .{@tagName(analyzer.nodes.items[member_node_index.unwrap()].id)}); + try analyzer.temporal_node_heap.append(analyzer.allocator, member_node_index); } @@ -197,7 +298,11 @@ const Analyzer = struct { .left = members_array[0], .right = members_array[1], }, - else => |len| std.debug.panic("Len: {}", .{len}), + else => |len| .{ + .len = len, + .left = try analyzer.nodeList(members_array), + .right = Node.Index.invalid, + }, }; return members; @@ -210,13 +315,16 @@ const Analyzer = struct { const function_prototype = try analyzer.functionPrototype(); const is_comptime = false; _ = is_comptime; - const function_body = try analyzer.block(.{ .is_comptime = false }); - return analyzer.addNode(.{ - .id = .function_definition, - .token = token, - .left = function_prototype, - .right = function_body, - }); + return switch (analyzer.tokens[analyzer.token_i].id) { + .left_brace => try analyzer.addNode(.{ + .id = .function_definition, + .token = token, + .left = function_prototype, + .right = try analyzer.block(.{ .is_comptime = false }), + }), + .semicolon => function_prototype, + else => |t| @panic(@tagName(t)), + }; } fn functionPrototype(analyzer: *Analyzer) !Node.Index { @@ -225,12 +333,46 @@ const Analyzer = struct { const arguments = try analyzer.argumentList(.left_parenthesis, 
.right_parenthesis); const return_type = try analyzer.typeExpression(); - return analyzer.addNode(.{ + const simple_function_prototype = try analyzer.addNode(.{ .id = .simple_function_prototype, .token = token, .left = arguments, .right = return_type, }); + + return switch (analyzer.tokens[analyzer.token_i].id) { + .semicolon, .left_brace => simple_function_prototype, + else => blk: { + var list = Node.List{}; + while (true) { + const attribute = switch (analyzer.tokens[analyzer.token_i].id) { + .semicolon, .left_brace => break, + .fixed_keyword_extern => b: { + const result = try analyzer.addNode(.{ + .id = .extern_qualifier, + .token = analyzer.token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + analyzer.token_i += 1; + break :b result; + }, + else => b: { + if (true) unreachable; + break :b undefined; + }, + }; + try list.append(analyzer.allocator, attribute); + } + + break :blk try analyzer.addNode(.{ + .id = .function_prototype, + .token = token, + .left = simple_function_prototype, + .right = try analyzer.nodeList(list.items), + }); + }, + }; } fn argumentList(analyzer: *Analyzer, maybe_start_token: ?Token.Id, end_token: Token.Id) !Node.Index { @@ -240,14 +382,32 @@ const Analyzer = struct { var list = ArrayList(Node.Index){}; + var foo = false; while (analyzer.tokens[analyzer.token_i].id != end_token) { - @panic("TODO: argument list"); + const identifier = try analyzer.expectToken(.identifier); + _ = try analyzer.expectToken(.colon); + const type_expression = try analyzer.typeExpression(); + // const type_expression_node = analyzer.nodes.items[type_expression.unwrap()]; + // _ = type_expression_node; + // logln("Type expression node: {}\n", .{type_expression_node}); + foo = true; + + if (analyzer.tokens[analyzer.token_i].id == .comma) { + analyzer.token_i += 1; + } + + try list.append(analyzer.allocator, try analyzer.addNode(.{ + .id = .argument_declaration, + .token = identifier, + .left = type_expression, + .right = 
Node.Index.invalid, + })); } _ = try analyzer.expectToken(end_token); if (list.items.len != 0) { - @panic("TODO: arguments"); + return try analyzer.nodeList(list.items); } else { return Node.Index.invalid; } @@ -266,6 +426,7 @@ const Analyzer = struct { while (analyzer.tokens[analyzer.token_i].id != .right_brace) { const first_statement_token = analyzer.tokens[analyzer.token_i]; + logln(.parser, .block, "First statement token: {s}\n", .{@tagName(first_statement_token.id)}); const statement_index = switch (first_statement_token.id) { .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { .colon => { @@ -274,11 +435,17 @@ const Analyzer = struct { else => try analyzer.assignExpressionStatement(), }, .fixed_keyword_unreachable, .fixed_keyword_return => try analyzer.assignExpressionStatement(), - .fixed_keyword_while => try analyzer.whileStatement(options), + + .fixed_keyword_while => try analyzer.whileExpression(options), + .fixed_keyword_switch => try analyzer.switchExpression(), + .fixed_keyword_if => try analyzer.ifExpression(), .fixed_keyword_const, .fixed_keyword_var => try analyzer.symbolDeclaration(), else => |t| @panic(@tagName(t)), }; + const node = analyzer.nodes.items[statement_index.unwrap()]; + logln(.parser, .block, "Adding statement: {s}\n", .{@tagName(node.id)}); + try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index); } @@ -323,10 +490,11 @@ const Analyzer = struct { .right = Node.Index.invalid, }, }; + return analyzer.addNode(node); } - fn whileStatement(analyzer: *Analyzer, options: Options) error{ OutOfMemory, unexpected_token, not_implemented }!Node.Index { + fn whileExpression(analyzer: *Analyzer, options: Options) anyerror!Node.Index { const while_identifier_index = try analyzer.expectToken(.fixed_keyword_while); _ = try analyzer.expectToken(.left_parenthesis); @@ -344,10 +512,117 @@ const Analyzer = struct { }); } + fn switchExpression(analyzer: *Analyzer) anyerror!Node.Index { + logln(.parser, .@"switch", 
"Parsing switch...\n", .{}); + const switch_token = analyzer.token_i; + analyzer.token_i += 1; + _ = try analyzer.expectToken(.left_parenthesis); + const switch_expression = try analyzer.expression(); + _ = try analyzer.expectToken(.right_parenthesis); + logln(.parser, .@"switch", "Parsed switch expression...\n", .{}); + _ = try analyzer.expectToken(.left_brace); + + var list = Node.List{}; + + while (analyzer.tokens[analyzer.token_i].id != .right_brace) { + const case_token = analyzer.token_i; + logln(.parser, .@"switch", "Parsing switch case...\n", .{}); + const case_node = switch (analyzer.tokens[case_token].id) { + .fixed_keyword_else => blk: { + analyzer.token_i += 1; + break :blk Node.Index.invalid; + }, + else => blk: { + var array_list = Node.List{}; + while (true) { + try array_list.append(analyzer.allocator, try analyzer.expression()); + switch (analyzer.tokens[analyzer.token_i].id) { + .comma => analyzer.token_i += 1, + .equal => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .greater => break, + else => {}, + }, + else => {}, + } + } + + break :blk switch (array_list.items.len) { + 0 => unreachable, + 1 => array_list.items[0], + else => try analyzer.nodeList(array_list.items), + }; + }, + }; + _ = try analyzer.expectToken(.equal); + _ = try analyzer.expectToken(.greater); + const is_left_brace = analyzer.tokens[analyzer.token_i].id == .left_brace; + const expr = switch (is_left_brace) { + true => try analyzer.block(.{ + .is_comptime = false, + }), + false => try analyzer.assignExpression(), + }; + + _ = try analyzer.expectToken(.comma); + + const node = try analyzer.addNode(.{ + .id = .switch_case, + .token = case_token, + .left = case_node, + .right = expr, + }); + + try list.append(analyzer.allocator, node); + } + + _ = try analyzer.expectToken(.right_brace); + + return try analyzer.addNode(.{ + .id = .@"switch", + .token = switch_token, + .left = switch_expression, + .right = try analyzer.nodeList(list.items), + }); + } + + fn 
ifExpression(analyzer: *Analyzer) anyerror!Node.Index { + const if_token = analyzer.token_i; + analyzer.token_i += 1; + + _ = try analyzer.expectToken(.left_parenthesis); + const if_expression = try analyzer.expression(); + _ = try analyzer.expectToken(.right_parenthesis); + + const if_block = try analyzer.block(.{ .is_comptime = false }); + + const if_node = try analyzer.addNode(.{ + .id = .@"if", + .token = if_token, + .left = if_expression, + .right = if_block, + }); + + const result = switch (analyzer.tokens[analyzer.token_i].id) { + .fixed_keyword_else => blk: { + analyzer.token_i += 1; + + break :blk try analyzer.addNode(.{ + .id = .if_else, + .token = if_token, + .left = if_node, + .right = try analyzer.expression(), + }); + }, + else => if_node, + }; + + return result; + } + fn assignExpression(analyzer: *Analyzer) !Node.Index { const expr = try analyzer.expression(); const expression_id: Node.Id = switch (analyzer.tokens[analyzer.token_i].id) { - .semicolon => return expr, + .semicolon, .comma => return expr, .equal => .assign, else => |t| @panic(@tagName(t)), }; @@ -362,8 +637,9 @@ const Analyzer = struct { .left = expr, .right = try analyzer.expression(), }; - std.debug.print("assign:\nleft: {}.\nright: {}\n", .{ node.left, node.right }); - return analyzer.addNode(node); + + logln(.parser, .assign, "assign:\nleft: {}.\nright: {}\n", .{ node.left, node.right }); + return try analyzer.addNode(node); } fn compilerIntrinsic(analyzer: *Analyzer) !Node.Index { @@ -390,7 +666,7 @@ const Analyzer = struct { const parameters = analyzer.temporal_node_heap.items[temporal_heap_top..]; - return switch (parameters.len) { + return try switch (parameters.len) { 1 => analyzer.addNode(.{ .id = .compiler_intrinsic_one, .token = hash, @@ -403,59 +679,114 @@ const Analyzer = struct { .left = parameters[0], .right = parameters[1], }), - else => unreachable, + else => analyzer.addNode(.{ + .id = .compiler_intrinsic, + .token = hash, + .left = try analyzer.nodeList(parameters), 
+ .right = Node.Index.invalid, + }), }; } - fn expression(analyzer: *Analyzer) error{ OutOfMemory, not_implemented, unexpected_token }!Node.Index { - return analyzer.expressionPrecedence(0); + fn expression(analyzer: *Analyzer) anyerror!Node.Index { + return try analyzer.expressionPrecedence(0); } fn expressionPrecedence(analyzer: *Analyzer, minimum_precedence: i32) !Node.Index { var result = try analyzer.prefixExpression(); + if (!result.invalid) { + const prefix_node = analyzer.nodes.items[result.unwrap()]; + logln(.parser, .precedence, "Prefix: {}\n", .{prefix_node.id}); + } var banned_precedence: i32 = -1; while (analyzer.token_i < analyzer.tokens.len) { - const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) { - .equal, .semicolon, .right_parenthesis, .right_brace, .comma, .period => -1, - else => |t| @panic(@tagName(t)), + const token = analyzer.tokens[analyzer.token_i]; + // logln("Looping in expression precedence with token {}\n", .{token}); + const precedence: i32 = switch (token.id) { + .equal, .semicolon, .right_parenthesis, .right_brace, .comma, .period, .fixed_keyword_const, .fixed_keyword_var => -1, + .bang => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .equal => 30, + else => unreachable, + }, + else => |t| { + const start = token.start; + logln(.parser, .precedence, "Source file:\n```\n{s}\n```\n", .{analyzer.source_file[start..]}); + @panic(@tagName(t)); + }, }; + logln(.parser, .precedence, "Precedence: {} ({s}) (file #{})\n", .{ precedence, @tagName(token.id), analyzer.file_index.uniqueInteger() }); if (precedence < minimum_precedence) { + logln(.parser, .precedence, "Breaking for minimum_precedence\n", .{}); break; } if (precedence == banned_precedence) { + logln(.parser, .precedence, "Breaking for banned precedence\n", .{}); break; } + const operator_token = analyzer.token_i; + const is_bang_equal = analyzer.tokens[operator_token].id == .bang and analyzer.tokens[operator_token + 1].id == .equal; + analyzer.token_i += 
@as(u32, 1) + @intFromBool(is_bang_equal); + // TODO: fix this - const node_index = try analyzer.expressionPrecedence(1); - _ = node_index; - unreachable; + const right = try analyzer.expressionPrecedence(precedence + 1); + + const operation_id: Node.Id = switch (is_bang_equal) { + true => .compare_not_equal, + false => switch (analyzer.tokens[operator_token].id) { + else => |t| @panic(@tagName(t)), + }, + }; + + result = try analyzer.addNode(.{ + .id = operation_id, + .token = operator_token, + .left = result, + .right = right, + }); + + const associativity: Associativity = switch (operation_id) { + .compare_equal, .compare_not_equal, .compare_less_than, .compare_greater_than, .compare_less_or_equal, .compare_greater_or_equal => .none, + else => .left, + }; + + if (associativity == .none) { + banned_precedence = precedence; + } } return result; } fn prefixExpression(analyzer: *Analyzer) !Node.Index { - switch (analyzer.tokens[analyzer.token_i].id) { - // .bang => .bool_not, - // .minus => .negation, - // .tilde => .bit_not, - // .minus_percent => .negation_wrap, - // .ampersand => .address_of, - // .keyword_try => .@"try", - // .keyword_await => .@"await", - + const token = analyzer.token_i; + // logln("Prefix...\n", .{}); + const node_id: Node.Id = switch (analyzer.tokens[token].id) { else => |pref| { _ = pref; - return analyzer.primaryExpression(); + return try analyzer.primaryExpression(); }, - } + .at => .address_of, + .bang => switch (analyzer.tokens[token + 1].id) { + .equal => return try analyzer.primaryExpression(), + else => unreachable, + }, + .minus, .tilde => |t| @panic(@tagName(t)), + }; - return error.not_implemented; + return try analyzer.addNode(.{ + .id = node_id, + .token = blk: { + analyzer.token_i += 1; + break :blk token; + }, + .left = try analyzer.prefixExpression(), + .right = Node.Index.invalid, + }); } fn primaryExpression(analyzer: *Analyzer) !Node.Index { @@ -464,8 +795,8 @@ const Analyzer = struct { .colon => unreachable, else => try 
analyzer.curlySuffixExpression(), }, - .string_literal, .number_literal, .fixed_keyword_true, .fixed_keyword_false, .hash, .fixed_keyword_unreachable => try analyzer.curlySuffixExpression(), - .fixed_keyword_fn => analyzer.function(), + .string_literal, .number_literal, .fixed_keyword_true, .fixed_keyword_false, .hash, .fixed_keyword_unreachable, .fixed_keyword_switch, .period, .fixed_keyword_enum, .keyword_signed_integer, .keyword_unsigned_integer => try analyzer.curlySuffixExpression(), + .fixed_keyword_fn => try analyzer.function(), .fixed_keyword_return => try analyzer.addNode(.{ .id = .@"return", .token = blk: { @@ -477,11 +808,8 @@ const Analyzer = struct { .right = Node.Index.invalid, }), // todo:? - // .left_brace => try analyzer.block(), - else => |id| { - log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)}); - unreachable; - }, + .left_brace => try analyzer.block(.{ .is_comptime = false }), + else => |id| std.debug.panic("WARN: By default, calling curlySuffixExpression with {s}", .{@tagName(id)}), }; return result; @@ -500,7 +828,7 @@ const Analyzer = struct { const token_i = analyzer.token_i; assert(analyzer.tokens[token_i].id == .fixed_keyword_noreturn); analyzer.token_i += 1; - return analyzer.addNode(.{ + return try analyzer.addNode(.{ .id = .keyword_noreturn, .token = token_i, .left = Node.Index.invalid, @@ -508,12 +836,15 @@ const Analyzer = struct { }); } - fn boolTrue(analyzer: *Analyzer) !Node.Index { + fn boolLiteral(analyzer: *Analyzer) !Node.Index { const token_i = analyzer.token_i; - assert(analyzer.tokens[token_i].id == .fixed_keyword_true); analyzer.token_i += 1; - return analyzer.addNode(.{ - .id = .keyword_true, + return try analyzer.addNode(.{ + .id = switch (analyzer.tokens[token_i].id) { + .fixed_keyword_true => .keyword_true, + .fixed_keyword_false => .keyword_false, + else => unreachable, + }, .token = token_i, .left = Node.Index.invalid, .right = Node.Index.invalid, @@ -521,9 +852,54 @@ const Analyzer = 
struct { } fn typeExpression(analyzer: *Analyzer) !Node.Index { - return switch (analyzer.tokens[analyzer.token_i].id) { - .identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false, .hash, .string_literal, .number_literal, .fixed_keyword_unreachable, .keyword_unsigned_integer, .keyword_signed_integer => try analyzer.errorUnionExpression(), - else => |id| @panic(@tagName(id)), + const first = analyzer.token_i; + return switch (analyzer.tokens[first].id) { + else => try analyzer.errorUnionExpression(), + .at => unreachable, // pointer + .bang => unreachable, // error + .left_bracket => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .at => { + // many item pointer + analyzer.token_i += 2; + _ = try analyzer.expectToken(.right_bracket); + + const is_const = analyzer.tokens[analyzer.token_i].id == .fixed_keyword_const; + analyzer.token_i += @intFromBool(is_const); + + const pointer_element_type = try analyzer.typeExpression(); + + return try analyzer.addNode(.{ + .id = .many_pointer_type, + .token = first, + .left = pointer_element_type, + .right = Node.Index.invalid, + }); + }, + else => { + const left_bracket = analyzer.token_i; + analyzer.token_i += 1; + // TODO: compute length + const length_expression = false; + _ = try analyzer.expectToken(.right_bracket); + + // Slice + if (!length_expression) { + // TODO: modifiers + const is_const = analyzer.tokens[analyzer.token_i].id == .fixed_keyword_const; + analyzer.token_i += @intFromBool(is_const); + + const slice_type = try analyzer.typeExpression(); + return try analyzer.addNode(.{ + .id = .slice_type, + .token = left_bracket, + .left = Node.Index.invalid, + .right = slice_type, + }); + } else { + unreachable; + } + }, + }, }; } @@ -531,7 +907,10 @@ const Analyzer = struct { const suffix_expression = try analyzer.suffixExpression(); return switch (analyzer.tokens[analyzer.token_i].id) { - .bang => unreachable, + .bang => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .equal => 
suffix_expression, + else => unreachable, + }, else => suffix_expression, }; } @@ -541,7 +920,7 @@ const Analyzer = struct { while (true) { const suffix_operator = try analyzer.suffixOperator(result); - if (suffix_operator.valid) { + if (!suffix_operator.invalid) { result = suffix_operator; } else { if (analyzer.tokens[analyzer.token_i].id == .left_parenthesis) { @@ -550,26 +929,43 @@ const Analyzer = struct { var expression_list = ArrayList(Node.Index){}; while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { - std.debug.print("Loop\n", .{}); + const current_token = analyzer.tokens[analyzer.token_i]; + logln(.parser, .suffix, "Current token: {s}\n", .{@tagName(current_token.id)}); const parameter = try analyzer.expression(); try expression_list.append(analyzer.allocator, parameter); - analyzer.token_i += @intFromBool(switch (analyzer.tokens[analyzer.token_i].id) { - .comma, .right_parenthesis => true, + const parameter_node = analyzer.nodes.items[parameter.unwrap()]; + logln(.parser, .suffix, "Paremeter node: {s}\n", .{@tagName(parameter_node.id)}); + const next_token = analyzer.tokens[analyzer.token_i]; + logln(.parser, .suffix, "next token: {s}\n", .{@tagName(next_token.id)}); + analyzer.token_i += @intFromBool(switch (next_token.id) { + .comma => true, .colon, .right_brace, .right_bracket => unreachable, - else => unreachable, + .right_parenthesis => false, + else => |t| @panic(@tagName(t)), }); } _ = try analyzer.expectToken(.right_parenthesis); // const is_comma = analyzer.tokens[analyzer.token_i].id == .comma; - return analyzer.addNode(switch (expression_list.items.len) { + return try analyzer.addNode(switch (expression_list.items.len) { 0 => .{ .id = .call_one, .token = left_parenthesis, .left = result, .right = Node.Index.invalid, }, - else => |len| std.debug.panic("len: {}", .{len}), + 1 => .{ + .id = .call_two, + .token = left_parenthesis, + .left = result, + .right = expression_list.items[0], + }, + else => .{ + .id = .call, + .token = 
left_parenthesis, + .left = result, + .right = try analyzer.nodeList(expression_list.items), + }, }); } else { return result; @@ -583,7 +979,7 @@ const Analyzer = struct { fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index { const token_i = analyzer.token_i; const token = analyzer.tokens[token_i]; - return switch (token.id) { + return try switch (token.id) { .string_literal => blk: { analyzer.token_i += 1; break :blk analyzer.addNode(.{ @@ -606,7 +1002,7 @@ const Analyzer = struct { .colon => unreachable, else => blk: { const identifier = analyzer.bytes(token_i); - // std.debug.print("identifier: {s}\n", .{identifier}); + // logln("identifier: {s}\n", .{identifier}); analyzer.token_i += 1; if (equal(u8, identifier, "_")) { break :blk Node.Index.invalid; @@ -618,9 +1014,9 @@ const Analyzer = struct { }); }, }, - .fixed_keyword_noreturn => try analyzer.noReturn(), - .fixed_keyword_true => try analyzer.boolTrue(), - .fixed_keyword_unreachable => try analyzer.addNode(.{ + .fixed_keyword_noreturn => analyzer.noReturn(), + .fixed_keyword_true, .fixed_keyword_false => analyzer.boolLiteral(), + .fixed_keyword_unreachable => analyzer.addNode(.{ .id = .@"unreachable", .token = blk: { analyzer.token_i += 1; @@ -630,7 +1026,7 @@ const Analyzer = struct { .right = Node.Index.invalid, }), .hash => analyzer.compilerIntrinsic(), - .keyword_unsigned_integer, .keyword_signed_integer => |signedness| try analyzer.addNode(.{ + .keyword_unsigned_integer, .keyword_signed_integer => |signedness| analyzer.addNode(.{ .id = switch (signedness) { .keyword_unsigned_integer => .unsigned_integer_type, .keyword_signed_integer => .signed_integer_type, @@ -640,9 +1036,78 @@ const Analyzer = struct { analyzer.token_i += 1; break :blk token_i; }, - .left = @bitCast(@as(u32, std.fmt.parseInt(u16, analyzer.bytes(token_i)[1..], 10) catch unreachable)), + .left = @bitCast(@as(u32, try std.fmt.parseInt(u16, analyzer.bytes(token_i)[1..], 10))), .right = Node.Index.invalid, }), + 
.fixed_keyword_usize, .fixed_keyword_ssize => |size_type| analyzer.addNode(.{ + .id = switch (size_type) { + .fixed_keyword_usize => .usize_type, + .fixed_keyword_ssize => .ssize_type, + else => unreachable, + }, + .token = blk: { + analyzer.token_i += 1; + break :blk token_i; + }, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }), + .fixed_keyword_void => analyzer.addNode(.{ + .id = .void_type, + .token = blk: { + analyzer.token_i += 1; + break :blk token_i; + }, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }), + .fixed_keyword_switch => try analyzer.switchExpression(), + .period => switch (analyzer.tokens[token_i + 1].id) { + .identifier => try analyzer.addNode(.{ + .id = .enum_literal, + .token = blk: { + analyzer.token_i += 2; + break :blk token_i; + }, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }), + else => |t| @panic(@tagName(t)), + }, + .fixed_keyword_enum => blk: { + analyzer.token_i += 1; + _ = try analyzer.expectToken(.left_brace); + + var enum_field_list = Node.List{}; + while (analyzer.tokens[analyzer.token_i].id != .right_brace) { + const enum_name = try analyzer.expectToken(.identifier); + const value_associated = switch (analyzer.tokens[analyzer.token_i].id) { + .comma => comma: { + analyzer.token_i += 1; + break :comma Node.Index.invalid; + }, + else => |t| @panic(@tagName(t)), + }; + + const enum_field_node = try analyzer.addNode(.{ + .id = .enum_field, + .token = enum_name, + .left = value_associated, + .right = Node.Index.invalid, + }); + + try enum_field_list.append(analyzer.allocator, enum_field_node); + } + + analyzer.token_i += 1; + + break :blk try analyzer.addNode(.{ + .id = .enum_type, + .token = token_i, + .left = try analyzer.nodeList(enum_field_list.items), + .right = Node.Index.invalid, + }); + }, else => |foo| { switch (foo) { .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.bytes(token_i) }), @@ -658,7 +1123,7 @@ const Analyzer = struct { return switch 
(token.id) { .left_bracket => unreachable, .period => switch (analyzer.tokens[analyzer.token_i + 1].id) { - .identifier => analyzer.addNode(.{ + .identifier => try analyzer.addNode(.{ .id = .field_access, .token = blk: { const main_token = analyzer.token_i; @@ -671,7 +1136,7 @@ const Analyzer = struct { const right_token = analyzer.token_i; analyzer.token_i += 1; const result: Node.Index = @bitCast(right_token); - std.debug.print("WARNING: rhs has node index {} but it's token #{}\n", .{ result, right_token }); + logln(.parser, .suffix, "WARNING: rhs has node index {} but it's token #{}\n", .{ result, right_token }); break :blk result; }, }), @@ -684,7 +1149,10 @@ const Analyzer = struct { fn addNode(analyzer: *Analyzer, node: Node) !Node.Index { const index = analyzer.nodes.items.len; try analyzer.nodes.append(analyzer.allocator, node); - std.debug.print("Adding node #{} {s}\n", .{ index, @tagName(node.id) }); + logln(.parser, .node_creation, "Adding node #{} (0x{x}) {s} to file #{}\n", .{ index, @intFromPtr(&analyzer.nodes.items[index]), @tagName(node.id), analyzer.file_index.uniqueInteger() }); + // if (node.id == .identifier) { + // logln("Node identifier: {s}\n", .{analyzer.bytes(node.token)}); + // } return Node.Index{ .value = @intCast(index), }; @@ -695,10 +1163,12 @@ const Analyzer = struct { var new_node_list = try ArrayList(Node.Index).initCapacity(analyzer.allocator, input.len); try new_node_list.appendSlice(analyzer.allocator, input); try analyzer.node_lists.append(analyzer.allocator, new_node_list); - - return .{ - .value = @intCast(index), - }; + return try analyzer.addNode(.{ + .id = .node_list, + .token = 0, + .left = .{ .value = @intCast(index) }, + .right = Node.Index.invalid, + }); } }; @@ -708,11 +1178,12 @@ const Members = struct { right: Node.Index, }; -pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !Result { +pub fn analyze(allocator: Allocator, tokens: []const Token, source_file: []const u8, file_index: 
File.Index) !Result { const start = std.time.Instant.now() catch unreachable; var analyzer = Analyzer{ .tokens = tokens, - .file = file, + .source_file = source_file, + .file_index = file_index, .allocator = allocator, }; const node_index = try analyzer.addNode(.{ @@ -723,12 +1194,14 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R }); assert(node_index.value == 0); - assert(node_index.valid); + assert(!node_index.invalid); + logln(.parser, .main_node, "Start Parsing file root members\n", .{}); const members = try analyzer.containerMembers(); + logln(.parser, .main_node, "End Parsing file root members\n", .{}); switch (members.len) { - 0 => unreachable, + 0 => analyzer.nodes.items[0].id = .main_zero, 1 => { analyzer.nodes.items[0].id = .main_one; analyzer.nodes.items[0].left = members.left; @@ -738,7 +1211,10 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R analyzer.nodes.items[0].left = members.left; analyzer.nodes.items[0].right = members.right; }, - else => unreachable, + else => { + analyzer.nodes.items[0].id = .main; + analyzer.nodes.items[0].left = members.left; + }, } const end = std.time.Instant.now() catch unreachable; @@ -775,3 +1251,8 @@ pub const SymbolDeclaration = struct { initialization_node: Node.Index, mutability_token: Token.Index, }; + +const Associativity = enum { + none, + left, +}; diff --git a/src/main.zig b/src/main.zig index 583297d..89c7c0e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,28 +1,12 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const assert = std.debug.assert; const Compilation = @import("Compilation.zig"); - -pub const seed = std.math.maxInt(u64); -const default_src_file = "src/test/main.nat"; +pub const panic = Compilation.panic; pub fn main() !void { const allocator = std.heap.page_allocator; - const arguments = try std.process.argsAlloc(allocator); - if (arguments.len == 2) { - try singleCompilation(allocator, arguments[1]); - 
} else { - @panic("Wrong arguments"); - } -} - -fn singleCompilation(allocator: Allocator, main_file_path: []const u8) !void { - const compilation = try Compilation.init(allocator); - - try compilation.compileModule(.{ - .main_package_path = main_file_path, - }); + try Compilation.init(allocator); } test { diff --git a/test/first/main.nat b/test/first/main.nat index 45bfaac..34ec86b 100644 --- a/test/first/main.nat +++ b/test/first/main.nat @@ -1,3 +1,3 @@ const main = fn() s32 { return 0; -}; +} diff --git a/test/hello_world/main.nat b/test/hello_world/main.nat new file mode 100644 index 0000000..595a9f5 --- /dev/null +++ b/test/hello_world/main.nat @@ -0,0 +1,6 @@ +const std = #import("std"); + +const main = fn() s32 { + std.print("Hello world!\n", 13); + return 0; +} diff --git a/test/stack/main.nat b/test/stack/main.nat new file mode 100644 index 0000000..4574781 --- /dev/null +++ b/test/stack/main.nat @@ -0,0 +1,4 @@ +const main = fn() s32 { + var a : s32 = 0; + return a; +}