From c7bcfa1de92c1d96ae63671d3448d9660af9b602 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Sun, 1 Oct 2023 14:01:59 -0600 Subject: [PATCH] instruction selection and register allocation --- src/Compilation.zig | 35 +- src/backend/emit.zig | 78 ++-- src/backend/intermediate_representation.zig | 381 +++++++++++++----- src/backend/x86_64.zig | 403 +++++++++++++++++++- src/data_structures.zig | 1 + src/frontend/semantic_analyzer.zig | 46 ++- 6 files changed, 777 insertions(+), 167 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 0c56802..54f89fa 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -71,12 +71,25 @@ pub const Type = union(enum) { void, noreturn, bool, - integer: Integer, + integer: Type.Integer, @"struct": Struct.Index, pub const List = BlockList(@This()); pub const Index = List.Index; pub const Allocation = List.Allocation; + pub const Integer = struct { + bit_count: u16, + signedness: Signedness, + pub const Signedness = enum(u1) { + unsigned = 0, + signed = 1, + }; + + pub fn getSize(integer: Type.Integer) u64 { + return integer.bit_count / @bitSizeOf(u8) + @intFromBool(integer.bit_count % @bitSizeOf(u8) != 0); + } + }; + pub fn getSize(type_info: Type) u64 { return switch (type_info) { .integer => |integer| integer.getSize(), @@ -92,19 +105,6 @@ pub const Type = union(enum) { } }; -pub const Integer = struct { - bit_count: u16, - signedness: Signedness, - pub const Signedness = enum(u1) { - unsigned = 0, - signed = 1, - }; - - pub fn getSize(integer: Integer) u64 { - return integer.bit_count / @bitSizeOf(u8) + @intFromBool(integer.bit_count % @bitSizeOf(u8) != 0); - } -}; - /// A scope contains a bunch of declarations pub const Scope = struct { declarations: AutoHashMap(u32, Declaration.Index) = .{}, @@ -253,7 +253,7 @@ pub const Value = union(enum) { runtime: Runtime, assign: Assignment.Index, type: Type.Index, - integer: u64, + integer: Integer, syscall: Syscall.Index, call: Call.Index, argument_list: ArgumentList, @@ -278,6 +278,11 @@ pub const Value = union(enum) { } }; +pub const Integer = struct { + value: u64, + type: Type.Integer, +}; + pub const Module = struct { main_package: *Package, import_table: StringArrayHashMap(*File) = .{}, diff --git a/src/backend/emit.zig b/src/backend/emit.zig index 5299d57..97c4c2e 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -107,16 +107,14 @@ pub fn InstructionSelector(comptime Instruction: type) type { pub const Function = struct { instructions: ArrayList(Instruction) = .{}, - block_byte_counts: ArrayList(u16), - block_offsets: ArrayList(u32), relocations: ArrayList(u32) = .{}, block_map: AutoHashMap(ir.BasicBlock.Index, u32) = .{}, - byte_count: u32 = 0, - block_byte_count: u16 = 0, - pub fn selectInstruction(function: *Function, allocator: Allocator, instruction: Instruction) !void { + pub fn addInstruction(function: *Function, allocator: Allocator, instruction: Instruction) !u32 { + const index = function.instructions.items.len; try function.instructions.append(allocator, instruction); - function.block_byte_count += Instruction.descriptors.get(instruction).size; + + return @intCast(index); } }; @@ -130,45 +128,43 @@ pub fn get(comptime arch: std.Target.Cpu.Arch) type { else => @compileError("Architecture not supported"), }; const Instruction = backend.Instruction; + _ = Instruction; return struct { pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void { var result = try Result.create(); - var function_iterator = intermediate.functions.iterator(); - const IS = InstructionSelector(Instruction); - var instruction_selector = IS{ - .functions = try ArrayList(IS.Function).initCapacity(allocator, intermediate.functions.len), - .allocator = allocator, - }; - - while (function_iterator.next()) |ir_function| { - const function = instruction_selector.functions.addOneAssumeCapacity(); - function.* = .{ - .block_byte_counts = try ArrayList(u16).initCapacity(allocator, ir_function.blocks.items.len), - .block_offsets = try ArrayList(u32).initCapacity(allocator, ir_function.blocks.items.len), - }; - try function.block_map.ensureTotalCapacity(allocator, @intCast(ir_function.blocks.items.len)); - for (ir_function.blocks.items, 0..) |block_index, index| { - function.block_map.putAssumeCapacity(block_index, @intCast(index)); - } - - for (ir_function.blocks.items) |block_index| { - const block = intermediate.blocks.get(block_index); - function.block_offsets.appendAssumeCapacity(function.byte_count); - function.block_byte_count = 0; - for (block.instructions.items) |instruction_index| { - const instruction = intermediate.instructions.get(instruction_index).*; - try backend.selectInstruction(&instruction_selector, function, intermediate, instruction); - } - - function.block_byte_counts.appendAssumeCapacity(function.block_byte_count); - function.byte_count += function.block_byte_count; - } - } - - for (instruction_selector.functions.items) |function| { - for (function.instructions.items) |instruction| backend.emitInstruction(&result, instruction, intermediate); - } + var mir = try backend.MIR.generate(allocator, intermediate); + try mir.allocateRegisters(allocator, intermediate); + // var function_iterator = intermediate.functions.iterator(); + // const IS = InstructionSelector(Instruction); + // var instruction_selector = IS{ + // .functions = try ArrayList(IS.Function).initCapacity(allocator, intermediate.functions.len), + // .allocator = allocator, + // }; + // + // while (function_iterator.next()) |ir_function| { + // const function = instruction_selector.functions.addOneAssumeCapacity(); + // function.* = .{}; + // try function.block_map.ensureTotalCapacity(allocator, @intCast(ir_function.blocks.items.len)); + // for (ir_function.blocks.items, 0..) |block_index, index| { + // function.block_map.putAssumeCapacity(block_index, @intCast(index)); + // } + // + // for (ir_function.blocks.items) |block_index| { + // const block = intermediate.blocks.get(block_index); + // for (block.instructions.items) |instruction_index| { + // const instruction = intermediate.instructions.get(instruction_index).*; + // try backend.selectInstruction(&instruction_selector, function, intermediate, instruction); + // } + // + // // function.block_byte_counts.appendAssumeCapacity(function.block_byte_count); + // // function.byte_count += function.block_byte_count; + // } + // } + // + // for (instruction_selector.functions.items) |function| { + // for (function.instructions.items) |instruction| backend.emitInstruction(&result, instruction, intermediate); + // } // for (instruction_selector.functions.items) |function| { // var fix_size: bool = false; diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index 643609f..0ff9b3e 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -10,6 +10,7 @@ const Package = Compilation.Package; const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; const BlockList = data_structures.BlockList; +const AutoArrayHashMap = data_structures.AutoArrayHashMap; const AutoHashMap = data_structures.AutoHashMap; pub const Result = struct { @@ -38,9 +39,13 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, }; while (function_iterator.next()) |sema_function| { - print("\nFunction: {}\n", .{sema_function}); + const function_index = try builder.buildFunction(sema_function); + try builder.optimizeFunction(function_index); + } - try builder.function(sema_function); + var ir_function_iterator = builder.ir.functions.iterator(); + while (ir_function_iterator.nextPointer()) |function| { + print("\n{}\n", .{function}); } return builder.ir; @@ -70,9 +75,10 @@ pub const Instruction = union(enum) { jump: Jump.Index, load: Load.Index, phi: Phi.Index, - ret: Ret, + ret: Value.Index, store: Store.Index, - syscall: Syscall.Index, + syscall: Value.Index, + copy: Value.Index, @"unreachable", pub const List = BlockList(@This()); @@ -88,10 +94,6 @@ const Phi = struct { pub const Index = List.Index; }; -const Ret = struct { - value: Instruction.Index, -}; - pub const Jump = struct { source: BasicBlock.Index, destination: BasicBlock.Index, @@ -114,12 +116,12 @@ const Load = struct { const Store = struct { source: Value.Index, - destination: StackReference.Index, + destination: Value.Index, pub const List = BlockList(@This()); pub const Index = List.Index; }; -const StackReference = struct { +pub const StackReference = struct { size: u64, alignment: u64, offset: u64, @@ -127,7 +129,7 @@ const StackReference = struct { pub const Index = List.Index; }; -const Call = struct { +pub const Call = struct { function: Function.Index, pub const List = BlockList(@This()); @@ -136,10 +138,13 @@ const Call = struct { }; pub const Value = union(enum) { - integer: Integer, + integer: Compilation.Integer, load: Load.Index, call: Call.Index, stack_reference: StackReference.Index, + phi: Phi.Index, + instruction: Instruction.Index, + syscall: Syscall.Index, pub const List = BlockList(@This()); pub const Index = List.Index; @@ -149,88 +154,280 @@ pub const Value = union(enum) { .load => true, .call => true, .stack_reference => true, + .phi => unreachable, + .instruction => unreachable, + .syscall => unreachable, }; } }; -const Integer = struct { - value: u64, - sign: bool, -}; - -const Function = struct { +pub const Function = struct { blocks: ArrayList(BasicBlock.Index) = .{}, + stack_map: AutoHashMap(Compilation.Declaration.Index, Value.Index) = .{}, + current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid, + return_phi_node: Instruction.Index = Instruction.Index.invalid, + return_phi_block: BasicBlock.Index = BasicBlock.Index.invalid, + ir: *Result, + current_stack_offset: usize = 0, pub const List = BlockList(@This()); pub const Index = List.Index; + + pub fn format(function: *const Function, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try writer.writeAll("Function:\n"); + for (function.blocks.items, 0..) |block_index, function_block_index| { + try writer.print("#{}:\n", .{function_block_index}); + const block = function.ir.blocks.get(block_index); + for (block.instructions.items, 0..) |instruction_index, block_instruction_index| { + try writer.print("%{}: ", .{block_instruction_index}); + const instruction = function.ir.instructions.get(instruction_index).*; + try writer.print("{s}", .{@tagName(instruction)}); + try writer.writeByte('\n'); + } + try writer.writeByte('\n'); + } + _ = options; + _ = fmt; + } }; pub const Builder = struct { allocator: Allocator, ir: Result = .{}, module: *Module, - current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid, current_function_index: Function.Index = Function.Index.invalid, - return_phi_node: Instruction.Index = Instruction.Index.invalid, - current_stack_offset: usize = 0, - stack_map: AutoHashMap(Compilation.Declaration.Index, StackReference.Index) = .{}, - fn function(builder: *Builder, sema_function: Compilation.Function) !void { - builder.current_function_index = (try builder.ir.functions.append(builder.allocator, .{})).index; + fn currentFunction(builder: *Builder) *Function { + return builder.ir.functions.get(builder.current_function_index); + } + + fn buildFunction(builder: *Builder, sema_function: Compilation.Function) !Function.Index { + const function_allocation = try builder.ir.functions.append(builder.allocator, .{ + .ir = &builder.ir, + }); + builder.current_function_index = function_allocation.index; + const function = function_allocation.ptr; // TODO: arguments - builder.current_basic_block = try builder.newBlock(); - builder.current_stack_offset = 0; - builder.stack_map = .{}; + function.current_basic_block = try builder.newBlock(); const return_type = builder.module.types.get(builder.module.function_prototypes.get(sema_function.prototype).return_type); const is_noreturn = return_type.* == .noreturn; if (!is_noreturn) { const exit_block = try builder.newBlock(); - const phi = try builder.ir.phis.addOne(builder.allocator); const phi_instruction = try builder.appendToBlock(exit_block, .{ - .phi = phi.index, + .phi = Phi.Index.invalid, }); - phi.ptr.* = .{ - .value = Value.Index.invalid, - .jump = Jump.Index.invalid, - .block = exit_block, - .next = Phi.Index.invalid, - }; + // phi.ptr.* = .{ + // .value = Value.Index.invalid, + // .jump = Jump.Index.invalid, + // .block = exit_block, + // .next = Phi.Index.invalid, + // }; const ret = try builder.appendToBlock(exit_block, .{ - .ret = .{ - .value = phi_instruction, - }, + .ret = (try builder.ir.values.append(builder.allocator, .{ + .instruction = phi_instruction, + })).index, }); _ = ret; - builder.return_phi_node = phi_instruction; + function.return_phi_node = phi_instruction; + function.return_phi_block = exit_block; } const sema_block = sema_function.getBodyBlock(builder.module); try builder.block(sema_block, .{ .emit_exit_block = !is_noreturn }); - try builder.dumpFunction(std.io.getStdErr().writer(), builder.current_function_index); + builder.currentFunction().current_stack_offset = std.mem.alignForward(usize, builder.currentFunction().current_stack_offset, 0x10); + + return builder.current_function_index; } - fn dumpFunction(builder: *Builder, writer: anytype, index: Function.Index) !void { - const f = builder.ir.functions.get(index); - try writer.writeAll("Hello world!\n"); - print("Function blocks: {}\n", .{f.blocks.items.len}); - var function_instruction_index: usize = 0; - for (f.blocks.items, 0..) |block_index, function_block_index| { - print("#{}:\n", .{function_block_index}); - const function_block = builder.ir.blocks.get(block_index); - for (function_block.instructions.items) |instruction_index| { - const instruction = builder.ir.instructions.get(instruction_index); - print("%{}: {}\n", .{ function_instruction_index, instruction }); - function_instruction_index += 1; - } + const BlockSearcher = struct { + to_visit: ArrayList(BasicBlock.Index) = .{}, + visited: AutoArrayHashMap(BasicBlock.Index, void) = .{}, + }; - print("\n", .{}); + fn findReachableBlocks(builder: *Builder, first: BasicBlock.Index) ![]const BasicBlock.Index { + var searcher = BlockSearcher{}; + try searcher.to_visit.append(builder.allocator, first); + try searcher.visited.put(builder.allocator, first, {}); + + while (searcher.to_visit.items.len > 0) { + const block_index = searcher.to_visit.swapRemove(0); + const block_to_visit = builder.ir.blocks.get(block_index); + const last_instruction_index = block_to_visit.instructions.items[block_to_visit.instructions.items.len - 1]; + const last_instruction = builder.ir.instructions.get(last_instruction_index); + switch (last_instruction.*) { + .jump => |jump_index| { + const ir_jump = builder.ir.jumps.get(jump_index); + assert(ir_jump.source.eq(block_index)); + const new_block = ir_jump.destination; + if (searcher.visited.get(new_block) == null) { + try searcher.to_visit.append(builder.allocator, new_block); + try searcher.visited.put(builder.allocator, new_block, {}); + } + }, + .@"unreachable", .ret => {}, + else => |t| @panic(@tagName(t)), + } + } + + return searcher.visited.keys(); + } + + fn optimizeFunction(builder: *Builder, function_index: Function.Index) !void { + const function = builder.ir.functions.get(function_index); + const reachable_blocks = try builder.findReachableBlocks(function.blocks.items[0]); + var did_something = true; + + while (did_something) { + did_something = false; + for (reachable_blocks) |basic_block_index| { + const basic_block = builder.ir.blocks.get(basic_block_index); + for (basic_block.instructions.items) |instruction_index| { + did_something = did_something or try builder.removeUnreachablePhis(reachable_blocks, instruction_index); + did_something = did_something or try builder.removeTrivialPhis(instruction_index); + did_something = did_something or try builder.removeCopies(instruction_index); + } + } } } + fn removeUnreachablePhis(builder: *Builder, reachable_blocks: []const BasicBlock.Index, instruction_index: Instruction.Index) !bool { + const instruction = builder.ir.instructions.get(instruction_index); + return switch (instruction.*) { + .phi => blk: { + var did_something = false; + var head = &instruction.phi; + next: while (head.valid) { + const phi = builder.ir.phis.get(head.*); + const phi_jump = builder.ir.jumps.get(phi.jump); + assert(phi_jump.source.valid); + + for (reachable_blocks) |block_index| { + if (phi_jump.source.eq(block_index)) { + head = &phi.next; + continue :next; + } + } + + head.* = phi.next; + did_something = true; + } + + break :blk did_something; + }, + else => false, + }; + } + + fn removeTrivialPhis(builder: *Builder, instruction_index: Instruction.Index) !bool { + const instruction = builder.ir.instructions.get(instruction_index); + return switch (instruction.*) { + .phi => |phi_index| blk: { + const trivial_phi: ?Value.Index = trivial_blk: { + var only_value = Value.Index.invalid; + var it = phi_index; + + while (it.valid) { + const phi = builder.ir.phis.get(it); + const phi_value = builder.ir.values.get(phi.value); + if (phi_value.* == .phi) unreachable; + // TODO: undefined + if (only_value.valid) { + if (!only_value.eq(phi.value)) { + break :trivial_blk null; + } + } else { + only_value = phi.value; + } + + it = phi.next; + } + + break :trivial_blk only_value; + }; + + if (trivial_phi) |trivial_value| { + if (trivial_value.valid) { + // Option to delete + const delete = false; + if (delete) { + unreachable; + } else { + instruction.* = .{ + .copy = trivial_value, + }; + } + } else { + unreachable; + } + } + + break :blk instruction.* != .phi; + }, + else => false, + }; + } + + fn removeCopies(builder: *Builder, instruction_index: Instruction.Index) !bool { + const instruction = builder.ir.instructions.get(instruction_index); + return switch (instruction.*) { + .copy => false, + else => { + var did_something = false; + + const operands: []const *Value.Index = switch (instruction.*) { + .jump, .@"unreachable" => &.{}, + .ret => &.{&instruction.ret}, + // TODO: arguments + .call => blk: { + var list = ArrayList(*Value.Index){}; + break :blk list.items; + }, + .store => |store_index| blk: { + const store_instr = builder.ir.stores.get(store_index); + break :blk &.{ &store_instr.source, &store_instr.destination }; + }, + .syscall => |syscall_value_index| blk: { + const syscall_value = builder.ir.values.get(syscall_value_index); + const syscall = builder.ir.syscalls.get(syscall_value.syscall); + var list = ArrayList(*Value.Index){}; + try list.ensureTotalCapacity(builder.allocator, syscall.arguments.items.len); + for (syscall.arguments.items) |*arg| { + list.appendAssumeCapacity(arg); + } + + break :blk list.items; + }, + else => |t| @panic(@tagName(t)), + }; + + for (operands) |operand_value_index| { + const operand_value = builder.ir.values.get(operand_value_index.*); + switch (operand_value.*) { + .instruction => |operand_instruction_index| { + const operand_instruction = builder.ir.instructions.get(operand_instruction_index); + switch (operand_instruction.*) { + .copy => |copy_value| { + operand_value_index.* = copy_value; + did_something = true; + }, + else => |t| @panic(@tagName(t)), + } + }, + .integer, .stack_reference, .call => {}, + else => |t| @panic(@tagName(t)), + } + } + + return did_something; + }, + }; + } + fn blockInsideBasicBlock(builder: *Builder, sema_block: *Compilation.Block, block_index: BasicBlock.Index) !BasicBlock.Index { - builder.current_basic_block = block_index; + const current_function = builder.currentFunction(); + current_function.current_basic_block = block_index; try builder.block(sema_block, .{}); - return builder.current_basic_block; + return current_function.current_basic_block; } const BlockOptions = packed struct { @@ -253,7 +450,7 @@ pub const Builder = struct { else => |t| @panic(@tagName(t)), }; - const original_block = builder.current_basic_block; + const original_block = builder.currentFunction().current_basic_block; const jump_to_loop = try builder.append(.{ .jump = undefined, }); @@ -271,7 +468,7 @@ pub const Builder = struct { }); const sema_body_block = builder.module.blocks.get(sema_loop_body.block); - builder.current_basic_block = try builder.blockInsideBasicBlock(sema_body_block, loop_body_block); + builder.currentFunction().current_basic_block = try builder.blockInsideBasicBlock(sema_body_block, loop_body_block); if (loop_prologue_block.valid) { builder.ir.blocks.get(loop_prologue_block).seal(); } @@ -279,20 +476,20 @@ pub const Builder = struct { if (sema_body_block.reaches_end) { _ = try builder.append(.{ .jump = try builder.jump(.{ - .source = builder.current_basic_block, + .source = builder.currentFunction().current_basic_block, .destination = loop_head_block, }), }); } - builder.ir.blocks.get(builder.current_basic_block).filled = true; + builder.ir.blocks.get(builder.currentFunction().current_basic_block).filled = true; builder.ir.blocks.get(loop_body_block).seal(); if (!loop_head_block.eq(loop_body_block)) { unreachable; } if (loop_prologue_block.valid) { - builder.current_basic_block = loop_prologue_block; + builder.currentFunction().current_basic_block = loop_prologue_block; } }, .syscall => |syscall_index| { @@ -307,13 +504,16 @@ pub const Builder = struct { for (sema_syscall.getArguments()) |sema_syscall_argument| { assert(sema_syscall_argument.valid); - const argument_value_index = try builder.emitValue(sema_syscall_argument); + var argument_value_index = try builder.emitValue(sema_syscall_argument); arguments.appendAssumeCapacity(argument_value_index); } + // TODO: undo this mess _ = try builder.append(.{ - .syscall = (try builder.ir.syscalls.append(builder.allocator, .{ - .arguments = arguments, + .syscall = (try builder.ir.values.append(builder.allocator, .{ + .syscall = (try builder.ir.syscalls.append(builder.allocator, .{ + .arguments = arguments, + })).index, })).index, }); }, @@ -323,9 +523,19 @@ pub const Builder = struct { .@"return" => |sema_ret_index| { const sema_ret = builder.module.returns.get(sema_ret_index); const return_value = try builder.emitValue(sema_ret.value); - const phi_instruction = builder.ir.instructions.get(builder.return_phi_node); - const phi = builder.ir.phis.get(phi_instruction.phi); - const exit_jump = try builder.jump(.{ .source = builder.current_basic_block, .destination = phi.block }); + const phi_instruction = builder.ir.instructions.get(builder.currentFunction().return_phi_node); + const phi = switch (phi_instruction.phi.valid) { + true => unreachable, + false => (try builder.ir.phis.append(builder.allocator, std.mem.zeroes(Phi))).ptr, + }; //builder.ir.phis.get(phi_instruction.phi); + const exit_jump = try builder.jump(.{ + .source = builder.currentFunction().current_basic_block, + .destination = switch (phi_instruction.phi.valid) { + true => phi.block, + false => builder.currentFunction().return_phi_block, + }, + }); + print("Previous phi: {}\n", .{phi_instruction.phi}); phi_instruction.phi = (try builder.ir.phis.append(builder.allocator, .{ .value = return_value, .jump = exit_jump, @@ -368,18 +578,20 @@ pub const Builder = struct { } } - fn stackReference(builder: *Builder, stack_offset: u64, t: Compilation.Type, value: Compilation.Declaration.Index) !StackReference.Index { + fn stackReference(builder: *Builder, stack_offset: u64, t: Compilation.Type, sema_declaration: Compilation.Declaration.Index) !Value.Index { const stack_reference_allocation = try builder.ir.stack_references.append(builder.allocator, .{ .offset = stack_offset, .size = t.getSize(), .alignment = t.getAlignment(), }); - const index = stack_reference_allocation.index; + const value_allocation = try builder.ir.values.append(builder.allocator, .{ + .stack_reference = stack_reference_allocation.index, + }); - try builder.stack_map.put(builder.allocator, value, index); + try builder.currentFunction().stack_map.put(builder.allocator, sema_declaration, value_allocation.index); - return index; + return value_allocation.index; } fn store(builder: *Builder, descriptor: Store) !void { @@ -390,9 +602,9 @@ pub const Builder = struct { } fn allocateStack(builder: *Builder, size: u64, alignment: u64) u64 { - builder.current_stack_offset = std.mem.alignForward(u64, builder.current_stack_offset, alignment); - builder.current_stack_offset += size; - return builder.current_stack_offset; + builder.currentFunction().current_stack_offset = std.mem.alignForward(u64, builder.currentFunction().current_stack_offset, alignment); + builder.currentFunction().current_stack_offset += size; + return builder.currentFunction().current_stack_offset; } fn load(builder: *Builder, value_index: Value.Index) !Value.Index { @@ -416,10 +628,7 @@ pub const Builder = struct { return switch (sema_value) { // TODO .integer => |integer| (try builder.ir.values.append(builder.allocator, .{ - .integer = .{ - .value = integer, - .sign = false, - }, + .integer = integer, })).index, .call => |sema_call_index| { const sema_call = builder.module.calls.get(sema_call_index); @@ -455,11 +664,8 @@ pub const Builder = struct { _ = init_type; switch (sema_declaration.scope_type) { .local => { - const stack_reference = builder.stack_map.get(sema_declaration_index).?; - const value = try builder.ir.values.append(builder.allocator, .{ - .stack_reference = stack_reference, - }); - return value.index; + const stack_reference = builder.currentFunction().stack_map.get(sema_declaration_index).?; + return stack_reference; }, .global => unreachable, } @@ -484,11 +690,16 @@ pub const Builder = struct { } fn append(builder: *Builder, instruction: Instruction) !Instruction.Index { - assert(builder.current_basic_block.valid); - return builder.appendToBlock(builder.current_basic_block, instruction); + assert(builder.current_function_index.valid); + const current_function = builder.currentFunction(); + assert(current_function.current_basic_block.valid); + return builder.appendToBlock(current_function.current_basic_block, instruction); } fn appendToBlock(builder: *Builder, block_index: BasicBlock.Index, instruction: Instruction) !Instruction.Index { + if (instruction == .phi) { + print("Adding phi: {}\n", .{instruction}); + } const instruction_allocation = try builder.ir.instructions.append(builder.allocator, instruction); try builder.ir.blocks.get(block_index).instructions.append(builder.allocator, instruction_allocation.index); diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index 1963e19..7e316ab 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -1,10 +1,18 @@ const std = @import("std"); +const Allocator = std.mem.Allocator; const assert = std.debug.assert; const print = std.debug.print; const emit = @import("emit.zig"); const ir = @import("./intermediate_representation.zig"); +const Compilation = @import("../Compilation.zig"); + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const AutoArrayHashMap = data_structures.AutoArrayHashMap; + const InstructionSelector = emit.InstructionSelector(Instruction); +const x86_64 = @This(); const Size = enum(u2) { one = 0, @@ -13,9 +21,372 @@ const Size = enum(u2) { eight = 3, }; +pub const MIR = struct { + functions: ArrayList(Function) = .{}, + const GPRegister = struct { + value: ?x86_64.GPRegister = null, + can_omit_if_present: bool = true, + }; + const Stack = struct { + offset: u64, + }; + const Function = struct { + instructions: ArrayList(MIR.Instruction) = .{}, + blocks: AutoArrayHashMap(ir.BasicBlock.Index, u32) = .{}, + }; + const Instruction = struct { + operands: [4]Operand, + ir: ir.Instruction.Index, + id: Id, + operand_count: u8 = 0, + + pub fn getOperands(instruction: *MIR.Instruction) []Operand { + return instruction.operands[0..instruction.operand_count]; + } + + const Id = enum(u16) { + call, + jmp, + mov, + push, + ret, + sub, + syscall, + ud2, + }; + + fn new(id: Id, reference: ir.Instruction.Index, operands: []const Operand) MIR.Instruction { + var out_operands: [4]Operand = undefined; + @memset(std.mem.asBytes(&out_operands), 0); + @memcpy(out_operands[0..operands.len], operands); + + return .{ + .operands = out_operands, + .ir = reference, + .id = id, + .operand_count = @intCast(operands.len), + }; + } + + const Operand = union(enum) { + gp_register: MIR.GPRegister, + fp_register, + memory, + relative: union(enum) { + block: ir.BasicBlock.Index, + function: ir.Function.Index, + }, + immediate: Compilation.Integer, + stack: Stack, + }; + }; + + const RegisterUse = union(enum) { + general, + ret, + param: x86_64.GPRegister, + syscall_param: x86_64.GPRegister, + }; + + fn movRegImm(function: *Function, allocator: Allocator, integer: Compilation.Integer, instruction_index: ir.Instruction.Index, use: RegisterUse) !void { + if (integer.type.bit_count <= @bitSizeOf(u64)) { + switch (integer.type.signedness) { + .signed, .unsigned => { + if (integer.value <= std.math.maxInt(u32)) { + try function.instructions.append(allocator, MIR.Instruction.new(.mov, instruction_index, &.{ + .{ + .gp_register = .{ + .value = switch (use) { + .general => null, + .ret => .a, + .param => unreachable, + .syscall_param => |register| register, + }, + }, + }, + .{ .immediate = integer }, + })); + } else { + unreachable; + } + }, + } + } else { + unreachable; + } + } + + fn movRegStack(function: *Function, allocator: Allocator, use: RegisterUse, stack_reference: ir.StackReference, instruction_index: ir.Instruction.Index) !void { + if (stack_reference.size <= @sizeOf(u64)) { + switch (stack_reference.size) { + @sizeOf(u8) => unreachable, + @sizeOf(u16) => unreachable, + @sizeOf(u32) => { + try function.instructions.append(allocator, MIR.Instruction.new(.mov, instruction_index, &.{ + .{ + .gp_register = .{ + .value = switch (use) { + .general => null, + .ret => unreachable, + .param => unreachable, + .syscall_param => |syscall_register| syscall_register, + }, + }, + }, + .{ + .stack = .{ + .offset = stack_reference.offset, + }, + }, + })); + }, + @sizeOf(u64) => unreachable, + else => unreachable, + } + } else { + unreachable; + } + } + + pub fn generate(allocator: Allocator, intermediate: *ir.Result) !MIR { + var mir = MIR{}; + try mir.functions.ensureTotalCapacity(allocator, intermediate.functions.len); + var ir_function_it = intermediate.functions.iterator(); + + while (ir_function_it.nextPointer()) |ir_function| { + const function = mir.functions.addOneAssumeCapacity(); + function.* = .{}; + try function.blocks.ensureTotalCapacity(allocator, ir_function.blocks.items.len); + for (ir_function.blocks.items) |block_index| { + function.blocks.putAssumeCapacity(block_index, @intCast(function.instructions.items.len)); + const basic_block = intermediate.blocks.get(block_index); + + if (ir_function.current_stack_offset > 0) { + // TODO: switch on ABI + try function.instructions.append(allocator, MIR.Instruction.new(.push, ir.Instruction.Index.invalid, &.{ + .{ .gp_register = .{ .value = .bp } }, + })); + + try function.instructions.append(allocator, MIR.Instruction.new(.mov, ir.Instruction.Index.invalid, &.{ + .{ .gp_register = .{ .value = .bp } }, + .{ .gp_register = .{ .value = .sp } }, + })); + + try function.instructions.append(allocator, MIR.Instruction.new(.sub, ir.Instruction.Index.invalid, &.{ + .{ .gp_register = .{ .value = .sp } }, + .{ + .immediate = Compilation.Integer{ + .value = ir_function.current_stack_offset, + .type = .{ + .bit_count = 8, + .signedness = .unsigned, + }, + }, + }, + })); + } + + for (basic_block.instructions.items) |instruction_index| { + const instruction = intermediate.instructions.get(instruction_index); + switch (instruction.*) { + .jump => |jump_index| { + const jump = intermediate.jumps.get(jump_index); + try function.instructions.append(allocator, MIR.Instruction.new(.jmp, instruction_index, &.{ + .{ .relative = .{ .block = jump.destination } }, + })); + }, + .copy => |copy_value_index| { + const copy_value = intermediate.values.get(copy_value_index); + switch (copy_value.*) { + .integer => |integer| try movRegImm(function, allocator, integer, instruction_index, .general), + else => |t| @panic(@tagName(t)), + } + }, + .ret => |ret_value_index| { + const ret_value = intermediate.values.get(ret_value_index); + switch (ret_value.*) { + .integer => |integer| try movRegImm(function, allocator, integer, instruction_index, .ret), + else => |t| @panic(@tagName(t)), + } + + if (ir_function.current_stack_offset > 0) { + unreachable; + } + + try function.instructions.append(allocator, MIR.Instruction.new(.ret, instruction_index, &.{})); + }, + .call => |call_value_index| { + // TODO: args + const call = intermediate.calls.get(call_value_index); + try function.instructions.append(allocator, MIR.Instruction.new(.call, instruction_index, &.{ + .{ .relative = .{ .function = call.function } }, + })); + }, + .store => |store_index| { + const store = intermediate.stores.get(store_index); + const source_value = intermediate.values.get(store.source); + const destination_value = intermediate.values.get(store.destination); + switch (destination_value.*) { + .stack_reference => |stack_reference_index| { + const stack_reference = intermediate.stack_references.get(stack_reference_index); + print("stack ref: {}\n", .{stack_reference}); + switch (source_value.*) { + .call => |call_index| { + try storeFunctionCallResult(allocator, function, intermediate, instruction_index, stack_reference.*, call_index); + }, + else => |t| @panic(@tagName(t)), + } + }, + else => |t| @panic(@tagName(t)), + } + }, + .syscall => |syscall_value_index| { + const syscall_value = intermediate.values.get(syscall_value_index); + const syscall = intermediate.syscalls.get(syscall_value.syscall); + for (syscall.arguments.items, syscall_registers[0..syscall.arguments.items.len]) |argument_index, syscall_register| { + const argument = intermediate.values.get(argument_index).*; + switch (argument) { + .integer => |integer| try movRegImm(function, allocator, integer, instruction_index, .{ .syscall_param = syscall_register }), + .stack_reference => |stack_reference_index| { + const stack_reference = intermediate.stack_references.get(stack_reference_index); + try movRegStack(function, allocator, .{ .syscall_param = syscall_register }, stack_reference.*, instruction_index); + }, + else => |t| @panic(@tagName(t)), + } + } + + try function.instructions.append(allocator, MIR.Instruction.new(.syscall, instruction_index, &.{})); + }, + .@"unreachable" => try function.instructions.append(allocator, MIR.Instruction.new(.ud2, instruction_index, &.{})), + else => |t| @panic(@tagName(t)), + } + } + } + } + + return mir; + } + + const RegisterAllocator = struct { + gp_registers: RegisterSet(x86_64.GPRegister) = .{}, + + fn init(allocator: Allocator) !RegisterAllocator { + var register_allocator = RegisterAllocator{}; + try register_allocator.gp_registers.free.ensureTotalCapacity(allocator, @typeInfo(x86_64.GPRegister).Enum.fields.len); + inline for (@typeInfo(x86_64.GPRegister).Enum.fields) |enum_field| { + register_allocator.gp_registers.free.putAssumeCapacity(@field(x86_64.GPRegister, enum_field.name), {}); + } + + return register_allocator; + } + }; + + fn RegisterSet(comptime RegisterEnum: type) type { + return struct { + used: AutoArrayHashMap(RegisterEnum, ir.Value.Index) = .{}, + free: AutoArrayHashMap(RegisterEnum, void) = .{}, + + fn allocate(register_set: *@This(), allocator: Allocator, register: RegisterEnum, intermediate: *ir.Result, instruction: MIR.Instruction, value_index: ir.Value.Index) !void { + switch (intermediate.instructions.get(instruction.ir).*) { + .store => {}, + else => { + switch (register_set.free.orderedRemove(register)) { + true => try register_set.used.put(allocator, register, value_index), + false => unreachable, + } + }, + } + } + }; + } + + fn getValueFromInstruction(intermediate: *ir.Result, instruction_index: ir.Instruction.Index) ir.Value.Index { + const instruction = intermediate.instructions.get(instruction_index); + const value_index: ir.Value.Index = switch (instruction.*) { + .copy, .ret, .syscall => |value_index| value_index, + .store => |store_index| blk: { + const store = intermediate.stores.get(store_index); + break :blk store.source; + }, + else => |t| @panic(@tagName(t)), + }; + + return value_index; + } + + pub fn allocateRegisters(mir: *MIR, allocator: Allocator, intermediate: *ir.Result) !void { + for (mir.functions.items) |*function| { + var register_allocator = try RegisterAllocator.init(allocator); + for (function.instructions.items) |*instruction| { + for (instruction.getOperands()) |*operand| { + switch (operand.*) { + .relative, .immediate, .stack => {}, + .gp_register => |gp_register| switch (instruction.ir.valid) { + true => operand.gp_register.value = blk: { + const value_index = getValueFromInstruction(intermediate, instruction.ir); + + if (gp_register.value) |expected_register| { + if (register_allocator.gp_registers.used.get(expected_register)) |allocated_value| { + const allocated = intermediate.values.get(allocated_value); + const value = intermediate.values.get(value_index); + print("\nAllocated: {}.\nValue: {}\n", .{ allocated.*, value.* }); + switch (value_index.eq(allocated_value)) { + true => {}, + false => unreachable, + } + } else { + if (register_allocator.gp_registers.free.get(expected_register)) |_| { + try register_allocator.gp_registers.allocate(allocator, expected_register, intermediate, instruction.*, value_index); + } else { + unreachable; + } + } + + break :blk expected_register; + } else { + for (register_allocator.gp_registers.free.keys()) |register| { + try register_allocator.gp_registers.allocate(allocator, register, intermediate, instruction.*, value_index); + break :blk register; + } else { + unreachable; + } + } + }, + false => {}, + }, + else => |t| @panic(@tagName(t)), + } + } + } + } + } + + fn storeFunctionCallResult(allocator: Allocator, function: *MIR.Function, intermediate: *ir.Result, instruction: ir.Instruction.Index, stack_reference: ir.StackReference, call_index: ir.Call.Index) !void { + _ = call_index; + _ = intermediate; + if (stack_reference.size <= @sizeOf(u64)) { + switch (stack_reference.size) { + @sizeOf(u8) => unreachable, + @sizeOf(u16) => unreachable, + @sizeOf(u32) => try function.instructions.append(allocator, MIR.Instruction.new(.mov, instruction, &.{ + .{ .stack = .{ .offset = stack_reference.offset } }, .{ .gp_register = .{ .value = .a } }, + })), + @sizeOf(u64) => unreachable, + else => unreachable, + } + } else { + unreachable; + } + } +}; + pub fn selectInstruction(instruction_selector: *InstructionSelector, function: *InstructionSelector.Function, intermediate: *ir.Result, instruction: ir.Instruction) !void { switch (instruction) { - .@"unreachable" => try function.instructions.append(instruction_selector.allocator, .{ .ud2 = {} }), + .copy => |copy_value| { + _ = copy_value; + unreachable; + }, + .@"unreachable" => _ = try function.addInstruction(instruction_selector.allocator, .{ .ud2 = {} }), .load => |load_index| { const load = intermediate.loads.get(load_index).*; const load_value = intermediate.values.get(load.value).*; @@ -35,7 +406,7 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * switch (argument) { .integer => |integer| { if (integer.value == 0) { - try function.instructions.append(instruction_selector.allocator, .{ + _ = try function.addInstruction(instruction_selector.allocator, .{ .xor_rm_r = .{ .destination = @enumFromInt(@intFromEnum(syscall_register)), .source = @enumFromInt(@intFromEnum(syscall_register)), @@ -44,7 +415,7 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * }, }); } else if (integer.value <= std.math.maxInt(u32)) { - try function.instructions.append(instruction_selector.allocator, .{ + _ = try function.addInstruction(instruction_selector.allocator, .{ .mov_r_imm = .{ .register_size = .four, .register = @enumFromInt(@intFromEnum(syscall_register)), @@ -59,7 +430,7 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * } } - try function.instructions.append(instruction_selector.allocator, .{ + _ = try function.addInstruction(instruction_selector.allocator, .{ .syscall = {}, }); }, @@ -67,19 +438,15 @@ pub fn selectInstruction(instruction_selector: *InstructionSelector, function: * .ret => unreachable, .jump => |jump_index| { const jump = intermediate.jumps.get(jump_index); - const relocation = Displacement{ - .size = .one, - .source = @intCast(function.block_map.get(jump.source) orelse unreachable), - .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable), - .offset_in_block = function.block_byte_count, - }; - _ = relocation; - // const index = function.instructions.items.len; - // try function.relocations.append(instruction_selector.allocator, @intCast(index)); - // try function.instructions.append(instruction_selector.allocator, .{ - // .jmp_rel_8 = relocation, - // }); - unreachable; + const instruction_index = try function.addInstruction(instruction_selector.allocator, .{ + .jmp_rel = Displacement{ + .size = .one, + .source = @intCast(function.block_map.get(jump.source) orelse unreachable), + .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable), + .instruction_index = @intCast(function.instructions.items.len), + }, + }); + try function.relocations.append(instruction_selector.allocator, instruction_index); }, .call => unreachable, .store => unreachable, @@ -101,10 +468,10 @@ const RegisterMemoryRegister = struct { }; const Displacement = struct { + instruction_index: u16, size: Size, source: u16, destination: u16, - offset_in_block: u16, }; const RmResult = struct { diff --git a/src/data_structures.zig b/src/data_structures.zig index 7afff5d..39b0df9 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -2,6 +2,7 @@ const std = @import("std"); const assert = std.debug.assert; pub const Allocator = std.mem.Allocator; +pub const AutoArrayHashMap = std.AutoArrayHashMapUnmanaged; pub const ArrayList = std.ArrayListUnmanaged; pub const AutoHashMap = std.AutoHashMapUnmanaged; pub const HashMap = std.HashMapUnmanaged; diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index 289862b..1ad2c3d 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -94,7 +94,6 @@ const Analyzer = struct { } fn block(analyzer: *Analyzer, scope_index: Scope.Index, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index { - _ = expect_type; var reaches_end = true; const block_node = analyzer.getNode(scope_index, node_index); var statement_nodes = ArrayList(Node.Index){}; @@ -142,9 +141,9 @@ const Analyzer = struct { }, }; try analyzer.resolveNode(right_value_allocation.ptr, scope_index, ExpectType.none, statement_node.right); - switch (right_value_allocation.ptr.*) { - else => |t| std.debug.print("\n\n\n\n\nASSIGN RIGHT: {s}\n\n\n\n", .{@tagName(t)}), - } + // switch (right_value_allocation.ptr.*) { + // else => |t| std.debug.print("\n\n\n\n\nASSIGN RIGHT: {s}\n\n\n\n", .{@tagName(t)}), + // } try statements.append(analyzer.allocator, right_value_allocation.index); continue; }, @@ -208,7 +207,7 @@ const Analyzer = struct { .node_index = statement_node.left, }, }; - try analyzer.resolveNode(return_value_allocation.ptr, scope_index, ExpectType.none, statement_node.left); + try analyzer.resolveNode(return_value_allocation.ptr, scope_index, expect_type, statement_node.left); break :ret return_value_allocation.index; }, false => @panic("TODO: ret void"), @@ -299,6 +298,7 @@ const Analyzer = struct { @panic("TODO: compile error"); } }, + else => unreachable, } // TODO @@ -337,7 +337,11 @@ const Analyzer = struct { var argument_nodes = try analyzer.getArguments(scope_index, node_index); print("Argument count: {}\n", .{argument_nodes.items.len}); if (argument_nodes.items.len > 0 and argument_nodes.items.len <= 6 + 1) { - const number_allocation = try analyzer.unresolvedAllocate(scope_index, ExpectType.none, argument_nodes.items[0]); + const number_allocation = try analyzer.unresolvedAllocate(scope_index, .{ + .flexible_integer = .{ + .byte_count = 8, + }, + }, argument_nodes.items[0]); const number = number_allocation.index; assert(number.valid); var arguments = std.mem.zeroes([6]Value.Index); @@ -388,8 +392,28 @@ const Analyzer = struct { }; }, .number_literal => switch (std.zig.parseNumberLiteral(analyzer.numberBytes(scope_index, node.token))) { - .int => |integer| .{ - .integer = integer, + .int => |integer| blk: { + assert(expect_type != .none); + const int_type = switch (expect_type) { + .flexible_integer => |flexible_integer_type| Compilation.Type.Integer{ + .bit_count = flexible_integer_type.byte_count << 3, + .signedness = .unsigned, + }, + .type_index => |type_index| a: { + const type_info = analyzer.module.types.get(type_index); + break :a switch (type_info.*) { + .integer => |int| int, + else => |t| @panic(@tagName(t)), + }; + }, + else => |t| @panic(@tagName(t)), + }; + break :blk .{ + .integer = .{ + .value = integer, + .type = int_type, + }, + }; }, else => |t| @panic(@tagName(t)), }, @@ -744,6 +768,7 @@ const Analyzer = struct { const ExpectType = union(enum) { none, type_index: Type.Index, + flexible_integer: FlexibleInteger, pub const none = ExpectType{ .none = {}, @@ -751,6 +776,11 @@ const ExpectType = union(enum) { pub const boolean = ExpectType{ .type_index = type_boolean, }; + + const FlexibleInteger = struct { + byte_count: u8, + sign: ?bool = null, + }; }; const type_boolean = Type.Index{