diff --git a/lib/std/start.nat b/lib/std/start.nat index 199fd13..81360d0 100644 --- a/lib/std/start.nat +++ b/lib/std/start.nat @@ -2,6 +2,7 @@ comptime { _ = _start; } -const _start = () noreturn { - while (true) {} +const _start = fn () noreturn { + _ = #syscall(231, 0); + unreachable; }; diff --git a/src/Compilation.zig b/src/Compilation.zig index f471fcc..a844add 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -174,18 +174,34 @@ pub const Assignment = struct { pub const Index = List.Index; }; +pub const Syscall = struct { + number: Value.Index, + arguments: [6]Value.Index, + argument_count: u8, + + pub fn getArguments(syscall: Syscall) []const Value.Index { + return syscall.arguments[0..syscall.argument_count]; + } + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + pub const Value = union(enum) { unresolved: Unresolved, declaration: Declaration.Index, void, bool: bool, undefined, + @"unreachable", loop: Loop.Index, function: Function.Index, block: Block.Index, runtime: Runtime, assign: Assignment.Index, type: Type.Index, + integer: u64, + syscall: Syscall.Index, pub const List = BlockList(@This()); pub const Index = List.Index; @@ -221,6 +237,7 @@ pub const Module = struct { blocks: BlockList(Block) = .{}, loops: BlockList(Loop) = .{}, assignments: BlockList(Assignment) = .{}, + syscalls: BlockList(Syscall) = .{}, pub const Descriptor = struct { main_package_path: []const u8, @@ -238,6 +255,7 @@ pub const Module = struct { }; pub fn importFile(module: *Module, allocator: Allocator, current_file: *File, import_name: []const u8) !ImportPackageResult { + print("import: '{s}'\n", .{import_name}); if (equal(u8, import_name, "std")) { return module.importPackage(allocator, module.main_package.dependencies.get("std").?); } diff --git a/src/backend/emit.zig b/src/backend/emit.zig index 768b33d..04a2add 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -19,7 +19,7 @@ const Section = struct { index: usize = 0, }; -const Result = struct { +pub const Result = struct { sections: struct { text: Section, rodata: Section, @@ -60,24 +60,30 @@ const Result = struct { }; } - fn appendCode(image: *Result, code: []const u8) void { + pub fn appendCode(image: *Result, code: []const u8) void { + std.debug.print("New code: ", .{}); + for (code) |byte| { + std.debug.print("0x{x} ", .{byte}); + } + std.debug.print("\n", .{}); const destination = image.sections.text.content[image.sections.text.index..][0..code.len]; @memcpy(destination, code); image.sections.text.index += code.len; } - fn appendCodeByte(image: *Result, code_byte: u8) void { + pub fn appendCodeByte(image: *Result, code_byte: u8) void { + std.debug.print("New code: 0x{x}\n", .{code_byte}); image.sections.text.content[image.sections.text.index] = code_byte; image.sections.text.index += 1; } - fn appendOnlyOpcodeSkipInstructionBytes(image: *Result, instruction: Instruction) void { - const instruction_descriptor = instruction_descriptors.get(instruction); - assert(instruction_descriptor.opcode_byte_count == instruction_descriptor.operand_offset); - image.appendCode(instruction_descriptor.getOpcode()); - - image.sections.text.index += instruction_descriptor.size - instruction_descriptor.opcode_byte_count; - } + // fn appendOnlyOpcodeSkipInstructionBytes(image: *Result, instruction: Instruction) void { + // const instruction_descriptor = instruction_descriptors.get(instruction); + // assert(instruction_descriptor.opcode_byte_count == instruction_descriptor.operand_offset); + // image.appendCode(instruction_descriptor.getOpcode()); + // + // image.sections.text.index += instruction_descriptor.size - instruction_descriptor.opcode_byte_count; + // } fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType { comptime { @@ -89,101 +95,45 @@ const Result = struct { } }; -const Instruction = enum { - jmp_rel_8, +pub fn InstructionSelector(comptime Instruction: type) type { + return struct { + functions: ArrayList(Function), + allocator: Allocator, - const Descriptor = struct { - operands: [4]Operand, - operand_count: u3, - operand_offset: u5, - size: u8, - opcode: [2]u8, - opcode_byte_count: u8, + pub const Function = struct { + instructions: ArrayList(Instruction) = .{}, + block_byte_counts: ArrayList(u16), + block_offsets: ArrayList(u32), + relocations: ArrayList(u32) = .{}, + block_map: AutoHashMap(ir.BasicBlock.Index, u32) = .{}, + byte_count: u32 = 0, + block_byte_count: u16 = 0, - fn getOperands(descriptor: Descriptor) []const Operand { - return descriptor.operands[0..descriptor.operand_count]; - } - - fn getOpcode(descriptor: Descriptor) []const u8 { - return descriptor.opcode[0..descriptor.opcode_byte_count]; - } - - fn new(opcode_bytes: []const u8, operands: []const Operand) Descriptor { - // TODO: prefixes - var result = Descriptor{ - .operands = undefined, - .operand_count = @intCast(operands.len), - .operand_offset = opcode_bytes.len, - .size = opcode_bytes.len, - .opcode = undefined, - .opcode_byte_count = opcode_bytes.len, - }; - - for (opcode_bytes, result.opcode[0..opcode_bytes.len]) |opcode_byte, *out_opcode| { - out_opcode.* = opcode_byte; + pub fn selectInstruction(function: *Function, allocator: Allocator, instruction: Instruction) !void { + try function.instructions.append(allocator, instruction); + function.block_byte_count += Instruction.descriptors.get(instruction).size; } - - for (operands, result.operands[0..operands.len]) |operand, *out_operand| { - out_operand.* = operand; - result.size += operand.size; - } - - return result; - } - }; - - const Operand = struct { - type: Type, - size: u8, - - const Type = enum { - rel, }; + + const Selector = @This(); }; -}; - -const rel8 = Instruction.Operand{ - .type = .rel, - .size = @sizeOf(u8), -}; - -const instruction_descriptors = blk: { - var result = std.EnumArray(Instruction, Instruction.Descriptor).initUndefined(); - result.getPtr(.jmp_rel_8).* = Instruction.Descriptor.new(&.{0xeb}, &[_]Instruction.Operand{rel8}); - break :blk result; -}; - -const InstructionSelector = struct { - functions: ArrayList(Function), - const Function = struct { - instructions: ArrayList(Instruction) = .{}, - block_byte_counts: ArrayList(u16), - block_offsets: ArrayList(u32), - byte_count: u32 = 0, - relocations: ArrayList(Relocation) = .{}, - block_map: AutoHashMap(ir.BasicBlock.Index, u32) = .{}, - const Relocation = struct { - instruction: Instruction, - source: u16, - destination: u16, - block_offset: u16, - }; - }; -}; +} pub fn get(comptime arch: std.Target.Cpu.Arch) type { const backend = switch (arch) { .x86_64 => @import("x86_64.zig"), else => @compileError("Architecture not supported"), }; - _ = backend; + const Instruction = backend.Instruction; return struct { pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void { var result = try Result.create(); var function_iterator = intermediate.functions.iterator(); - var instruction_selector = InstructionSelector{ - .functions = try ArrayList(InstructionSelector.Function).initCapacity(allocator, intermediate.functions.len), + const IS = InstructionSelector(Instruction); + var instruction_selector = IS{ + .functions = try ArrayList(IS.Function).initCapacity(allocator, intermediate.functions.len), + .allocator = allocator, }; while (function_iterator.next()) |ir_function| { @@ -200,72 +150,55 @@ pub fn get(comptime arch: std.Target.Cpu.Arch) type { for (ir_function.blocks.items) |block_index| { const block = intermediate.blocks.get(block_index); function.block_offsets.appendAssumeCapacity(function.byte_count); - var block_byte_count: u16 = 0; + function.block_byte_count = 0; for (block.instructions.items) |instruction_index| { const instruction = intermediate.instructions.get(instruction_index).*; - switch (instruction) { - .phi => unreachable, - .ret => unreachable, - .jump => |jump_index| { - const jump = intermediate.jumps.get(jump_index); - const relocation = InstructionSelector.Function.Relocation{ - .instruction = .jmp_rel_8, - .source = @intCast(function.block_map.get(jump.source) orelse unreachable), - .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable), - .block_offset = block_byte_count, - }; - try function.relocations.append(allocator, relocation); - block_byte_count += instruction_descriptors.get(.jmp_rel_8).size; - try function.instructions.append(allocator, .jmp_rel_8); - }, - } + try backend.selectInstruction(&instruction_selector, function, intermediate, instruction); } - function.block_byte_counts.appendAssumeCapacity(block_byte_count); - function.byte_count += block_byte_count; + + function.block_byte_counts.appendAssumeCapacity(function.block_byte_count); + function.byte_count += function.block_byte_count; } } for (instruction_selector.functions.items) |function| { - for (function.instructions.items) |instruction| switch (instruction) { - .jmp_rel_8 => result.appendOnlyOpcodeSkipInstructionBytes(instruction), - - // else => unreachable, - }; + for (function.instructions.items) |instruction| backend.emitInstruction(&result, instruction, intermediate); } - for (instruction_selector.functions.items) |function| { - var fix_size: bool = false; - _ = fix_size; - for (function.relocations.items) |relocation| { - std.debug.print("RELOC: {}\n", .{relocation}); - const source_block = relocation.source; - const destination_block = relocation.destination; - const source_offset = function.block_offsets.items[source_block]; - const destination_offset = function.block_offsets.items[destination_block]; - std.debug.print("Source offset: {}. Destination: {}\n", .{ source_offset, destination_offset }); - const instruction_descriptor = instruction_descriptors.get(relocation.instruction); - const instruction_offset = source_offset + relocation.block_offset; - const really_source_offset = instruction_offset + instruction_descriptor.size; - const displacement = @as(i64, destination_offset) - @as(i64, really_source_offset); - - const operands = instruction_descriptor.getOperands(); - switch (operands.len) { - 1 => switch (operands[0].size) { - @sizeOf(u8) => { - if (displacement >= std.math.minInt(i8) and displacement <= std.math.maxInt(i8)) { - const writer_index = instruction_offset + instruction_descriptor.operand_offset; - std.debug.print("Instruction offset: {}. Operand offset: {}. Writer index: {}. displacement: {}\n", .{ instruction_offset, instruction_descriptor.operand_offset, writer_index, displacement }); - result.sections.text.content[writer_index] = @bitCast(@as(i8, @intCast(displacement))); - } else { - unreachable; - } - }, - else => unreachable, - }, - else => unreachable, - } - } - } + // for (instruction_selector.functions.items) |function| { + // var fix_size: bool = false; + // _ = fix_size; + // for (function.relocations.items) |instruction_index| { + // const instruction = function.instructions.items[instruction_index]; + // const relative = instruction.jmp_rel_8; + // const source_block = relative.source; + // const destination_block = relative.destination; + // const source_offset = function.block_offsets.items[source_block]; + // const destination_offset = function.block_offsets.items[destination_block]; + // std.debug.print("Source offset: {}. Destination: {}\n", .{ source_offset, destination_offset }); + // const instruction_descriptor = instruction_descriptors.get(relative.instruction); + // const instruction_offset = source_offset + relative.block_offset; + // const really_source_offset = instruction_offset + instruction_descriptor.size; + // const displacement = @as(i64, destination_offset) - @as(i64, really_source_offset); + // + // const operands = instruction_descriptor.getOperands(); + // switch (operands.len) { + // 1 => switch (operands[0].size) { + // @sizeOf(u8) => { + // if (displacement >= std.math.minInt(i8) and displacement <= std.math.maxInt(i8)) { + // const writer_index = instruction_offset + instruction_descriptor.operand_offset; + // std.debug.print("Instruction offset: {}. Operand offset: {}. Writer index: {}. displacement: {}\n", .{ instruction_offset, instruction_descriptor.operand_offset, writer_index, displacement }); + // result.sections.text.content[writer_index] = @bitCast(@as(i8, @intCast(displacement))); + // } else { + // unreachable; + // } + // }, + // else => unreachable, + // }, + // else => unreachable, + // } + // } + // } const text_section = result.sections.text.content[0..result.sections.text.index]; for (text_section) |byte| { @@ -274,328 +207,3 @@ pub fn get(comptime arch: std.Target.Cpu.Arch) type { } }; } - -const Rex = enum(u8) { - b = upper_4_bits | (1 << 0), - x = upper_4_bits | (1 << 1), - r = upper_4_bits | (1 << 2), - w = upper_4_bits | (1 << 3), - - const upper_4_bits = 0b100_0000; -}; - -const GPRegister = enum(u4) { - a = 0, - c = 1, - d = 2, - b = 3, - sp = 4, - bp = 5, - si = 6, - di = 7, - r8 = 8, - r9 = 9, - r10 = 10, - r11 = 11, - r12 = 12, - r13 = 13, - r14 = 14, - r15 = 15, -}; - -pub const BasicGPRegister = enum(u3) { - a = 0, - c = 1, - d = 2, - b = 3, - sp = 4, - bp = 5, - si = 6, - di = 7, -}; - -const prefix_lock = 0xf0; -const prefix_repne_nz = 0xf2; -const prefix_rep = 0xf3; -const prefix_rex_w = [1]u8{@intFromEnum(Rex.w)}; -const prefix_16_bit_operand = [1]u8{0x66}; - -const jmp_rel_32 = 0xe9; -const ret = 0xc3; -const mov_a_imm = [1]u8{0xb8}; -const mov_reg_imm8: u8 = 0xb0; - -fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 { - comptime { - assert(@typeInfo(@TypeOf(integer)) == .Int); - } - - return @as([@sizeOf(@TypeOf(integer))]u8, @bitCast(integer)); -} - -fn movAImm(image: *Result, integer: anytype) void { - const T = @TypeOf(integer); - image.appendCode(&(switch (T) { - u8, i8 => .{mov_reg_imm8 | @intFromEnum(GPRegister.a)}, - u16, i16 => prefix_16_bit_operand ++ mov_a_imm, - u32, i32 => mov_a_imm, - u64, i64 => prefix_rex_w ++ mov_a_imm, - else => @compileError("Unsupported"), - } ++ intToArrayOfBytes(integer))); -} - -test "ret void" { - var image = try Result.create(); - image.appendCodeByte(ret); - - const function_pointer = image.getEntryPoint(fn () callconv(jit_callconv) void); - function_pointer(); -} - -const integer_types_to_test = [_]type{ u8, u16, u32, u64, i8, i16, i32, i64 }; - -fn getMaxInteger(comptime T: type) T { - comptime { - assert(@typeInfo(T) == .Int); - } - - return switch (@typeInfo(T).Int.signedness) { - .unsigned => std.math.maxInt(T), - .signed => std.math.minInt(T), - }; -} - -test "ret integer" { - inline for (integer_types_to_test) |Int| { - var image = try Result.create(); - const expected_number = getMaxInteger(Int); - - movAImm(&image, expected_number); - image.appendCodeByte(ret); - - const function_pointer = image.getEntryPoint(fn () callconv(jit_callconv) Int); - const result = function_pointer(); - try expect(result == expected_number); - } -} - -const LastByte = packed struct(u8) { - dst: BasicGPRegister, - src: BasicGPRegister, - always_on: u2 = 0b11, -}; - -fn movRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void { - dstRmSrcR(image, T, .mov, dst, src); -} - -fn dstRmSrcR(image: *Result, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void { - const last_byte: u8 = @bitCast(LastByte{ - .dst = dst, - .src = src, - }); - const opcode_byte = @intFromEnum(opcode); - - const bytes = switch (T) { - u8, i8 => blk: { - const base = [_]u8{ opcode_byte - 1, last_byte }; - if (@intFromEnum(dst) >= @intFromEnum(BasicGPRegister.sp) or @intFromEnum(src) >= @intFromEnum(BasicGPRegister.sp)) { - image.appendCodeByte(0x40); - } - - break :blk base; - }, - u16, i16 => prefix_16_bit_operand ++ .{ opcode_byte, last_byte }, - u32, i32 => .{ opcode_byte, last_byte }, - u64, i64 => prefix_rex_w ++ .{ opcode_byte, last_byte }, - else => @compileError("Not supported"), - }; - - image.appendCode(&bytes); -} - -test "ret integer argument" { - inline for (integer_types_to_test) |Int| { - var image = try Result.create(); - const number = getMaxInteger(Int); - - movRmR(&image, Int, .a, .di); - image.appendCodeByte(ret); - - const functionPointer = image.getEntryPoint(fn (Int) callconv(jit_callconv) Int); - const result = functionPointer(number); - try expectEqual(number, result); - } -} - -var r = std.rand.Pcg.init(0xffffffffffffffff); - -fn getRandomNumberRange(comptime T: type, min: T, max: T) T { - const random = r.random(); - return switch (@typeInfo(T).Int.signedness) { - .signed => random.intRangeAtMost(T, min, max), - .unsigned => random.uintAtMost(T, max), - }; -} - -fn subRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void { - dstRmSrcR(image, T, .sub, dst, src); -} - -test "ret sub arguments" { - inline for (integer_types_to_test) |Int| { - var image = try Result.create(); - const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2); - const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a); - - movRmR(&image, Int, .a, .di); - subRmR(&image, Int, .a, .si); - image.appendCodeByte(ret); - - const functionPointer = image.getEntryPoint(fn (Int, Int) callconv(jit_callconv) Int); - const result = functionPointer(a, b); - try expectEqual(a - b, result); - } -} - -const OpcodeRmR = enum(u8) { - add = 0x01, - @"or" = 0x09, - @"and" = 0x21, - sub = 0x29, - xor = 0x31, - @"test" = 0x85, - mov = 0x89, -}; - -test "test binary operations" { - inline for (integer_types_to_test) |T| { - const test_cases = [_]TestIntegerBinaryOperation(T){ - .{ - .opcode = .add, - .callback = struct { - fn callback(a: T, b: T) T { - return @addWithOverflow(a, b)[0]; - } - }.callback, - }, - .{ - .opcode = .sub, - .callback = struct { - fn callback(a: T, b: T) T { - return @subWithOverflow(a, b)[0]; - } - }.callback, - }, - .{ - .opcode = .@"or", - .callback = struct { - fn callback(a: T, b: T) T { - return a | b; - } - }.callback, - }, - .{ - .opcode = .@"and", - .callback = struct { - fn callback(a: T, b: T) T { - return a & b; - } - }.callback, - }, - .{ - .opcode = .xor, - .callback = struct { - fn callback(a: T, b: T) T { - return a ^ b; - } - }.callback, - }, - }; - - for (test_cases) |test_case| { - try test_case.runTest(); - } - } -} - -fn TestIntegerBinaryOperation(comptime T: type) type { - const should_log = false; - return struct { - callback: *const fn (a: T, b: T) T, - opcode: OpcodeRmR, - - pub fn runTest(test_case: @This()) !void { - for (0..10) |_| { - var image = try Result.create(); - const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2); - const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a); - movRmR(&image, T, .a, .di); - dstRmSrcR(&image, T, test_case.opcode, .a, .si); - image.appendCodeByte(ret); - - const functionPointer = image.getEntryPoint(fn (T, T) callconv(jit_callconv) T); - const expected = test_case.callback(a, b); - const result = functionPointer(a, b); - if (should_log) { - log.err("{s} {}, {} ({})", .{ @tagName(test_case.opcode), a, b, T }); - } - try expectEqual(expected, result); - } - } - }; -} - -test "call after" { - var image = try Result.create(); - const jump_patch_offset = image.sections.text.index + 1; - image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 }); - const jump_source = image.sections.text.index; - image.appendCodeByte(ret); - const jump_target = image.sections.text.index; - @as(*align(1) u32, @ptrCast(&image.sections.text.content[jump_patch_offset])).* = @intCast(jump_target - jump_source); - image.appendCodeByte(ret); - - const functionPointer = image.getEntryPoint(fn () callconv(jit_callconv) void); - functionPointer(); -} - -test "call before" { - var image = try Result.create(); - const first_jump_patch_offset = image.sections.text.index + 1; - const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 }; - image.appendCode(&first_call); - const first_jump_source = image.sections.text.index; - image.appendCodeByte(ret); - const second_jump_target = image.sections.text.index; - image.appendCodeByte(ret); - const first_jump_target = image.sections.text.index; - @as(*align(1) i32, @ptrCast(&image.sections.text.content[first_jump_patch_offset])).* = @intCast(first_jump_target - first_jump_source); - const second_call = .{0xe8} ++ @as([4]u8, @bitCast(@as(i32, @intCast(@as(i64, @intCast(second_jump_target)) - @as(i64, @intCast(image.sections.text.index + 5)))))); - image.appendCode(&second_call); - image.appendCodeByte(ret); - - const functionPointer = image.getEntryPoint(fn () callconv(jit_callconv) void); - functionPointer(); -} - -pub fn runTest(allocator: Allocator, ir_result: *const ir.Result) !Result { - _ = allocator; - - var image = try Result.create(); - - var entry_point: u32 = 0; - _ = entry_point; - - for (ir_result.functions.items) |*function| { - for (function.instructions.items) |instruction| { - switch (instruction.id) { - .ret_void => { - image.appendCodeByte(ret); - }, - } - } - } - - return image; -} diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index 501319f..f96baca 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -16,6 +16,9 @@ pub const Result = struct { blocks: BlockList(BasicBlock) = .{}, instructions: BlockList(Instruction) = .{}, jumps: BlockList(Jump) = .{}, + values: BlockList(Value) = .{}, + syscalls: BlockList(Syscall) = .{}, + loads: BlockList(Load) = .{}, }; pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_file: Compilation.Type.Index) !Result { @@ -57,10 +60,13 @@ pub const BasicBlock = struct { } }; -const Instruction = union(enum) { +pub const Instruction = union(enum) { jump: Jump.Index, + load: Load.Index, phi: Phi.Index, ret: Ret, + syscall: Syscall.Index, + @"unreachable", pub const List = BlockList(@This()); pub const Index = List.Index; @@ -83,6 +89,38 @@ pub const Jump = struct { pub const Index = List.Index; }; +const Syscall = struct { + arguments: ArrayList(Value.Index), + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +const Load = struct { + value: Value.Index, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Value = union(enum) { + integer: Integer, + load: Load.Index, + pub const List = BlockList(@This()); + pub const Index = List.Index; + + pub fn isInMemory(value: Value) bool { + return switch (value) { + .integer => false, + .load => true, + }; + } +}; + +const Integer = struct { + value: u64, + sign: bool, +}; + const Function = struct { blocks: ArrayList(BasicBlock.Index) = .{}, pub const List = BlockList(@This()); @@ -207,11 +245,66 @@ pub const Builder = struct { builder.current_basic_block = loop_prologue_block; } }, + .syscall => |syscall_index| { + const sema_syscall = builder.module.syscalls.get(syscall_index); + var arguments = try ArrayList(Value.Index).initCapacity(builder.allocator, sema_syscall.argument_count + 1); + + const sema_syscall_number = sema_syscall.number; + assert(sema_syscall_number.valid); + const number_value_index = try builder.emitValue(sema_syscall_number); + + arguments.appendAssumeCapacity(number_value_index); + + for (sema_syscall.getArguments()) |sema_syscall_argument| { + assert(sema_syscall_argument.valid); + const argument_value_index = try builder.emitValue(sema_syscall_argument); + arguments.appendAssumeCapacity(argument_value_index); + } + + _ = try builder.append(.{ + .syscall = try builder.ir.syscalls.append(builder.allocator, .{ + .arguments = arguments, + }), + }); + }, + .@"unreachable" => _ = try builder.append(.{ + .@"unreachable" = {}, + }), else => |t| @panic(@tagName(t)), } } } + fn load(builder: *Builder, value_index: Value.Index) !Value.Index { + print("Doing load!\n", .{}); + + const load_index = try builder.ir.loads.append(builder.allocator, .{ + .value = value_index, + }); + const instruction_index = try builder.append(.{ + .load = load_index, + }); + _ = instruction_index; + const result = try builder.ir.values.append(builder.allocator, .{ + .load = load_index, + }); + return result; + } + + fn emitValue(builder: *Builder, sema_value_index: Compilation.Value.Index) !Value.Index { + const sema_value = builder.module.values.get(sema_value_index).*; + return switch (sema_value) { + // TODO + .integer => |integer| try builder.ir.values.append(builder.allocator, .{ + .integer = .{ + .value = integer, + .sign = false, + }, + }), + else => |t| @panic(@tagName(t)), + }; + } + fn jump(builder: *Builder, jump_descriptor: Jump) !Jump.Index { const destination_block = builder.ir.blocks.get(jump_descriptor.destination); assert(!destination_block.sealed); diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index e69de29..9f473ec 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -0,0 +1,414 @@ +const std = @import("std"); +const assert = std.debug.assert; +const print = std.debug.print; +const emit = @import("emit.zig"); +const ir = @import("./intermediate_representation.zig"); + +const InstructionSelector = emit.InstructionSelector(Instruction); + +const Size = enum(u2) { + one = 0, + two = 1, + four = 2, + eight = 3, +}; + +pub fn selectInstruction(instruction_selector: *InstructionSelector, function: *InstructionSelector.Function, intermediate: *ir.Result, instruction: ir.Instruction) !void { + switch (instruction) { + .@"unreachable" => try function.instructions.append(instruction_selector.allocator, .{ .ud2 = {} }), + .load => |load_index| { + const load = intermediate.loads.get(load_index).*; + const load_value = intermediate.values.get(load.value).*; + switch (load_value) { + .integer => |integer| { + _ = integer; + unreachable; + }, + else => |t| @panic(@tagName(t)), + } + unreachable; + }, + .syscall => |syscall_index| { + const syscall = intermediate.syscalls.get(syscall_index); + for (syscall.arguments.items, syscall_registers[0..syscall.arguments.items.len]) |argument_index, syscall_register| { + const argument = intermediate.values.get(argument_index).*; + switch (argument) { + .integer => |integer| { + if (integer.value == 0) { + try function.instructions.append(instruction_selector.allocator, .{ + .xor_rm_r = .{ + .destination = @enumFromInt(@intFromEnum(syscall_register)), + .source = @enumFromInt(@intFromEnum(syscall_register)), + .size = .four, + .direct = true, + }, + }); + } else if (integer.value <= std.math.maxInt(u32)) { + try function.instructions.append(instruction_selector.allocator, .{ + .mov_r_imm = .{ + .register_size = .four, + .register = @enumFromInt(@intFromEnum(syscall_register)), + .immediate = argument_index, + .immediate_size = .four, + }, + }); + // TODO + } else unreachable; + // if (integer.value == 0) { + // try function.instructions.append(instruction_selector.allocator, .{ + // .xor_reg32_reg32 = .{ + // .destination = syscall_register, + // .source = syscall_register, + // }, + // }); + // } else if (integer.value < std.math.maxInt(u32)) { + // try function.instructions.append(instruction_selector.allocator, .{ + // .mov_reg_imm32 = .{ + // .destination = syscall_register, + // .source = @intCast(integer.value), + // }, + // }); + // } else { + // unreachable; + // } + }, + else => |t| @panic(@tagName(t)), + } + } + + try function.instructions.append(instruction_selector.allocator, .{ + .syscall = {}, + }); + }, + .phi => unreachable, + .ret => unreachable, + .jump => |jump_index| { + _ = jump_index; + // const jump = intermediate.jumps.get(jump_index); + // const relocation = LocalRelative{ + // .instruction = .jmp_rel_8, + // .source = @intCast(function.block_map.get(jump.source) orelse unreachable), + // .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable), + // .offset_in_block = function.block_byte_count, + // }; + // const index = function.instructions.items.len; + // try function.relocations.append(instruction_selector.allocator, @intCast(index)); + // try function.instructions.append(instruction_selector.allocator, .{ + // .jmp_rel_8 = relocation, + // }); + unreachable; + }, + } +} + +const RegisterImmediate = struct { + immediate: ir.Value.Index, + register: GPRegister, + register_size: Size, + immediate_size: Size, +}; + +const RegisterMemoryRegister = struct { + destination: GPRegister, + source: GPRegister, + size: Size, + direct: bool, +}; + +const RmResult = struct { + rex: Rex, + mod_rm: ModRm, +}; + +const RmAndRexArguments = packed struct { + rm: GPRegister, + reg: GPRegister, + direct: bool, + bit64: bool, + sib: bool, +}; + +// fn computeRmAndRex(args: RmAndRexArguments) RmResult { +// _ = register_memory_register; +// const rex_byte = Rex{ +// .b = @intFromEnum(args.rm) > std.math.maxInt(u3), +// .x = args.sib, +// .r = @intFromEnum(args.reg) > std.math.maxInt(u3), +// .w = args.bit64, +// }; +// var rex_byte = std.mem.zeroes(Rex); +// if (@intFromEnum(rm) > std.math.maxInt(u3)) +// } +fn emitImmediate(result: *emit.Result, intermediate: *ir.Result, value_index: ir.Value.Index, size: Size) void { + const value = intermediate.values.get(value_index); + const integer = value.integer.value; + const integer_bytes = switch (size) { + .one => std.mem.asBytes(&@as(u8, @intCast(integer))), + .two => std.mem.asBytes(&@as(u16, @intCast(integer))), + .four => std.mem.asBytes(&@as(u32, @intCast(integer))), + .eight => std.mem.asBytes(&@as(u64, @intCast(integer))), + }; + result.appendCode(integer_bytes); +} + +const ModRm = packed struct(u8) { + rm: u3, + reg: u3, + mod: u2, +}; + +pub fn emitInstruction(result: *emit.Result, instruction: Instruction, intermediate: *ir.Result) void { + switch (instruction) { + inline .xor_rm_r => |register_memory_register, tag| { + const rm = register_memory_register.destination; + const reg = register_memory_register.source; + const rex_byte = Rex{ + .b = @intFromEnum(rm) > std.math.maxInt(u3), + .x = false, //args.sib, + .r = @intFromEnum(reg) > std.math.maxInt(u3), + .w = register_memory_register.size == .eight, + }; + + if (@as(u4, @truncate(@as(u8, @bitCast(rex_byte)))) != 0) { + result.appendCodeByte(@bitCast(rex_byte)); + } + + const modrm = ModRm{ + .rm = @truncate(@intFromEnum(rm)), + .reg = @truncate(@intFromEnum(reg)), + .mod = @as(u2, @intFromBool(register_memory_register.direct)) << 1 | @intFromBool(register_memory_register.direct), + }; + // _ = modrm; + const opcode = tag.getOpcode(&.{ + .{ + .register_memory = .{ + .value = register_memory_register.destination, + .size = register_memory_register.size, + .direct = register_memory_register.direct, + }, + }, + .{ + .register = .{ + .value = register_memory_register.source, + .size = register_memory_register.size, + }, + }, + }); + + result.appendCode(opcode); + result.appendCodeByte(@bitCast(modrm)); + }, + inline .mov_r_imm => |register_immediate, tag| { + const opcode = tag.getOpcode(&.{ + .{ + .register = .{ + .value = register_immediate.register, + .size = register_immediate.register_size, + }, + }, + .{ + .immediate = register_immediate.immediate_size, + }, + }); + assert(opcode.len == 1); + const opcode_byte = opcode[0] | @intFromEnum(register_immediate.register); + result.appendCodeByte(opcode_byte); + emitImmediate(result, intermediate, register_immediate.immediate, register_immediate.immediate_size); + }, + // .jmp_rel_8 => unreachable, //result.appendOnlyOpcodeSkipInstructionBytes(instruction), + // inline .mov_reg_imm32 => |content, tag| { + // _ = tag; + // _ = content; + // // const descriptor = instruction_descriptors.get(tag); + // // result.writeOpcode(descriptor.opcode); + // // result.appendCodeByte(descriptor.getOpcode()[0] | @intFromEnum(content.destination)); + // // result.appendCode(std.mem.asBytes(&content.source)); + // unreachable; + // }, + // inline .xor_reg32_reg32 => |content, tag| { + // _ = tag; + // _ = content; + // // const descriptor = instruction_descriptors.get(tag); + // // result.appendCodeByte(descriptor.getOpcode()[0]); + // // result.appendCodeByte(0xc0 | @as(u8, @intFromEnum(content.source)) << 4 | @intFromEnum(content.destination)); + // unreachable; + // }, + inline .syscall, .ud2 => |_, tag| { + const opcode = tag.getOpcode(&.{}); + result.appendCode(opcode); + }, + // else => unreachable, + } +} + +pub const Instruction = union(Id) { + xor_rm_r: RegisterMemoryRegister, + mov_r_imm: RegisterImmediate, + // jmp_rel_8: LocalRelative, + // mov_reg_imm32: struct { + // destination: GPRegister, + // source: u32, + // }, + // xor_reg32_reg32: struct { + // destination: GPRegister, + // source: GPRegister, + // }, + syscall, + ud2, + + const Id = enum { + xor_rm_r, + mov_r_imm, + // jmp_rel_8, + // mov_reg_imm32, + // xor_reg32_reg32, + syscall, + ud2, + + fn getOpcode(comptime instruction: Instruction.Id, operands: []const Operand) []const u8 { + return switch (instruction) { + .mov_r_imm => switch (operands[0].register.size) { + .one => &.{0xb0}, + .two, .four, .eight => &.{0xb8}, + }, + .syscall => &.{ 0x0f, 0x05 }, + .ud2 => &.{ 0x0f, 0x0b }, + .xor_rm_r => switch (operands[0].register_memory.size) { + .one => &.{0x30}, + .two, .four, .eight => &.{0x31}, + }, + }; + } + }; + + const Operand = union(enum) { + displacement, + register: struct { + value: GPRegister, + size: Size, + }, + // TODO + register_memory: struct { + value: GPRegister, + size: Size, + direct: bool, + }, + immediate: Size, + + const Id = enum { + displacement, + register, + register_memory, + immediate, + }; + }; + + pub const descriptors = blk: { + var result = std.EnumArray(Instruction.Id, Instruction.Descriptor).initUndefined(); + result.getPtr(.jmp_rel_8).* = Instruction.Descriptor.new(&.{0xeb}, &[_]Instruction.Operand{rel8}); + result.getPtr(.mov_reg_imm32).* = Instruction.Descriptor.new(&.{0xb8}, &[_]Instruction.Operand{ reg32, imm32 }); + result.getPtr(.xor_reg_reg).* = Instruction.Descriptor.new(&.{0x31}, &[_]Instruction.Operand{ reg32, reg32 }); + result.getPtr(.syscall).* = Instruction.Descriptor.new(&.{ 0x0f, 0x05 }, &.{}); + result.getPtr(.ud2).* = Instruction.Descriptor.new(&.{ 0x0f, 0x0b }, &.{}); + break :blk result; + }; + + const Descriptor = struct { + operands: [4]Operand, + operand_count: u3, + operand_offset: u5, + size: u8, + opcode: [3]u8, + opcode_byte_count: u8, + + fn getOperands(descriptor: Descriptor) []const Operand { + return descriptor.operands[0..descriptor.operand_count]; + } + + fn new(opcode_bytes: []const u8, operands: []const Operand) Descriptor { + // TODO: prefixes + var result = Descriptor{ + .operands = undefined, + .operand_count = @intCast(operands.len), + .operand_offset = opcode_bytes.len, + .size = opcode_bytes.len, + .opcode = .{ 0, 0 }, + .opcode_byte_count = opcode_bytes.len, + }; + + if (opcode_bytes.len == 1) { + result.opcode[1] = opcode_bytes[0]; + } else for (opcode_bytes, result.opcode[0..opcode_bytes.len]) |opcode_byte, *out_opcode| { + out_opcode.* = opcode_byte; + } + + for (operands, result.operands[0..operands.len]) |operand, *out_operand| { + out_operand.* = operand; + result.size += operand.size; + } + + return result; + } + }; +}; +const LocalRelative = struct { + instruction: Instruction.Id, + source: u16, + destination: u16, + offset_in_block: u16, +}; + +const rel8 = Instruction.Operand{ + .type = .relative, + .size = @sizeOf(u8), +}; + +const reg32 = Instruction.Operand{ + .type = .register, + .size = @sizeOf(u32), +}; + +const imm32 = Instruction.Operand{ + .type = .immediate, + .size = @sizeOf(u32), +}; + +const Rex = packed struct(u8) { + b: bool, + x: bool, + r: bool, + w: bool, + fixed: u4 = 0b0100, +}; + +const GPRegister = enum(u4) { + a = 0, + c = 1, + d = 2, + b = 3, + sp = 4, + bp = 5, + si = 6, + di = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, +}; + +// pub const BasicGPRegister = enum(u3) { +// a = 0, +// c = 1, +// d = 2, +// b = 3, +// sp = 4, +// bp = 5, +// si = 6, +// di = 7, +// }; + +const syscall_registers = [7]GPRegister{ .a, .di, .si, .d, .r10, .r8, .r9 }; diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig index a5f26ce..c5745fc 100644 --- a/src/frontend/lexical_analyzer.zig +++ b/src/frontend/lexical_analyzer.zig @@ -20,7 +20,7 @@ pub const Token = packed struct(u64) { pub const Id = enum(u8) { eof = 0x00, identifier = 0x01, - number = 0x02, + number_literal = 0x02, string_literal = 0x03, fixed_keyword_function = 0x04, fixed_keyword_const = 0x05, @@ -32,6 +32,8 @@ pub const Token = packed struct(u64) { fixed_keyword_bool = 0x0b, fixed_keyword_true = 0x0c, fixed_keyword_false = 0x0d, + fixed_keyword_fn = 0x0e, + fixed_keyword_unreachable = 0x0f, bang = '!', // 0x21 hash = '#', // 0x23 dollar_sign = '$', // 0x24 @@ -78,6 +80,8 @@ pub const FixedKeyword = enum { bool, true, false, + @"fn", + @"unreachable", }; pub const Result = struct { @@ -123,7 +127,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)), } else .identifier; }, - '(', ')', '{', '}', '-', '=', ';', '#' => |operator| blk: { + '(', ')', '{', '}', '-', '=', ';', '#', '@', ',' => |operator| blk: { index += 1; break :blk @enumFromInt(operator); }, @@ -132,7 +136,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { index += 1; } - break :blk .number; + break :blk .number_literal; }, '\'' => { unreachable; diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index 65be4e4..a01deac 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -54,41 +54,9 @@ const Analyzer = struct { } fn assign(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Assignment.Index { - print("Assign: #{}", .{node_index.value}); - const node = analyzer.nodes[node_index.unwrap()]; - assert(node.id == .assign); - const Result = struct { - left: Value.Index, - right: Value.Index, - }; - const result: Result = switch (node.left.valid) { - // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` - false => .{ - .left = Value.Index.invalid, - .right = try analyzer.expression(scope, ExpectType.none, node.right), - }, - true => { - const left_node = analyzer.nodes[node.left.unwrap()]; - print("left node index: {}. Left node: {}", .{ node.left, left_node }); - // const id = analyzer.tokenIdentifier(.token); - // print("id: {s}\n", .{id}); - const left = try analyzer.expression(scope, ExpectType.none, node.left); - _ = left; - unreachable; - }, - }; - - print("Assignment: L: {}. R: {}\n", .{ result.left, result.right }); - - if (result.left.valid and analyzer.module.values.get(result.left).isComptime() and analyzer.module.values.get(result.right).isComptime()) { - unreachable; - } else { - const assignment_index = try analyzer.module.assignments.append(analyzer.allocator, .{ - .store = result.left, - .load = result.right, - }); - return assignment_index; - } + _ = node_index; + _ = scope; + _ = analyzer; } fn block(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index { @@ -100,15 +68,19 @@ const Analyzer = struct { try statement_nodes.append(analyzer.allocator, block_node.left); }, .block_zero, .comptime_block_zero => {}, + .block_two, .comptime_block_two => { + try statement_nodes.append(analyzer.allocator, block_node.left); + try statement_nodes.append(analyzer.allocator, block_node.right); + }, else => |t| @panic(@tagName(t)), } const is_comptime = switch (block_node.id) { - .comptime_block_zero, .comptime_block_one => true, - .block_zero, .block_one => false, + .comptime_block_zero, .comptime_block_one, .comptime_block_two => true, + .block_zero, .block_one, .block_two => false, else => |t| @panic(@tagName(t)), }; - _ = is_comptime; + print("Is comptime: {}\n", .{is_comptime}); var statements = ArrayList(Value.Index){}; @@ -121,7 +93,37 @@ const Analyzer = struct { const statement_value = switch (statement_node.id) { inline .assign, .simple_while => |statement_id| blk: { const specific_value_index = switch (statement_id) { - .assign => try analyzer.assign(scope, statement_node_index), + .assign => { + print("Assign: #{}\n", .{node_index.value}); + assert(statement_node.id == .assign); + switch (statement_node.left.valid) { + // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` + false => { + const right = try analyzer.expression(scope, ExpectType.none, statement_node.right); + try statements.append(analyzer.allocator, right); + continue; + }, + true => { + const left_node = analyzer.nodes[statement_node.left.unwrap()]; + print("left node index: {}. Left node: {}\n", .{ statement_node.left, left_node }); + // const id = analyzer.tokenIdentifier(.token); + // print("id: {s}\n", .{id}); + const left = try analyzer.expression(scope, ExpectType.none, statement_node.left); + _ = left; + + // if (analyzer.module.values.get(left).isComptime() and analyzer.module.values.get(right).isComptime()) { + // unreachable; + // } else { + // const assignment_index = try analyzer.module.assignments.append(analyzer.allocator, .{ + // .store = result.left, + // .load = result.right, + // }); + // return assignment_index; + // } + unreachable; + }, + } + }, .simple_while => statement: { const loop_index = try analyzer.module.loops.append(analyzer.allocator, .{ .condition = Value.Index.invalid, @@ -148,6 +150,10 @@ const Analyzer = struct { const value_index = try analyzer.module.values.append(analyzer.allocator, value); break :blk value_index; }, + .@"unreachable" => blk: { + reaches_end = false; + break :blk Values.@"unreachable".getIndex(); + }, else => |t| @panic(@tagName(t)), }; try statements.append(analyzer.allocator, statement_value); @@ -194,16 +200,31 @@ const Analyzer = struct { } } + fn getArguments(analyzer: *Analyzer, node_index: Node.Index) !ArrayList(Node.Index) { + var arguments = ArrayList(Node.Index){}; + const node = analyzer.nodes[node_index.unwrap()]; + switch (node.id) { + .compiler_intrinsic_two => { + try arguments.append(analyzer.allocator, node.left); + try arguments.append(analyzer.allocator, node.right); + }, + else => |t| @panic(@tagName(t)), + } + + return arguments; + } + fn resolveNode(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) anyerror!Value { const node = analyzer.nodes[node_index.unwrap()]; return switch (node.id) { .identifier => unreachable, - .compiler_intrinsic_one => blk: { + .compiler_intrinsic_one, .compiler_intrinsic_two => blk: { const intrinsic_name = analyzer.tokenIdentifier(node.token + 1); const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable; - print("Intrinsic: {s}", .{@tagName(intrinsic)}); + print("Intrinsic: {s}\n", .{@tagName(intrinsic)}); switch (intrinsic) { .import => { + assert(node.id == .compiler_intrinsic_one); const import_argument = analyzer.nodes[node.left.unwrap()]; switch (import_argument.id) { .string_literal => { @@ -212,7 +233,7 @@ const Analyzer = struct { if (imported_file.is_new) { // TODO: fix error - analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, imported_file.file) catch return error.OutOfMemory; + try analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, imported_file.file); } else { unreachable; } @@ -224,6 +245,35 @@ const Analyzer = struct { else => unreachable, } }, + .syscall => { + var argument_nodes = try analyzer.getArguments(node_index); + print("Argument count: {}\n", .{argument_nodes.items.len}); + if (argument_nodes.items.len > 0 and argument_nodes.items.len <= 6 + 1) { + const number = try analyzer.expression(scope, ExpectType.none, argument_nodes.items[0]); + assert(number.valid); + var arguments = std.mem.zeroes([6]Value.Index); + for (argument_nodes.items[1..], 0..) |argument_node_index, argument_index| { + const argument = try analyzer.expression(scope, ExpectType.none, argument_node_index); + print("Index: {}. Argument: {}\n", .{ argument_index, argument }); + arguments[argument_index] = argument; + } + + // TODO: typecheck for usize + for (arguments[0..argument_nodes.items.len]) |argument| { + _ = argument; + } + + break :blk .{ + .syscall = try analyzer.module.syscalls.append(analyzer.allocator, .{ + .number = number, + .arguments = arguments, + .argument_count = @intCast(argument_nodes.items.len - 1), + }), + }; + } else { + unreachable; + } + }, } unreachable; }, @@ -244,13 +294,18 @@ const Analyzer = struct { }, .keyword_true => unreachable, .simple_while => unreachable, - // .assign => try analyzer.assign(scope, node_index), .block_zero, .block_one => blk: { const block_index = try analyzer.block(scope, expect_type, node_index); break :blk .{ .block = block_index, }; }, + .number_literal => switch (std.zig.parseNumberLiteral(analyzer.tokenBytes(analyzer.tokens[node.token]))) { + .int => |integer| .{ + .integer = integer, + }, + else => |t| @panic(@tagName(t)), + }, else => |t| @panic(@tagName(t)), }; } @@ -269,12 +324,9 @@ const Analyzer = struct { }, } - break :blk bool_true; + break :blk Values.getIndex(.bool_true); }, - .block_zero => try analyzer.module.values.append(analyzer.allocator, .{ - .block = try analyzer.block(scope, expect_type, node_index), - }), - else => |t| @panic(@tagName(t)), + else => try analyzer.module.values.append(analyzer.allocator, try analyzer.resolveNode(scope, expect_type, node_index)), }; } @@ -342,58 +394,6 @@ const Analyzer = struct { @panic("TODO: analyzeDeclaration"); } - fn globalSymbolDeclaration(analyzer: *Analyzer, symbol_declaration: SymbolDeclaration) !void { - if (symbol_declaration.type_node.get()) |type_node_index| { - _ = type_node_index; - @panic("TODO: type node"); - } - const initialization_node = analyzer.nodes[symbol_declaration.initialization_node.unwrap()]; - switch (initialization_node.id) { - .compiler_intrinsic_one => { - const intrinsic_name = analyzer.tokenIdentifier(initialization_node.token + 1); - const intrinsic = inline for (@typeInfo(Intrinsic).Enum.fields) |intrinsic_enum_field| { - if (equal(u8, intrinsic_name, intrinsic_enum_field.name)) { - break @field(Intrinsic, intrinsic_enum_field.name); - } - } else unreachable; - print("Intrinsic: {s}", .{@tagName(intrinsic)}); - switch (intrinsic) { - .import => { - const import_argument = analyzer.nodes[initialization_node.left.get()]; - switch (import_argument.id) { - .string_literal => unreachable, - else => unreachable, - } - }, - } - // const intrinsic_node_index = initialization_node.left.unwrap(); - // const intrinsic_node = analyzer.nodes[intrinsic_node_index]; - // - // switch (intrinsic_node.id) { - // .string_literal => - // } - // print("intrinsic: {}", .{intrinsic_node.id}); - - // _ = a; - }, - else => unreachable, - } - print("Init node: {}\n", .{initialization_node}); - @panic("TODO"); - } - - fn symbolDeclaration(analyzer: *Analyzer, node_index: Node.Index) SymbolDeclaration { - const node = analyzer.nodes[node_index.unwrap()]; - return switch (node.id) { - .simple_variable_declaration => .{ - .type_node = node.left, - .initialization_node = node.right, - .mutability_token = node.token, - }, - else => unreachable, - }; - } - fn structType(analyzer: *Analyzer, parent_scope: Scope.Index, container_declaration: syntactic_analyzer.ContainerDeclaration, index: Node.Index) !Type.Index { _ = index; const new_scope = try analyzer.allocateScope(.{ .parent = parent_scope }); @@ -544,18 +544,20 @@ const Analyzer = struct { fn tokenIdentifier(analyzer: *Analyzer, token_index: Token.Index) []const u8 { const token = analyzer.tokens[token_index]; assert(token.id == .identifier); - const identifier = analyzer.source_code[token.start..][0..token.len]; + const identifier = analyzer.tokenBytes(token); return identifier; } + fn tokenBytes(analyzer: *Analyzer, token: Token) []const u8 { + return analyzer.source_code[token.start..][0..token.len]; + } + fn tokenStringLiteral(analyzer: *Analyzer, token_index: Token.Index) []const u8 { const token = analyzer.tokens[token_index]; assert(token.id == .string_literal); // Eat double quotes - const start = token.start + 1; - const len = token.len - 2; - const string_literal = analyzer.source_code[start..][0..len]; + const string_literal = analyzer.tokenBytes(token)[1..][0 .. token.len - 2]; return string_literal; } @@ -593,18 +595,30 @@ const type_boolean = Type.Index{ .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.bool), }; -const bool_false = Value.Index{ - .block = 0, - .index = 1, -}; +// Each time an enum is added here, a corresponding insertion in the initialization must be made +const Values = enum { + bool_false, + bool_true, + @"unreachable", -const bool_true = Value.Index{ - .block = 0, - .index = 1, + fn getIndex(value: Values) Value.Index { + const absolute: u32 = @intFromEnum(value); + const foo = @as(Value.Index, undefined); + const ElementT = @TypeOf(@field(foo, "index")); + const BlockT = @TypeOf(@field(foo, "block")); + const divider = std.math.maxInt(ElementT); + const element_index: ElementT = @intCast(absolute % divider); + const block_index: BlockT = @intCast(absolute / divider); + return .{ + .index = element_index, + .block = block_index, + }; + } }; const Intrinsic = enum { import, + syscall, }; const FixedTypeKeyword = enum { @@ -674,6 +688,10 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) .bool = true, }); + _ = try module.values.append(compilation.base_allocator, .{ + .@"unreachable" = {}, + }); + return analyzeExistingPackage(compilation, module, package); } diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig index 36f7fcd..a6b00a7 100644 --- a/src/frontend/syntactic_analyzer.zig +++ b/src/frontend/syntactic_analyzer.zig @@ -28,6 +28,8 @@ pub const Node = packed struct(u128) { left: Node.Index, right: Node.Index, + pub const List = ArrayList(Node.Index); + pub const Index = packed struct(u32) { value: u31, valid: bool = true, @@ -74,6 +76,11 @@ pub const Node = packed struct(u128) { keyword_true = 18, comptime_block_zero = 19, comptime_block_one = 20, + number_literal = 21, + compiler_intrinsic_two = 22, + comptime_block_two = 23, + block_two = 24, + @"unreachable" = 25, }; }; @@ -90,6 +97,7 @@ const Analyzer = struct { file: []const u8, allocator: Allocator, temporal_node_heap: ArrayList(Node.Index) = .{}, + node_lists: ArrayList(Node.List) = .{}, fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 { if (analyzer.tokens[analyzer.token_i].id == token_id) { @@ -136,12 +144,13 @@ const Analyzer = struct { _ = try analyzer.expectToken(.equal); // TODO: do this in a function - const init_node = switch (analyzer.tokens[analyzer.token_i].id) { - .identifier => unreachable, - .hash => try analyzer.compilerIntrinsic(), - .left_parenthesis => try analyzer.function(), - else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), - }; + const init_node = try analyzer.expression(); + // const init_node = switch (analyzer.tokens[analyzer.token_i].id) { + // .identifier => unreachable, + // .hash => try analyzer.compilerIntrinsic(), + // .left_parenthesis => try analyzer.function(), + // else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), + // }; _ = try analyzer.expectToken(.semicolon); @@ -186,6 +195,8 @@ const Analyzer = struct { fn function(analyzer: *Analyzer) !Node.Index { const token = analyzer.token_i; + assert(analyzer.tokens[token].id == .fixed_keyword_fn); + analyzer.token_i += 1; const function_prototype = try analyzer.functionPrototype(); const is_comptime = false; _ = is_comptime; @@ -200,6 +211,7 @@ const Analyzer = struct { fn functionPrototype(analyzer: *Analyzer) !Node.Index { const token = analyzer.token_i; + assert(analyzer.tokens[token].id == .left_parenthesis); const arguments = try analyzer.argumentList(.left_parenthesis, .right_parenthesis); const return_type = try analyzer.typeExpression(); @@ -231,6 +243,12 @@ const Analyzer = struct { } } + fn assignExpressionStatement(analyzer: *Analyzer) !Node.Index { + const result = try analyzer.assignExpression(); + _ = try analyzer.expectToken(.semicolon); + return result; + } + fn block(analyzer: *Analyzer, options: Options) !Node.Index { const left_brace = try analyzer.expectToken(.left_brace); const node_heap_top = analyzer.temporal_node_heap.items.len; @@ -243,14 +261,9 @@ const Analyzer = struct { .colon => { unreachable; }, - else => blk: { - const identifier = analyzer.getIdentifier(first_statement_token); - std.debug.print("Starting statement with identifier: {s}\n", .{identifier}); - const result = try analyzer.assignExpression(); - _ = try analyzer.expectToken(.semicolon); - break :blk result; - }, + else => try analyzer.assignExpressionStatement(), }, + .fixed_keyword_unreachable => try analyzer.assignExpressionStatement(), .fixed_keyword_while => try analyzer.whileStatement(options), else => unreachable, }; @@ -279,6 +292,15 @@ const Analyzer = struct { .left = statement_array[0], .right = Node.Index.invalid, }, + 2 => .{ + .id = switch (options.is_comptime) { + true => .comptime_block_two, + false => .block_two, + }, + .token = left_brace, + .left = statement_array[0], + .right = statement_array[1], + }, else => |len| std.debug.panic("len: {}", .{len}), }; return analyzer.addNode(node); @@ -355,6 +377,12 @@ const Analyzer = struct { .left = parameters[0], .right = Node.Index.invalid, }), + 2 => analyzer.addNode(.{ + .id = .compiler_intrinsic_two, + .token = hash, + .left = parameters[0], + .right = parameters[1], + }), else => unreachable, }; } @@ -370,7 +398,7 @@ const Analyzer = struct { while (analyzer.token_i < analyzer.tokens.len) { const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) { - .equal, .semicolon, .right_parenthesis, .right_brace => -1, + .equal, .semicolon, .right_parenthesis, .right_brace, .comma => -1, else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }), }; @@ -416,7 +444,8 @@ const Analyzer = struct { .colon => unreachable, else => try analyzer.curlySuffixExpression(), }, - .string_literal, .fixed_keyword_true, .fixed_keyword_false => try analyzer.curlySuffixExpression(), + .string_literal, .number_literal, .fixed_keyword_true, .fixed_keyword_false, .hash, .fixed_keyword_unreachable => try analyzer.curlySuffixExpression(), + .fixed_keyword_fn => analyzer.function(), // todo:? // .left_brace => try analyzer.block(), else => |id| { @@ -463,7 +492,7 @@ const Analyzer = struct { fn typeExpression(analyzer: *Analyzer) !Node.Index { return switch (analyzer.tokens[analyzer.token_i].id) { - .identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false => try analyzer.errorUnionExpression(), + .identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false, .hash => try analyzer.errorUnionExpression(), else => |id| blk: { log.warn("By default, calling errorUnionExpression with {s}", .{@tagName(id)}); @@ -528,6 +557,15 @@ const Analyzer = struct { .right = Node.Index.invalid, }); }, + .number_literal => blk: { + analyzer.token_i += 1; + break :blk analyzer.addNode(.{ + .id = .number_literal, + .token = token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + }, .identifier => switch (analyzer.tokens[token_i + 1].id) { .colon => unreachable, else => blk: { @@ -546,6 +584,16 @@ const Analyzer = struct { }, .fixed_keyword_noreturn => try analyzer.noReturn(), .fixed_keyword_true => try analyzer.boolTrue(), + .fixed_keyword_unreachable => try analyzer.addNode(.{ + .id = .@"unreachable", + .token = blk: { + analyzer.token_i += 1; + break :blk token_i; + }, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }), + .hash => analyzer.compilerIntrinsic(), else => |foo| { switch (foo) { .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.getIdentifier(analyzer.tokens[token_i]) }),