diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..27965bb
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,17 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "cppvsdbg",
+            "request": "launch",
+            "name": "Debug",
+            "program": "${workspaceFolder}/zig-out/bin/compiler.exe",
+            "args": [],
+            "cwd": "${workspaceFolder}",
+            "preLaunchTask": "zig build"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
new file mode 100644
index 0000000..6d91faf
--- /dev/null
+++ b/.vscode/tasks.json
@@ -0,0 +1,12 @@
+{
+    // See https://go.microsoft.com/fwlink/?LinkId=733558
+    // for the documentation about the tasks.json format
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "zig build",
+            "type": "shell",
+            "command": "zig build"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/src/compiler.zig b/src/compiler.zig
new file mode 100644
index 0000000..b7c8214
--- /dev/null
+++ b/src/compiler.zig
@@ -0,0 +1,21 @@
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+
+const data_structures = @import("data_structures.zig");
+
+const lexer = @import("lexer.zig");
+const parser = @import("parser.zig");
+
+test {
+    _ = lexer;
+    _ = parser;
+}
+
+pub fn cycle(allocator: Allocator, file_relative_path: []const u8) !void {
+    const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize));
+    std.debug.print("File:\n\n```\n{s}\n```\n", .{file});
+    const lexer_result = try lexer.lex(allocator, file);
+    const parser_result = try parser.parse(allocator, &lexer_result);
+    _ = parser_result;
+}
diff --git a/src/data_structures.zig b/src/data_structures.zig
index 7696d38..58fbed7 100644
--- a/src/data_structures.zig
+++ b/src/data_structures.zig
@@ -1,3 +1,4 @@
 const std = @import("std");
 
 pub const ArrayList = std.ArrayListUnmanaged;
+pub const HashMap = std.AutoHashMap;
diff --git a/src/emit.zig b/src/emit.zig
index 6be2371..b5d64e3 100644
--- a/src/emit.zig
+++ b/src/emit.zig
@@ -8,19 +8,12 @@ const expectEqual = std.testing.expectEqual;
 
 const ir = @import("ir.zig");
 
-pub const Result = struct {
-    pub fn free(result: *Result, allocator: Allocator) void {
-        _ = allocator;
-        _ = result;
-    }
-};
-
 const Section = struct {
     content: []align(page_size) u8,
     index: usize = 0,
 };
 
-const Image = struct {
+const Result = struct {
     sections: struct {
         text: Section,
         rodata: Section,
@@ -28,8 +21,8 @@ const Image = struct {
     },
     entry_point: u32 = 0,
 
-    fn create() !Image {
-        return Image{
+    fn create() !Result {
+        return Result{
             .sections = .{
                 .text = .{ .content = try mmap(page_size, .{ .executable = true }) },
                 .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) },
@@ -38,33 +31,47 @@ const Image = struct {
         };
     }
 
-    fn destroy(image: *Image) void {
+    fn destroy(image: *Result) void {
         inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| {
-            std.os.munmap(@field(image.sections, field_name).content);
+            const section_bytes = @field(image.sections, field_name).content;
+            switch (@import("builtin").os.tag) {
+                .linux => std.os.munmap(section_bytes),
+                .windows => std.os.windows.VirtualFree(section_bytes.ptr, 0, std.os.windows.MEM_RELEASE),
+                else => @compileError("OS not supported"),
+            }
         }
    }
 
-    inline fn mmap(size: usize, flags: packed struct {
+    fn mmap(size: usize, flags: packed struct {
         executable: bool,
     }) ![]align(page_size) u8 {
-        const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0;
-        const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE;
+        return switch (@import("builtin").os.tag) {
+            .windows => blk: {
+                const windows = std.os.windows;
+                break :blk @as([*]align(0x1000) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size];
+            },
+            .linux => blk: {
+                const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0;
+                const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE;
 
-        return std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
+                break :blk std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
+            },
+            else => @compileError("OS not supported"),
+        };
     }
 
-    fn appendCode(image: *Image, code: []const u8) void {
+    fn appendCode(image: *Result, code: []const u8) void {
         const destination = image.sections.text.content[image.sections.text.index..][0..code.len];
         @memcpy(destination, code);
         image.sections.text.index += code.len;
     }
 
-    fn appendCodeByte(image: *Image, code_byte: u8) void {
+    fn appendCodeByte(image: *Result, code_byte: u8) void {
         image.sections.text.content[image.sections.text.index] = code_byte;
         image.sections.text.index += 1;
     }
 
-    fn getEntryPoint(image: *const Image, comptime Function: type) *const Function {
+    fn getEntryPoint(image: *const Result, comptime Function: type) *const Function {
         comptime {
             assert(@typeInfo(Function) == .Fn);
         }
@@ -72,6 +79,16 @@ const Image = struct {
         assert(image.sections.text.content.len > 0);
         return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point]));
     }
+
+    pub fn free(result: *Result, allocator: Allocator) void {
+        _ = allocator;
+        inline for (comptime std.meta.fieldNames(@TypeOf(result.sections))) |field_name| {
+            switch (@import("builtin").os.tag) {
+                .windows => std.os.windows.VirtualFree(@field(result.sections, field_name).content.ptr, 0, std.os.windows.MEM_RELEASE),
+                else => std.os.munmap(@field(result.sections, field_name).content),
+            }
+        }
+    }
 };
 
 const Rex = enum(u8) {
@@ -123,7 +140,7 @@ const ret = 0xc3;
 const mov_a_imm = [1]u8{0xb8};
 const mov_reg_imm8: u8 = 0xb0;
 
-inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
+fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
     comptime {
         assert(@typeInfo(@TypeOf(integer)) == .Int);
     }
@@ -131,7 +148,7 @@ inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
     return @as([@sizeOf(@TypeOf(integer))]u8, @bitCast(integer));
 }
 
-inline fn movAImm(image: *Image, integer: anytype) void {
+fn movAImm(image: *Result, integer: anytype) void {
     const T = @TypeOf(integer);
     image.appendCode(&(switch (T) {
         u8, i8 => .{mov_reg_imm8 | @intFromEnum(GPRegister.a)},
@@ -143,8 +160,9 @@ inline fn movAImm(image: *Image, integer: anytype) void {
 }
 
 test "ret void" {
-    var image = try Image.create();
-    defer image.destroy();
+    const allocator = std.testing.allocator;
+    var image = try Result.create();
+    defer image.free(allocator);
 
     image.appendCodeByte(ret);
     const function_pointer = image.getEntryPoint(fn () callconv(.C) void);
@@ -166,8 +184,8 @@ fn getMaxInteger(comptime T: type) T {
 
 test "ret integer" {
     inline for (integer_types_to_test) |Int| {
-        var image = try Image.create();
-        defer image.destroy();
+        var image = try Result.create();
+        defer image.free(std.testing.allocator);
 
         const expected_number = getMaxInteger(Int);
         movAImm(&image, expected_number);
@@ -185,11 +203,11 @@ const LastByte = packed struct(u8) {
     always_on: u2 = 0b11,
 };
 
-fn movRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
+fn movRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
     dstRmSrcR(image, T, .mov, dst, src);
 }
 
-fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void {
+fn dstRmSrcR(image: *Result, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void {
     const last_byte: u8 = @bitCast(LastByte{
         .dst = dst,
         .src = src,
@@ -216,8 +234,9 @@ fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPReg
 
 test "ret integer argument" {
     inline for (integer_types_to_test) |Int| {
-        var image = try Image.create();
-        defer image.destroy();
+        const allocator = std.testing.allocator;
+        var image = try Result.create();
+        defer image.free(allocator);
 
         const number = getMaxInteger(Int);
         movRmR(&image, Int, .a, .di);
@@ -239,14 +258,15 @@ fn getRandomNumberRange(comptime T: type, min: T, max: T) T {
     };
 }
 
-fn subRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
+fn subRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
     dstRmSrcR(image, T, .sub, dst, src);
 }
 
 test "ret sub arguments" {
     inline for (integer_types_to_test) |Int| {
-        var image = try Image.create();
-        defer image.destroy();
+        const allocator = std.testing.allocator;
+        var image = try Result.create();
+        defer image.free(allocator);
 
         const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2);
         const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a);
@@ -328,10 +348,10 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
         opcode: OpcodeRmR,
 
         pub fn runTest(test_case: @This()) !void {
+            const allocator = std.testing.allocator;
             for (0..10) |_| {
-                var image = try Image.create();
-                defer image.destroy();
-                errdefer image.destroy();
+                var image = try Result.create();
+                defer image.free(allocator);
                 const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2);
                 const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a);
                 movRmR(&image, T, .a, .di);
@@ -351,9 +371,9 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
 }
 
 test "call after" {
-    var image = try Image.create();
-    defer image.destroy();
-    errdefer image.destroy();
+    const allocator = std.testing.allocator;
+    var image = try Result.create();
+    defer image.free(allocator);
     const jump_patch_offset = image.sections.text.index + 1;
     image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 });
     const jump_source = image.sections.text.index;
@@ -367,9 +387,9 @@ test "call after" {
 }
 
 test "call before" {
-    var image = try Image.create();
-    defer image.destroy();
-    errdefer image.destroy();
+    const allocator = std.testing.allocator;
+    var image = try Result.create();
+    defer image.free(allocator);
     const first_jump_patch_offset = image.sections.text.index + 1;
     const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 };
     image.appendCode(&first_call);
@@ -390,9 +410,21 @@ test "call before" {
 
 pub fn runTest(allocator: Allocator, ir_result: *const ir.Result) !Result {
     _ = allocator;
+    var image = try Result.create();
+
+    var entry_point: u32 = 0;
+    _ = entry_point;
+
     for (ir_result.functions.items) |*function| {
-        _ = function;
+        for (function.instructions.items) |instruction| {
+            switch (instruction.id) {
+                .ret_void => {
+                    image.appendCodeByte(ret);
+                },
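+                // TODO: only ret_void is handled here so far; other IR instruction ids will need their own encodings.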
} + } } - return Result{}; + return image; } diff --git a/src/ir.zig b/src/ir.zig index c5d7992..20b0eba 100644 --- a/src/ir.zig +++ b/src/ir.zig @@ -14,7 +14,7 @@ const void_type = Type{ const Type = struct { id: Id, - inline fn isPrimitive(T: Type) bool { + fn isPrimitive(T: Type) bool { return switch (T.id) { .void => true, }; @@ -66,6 +66,7 @@ const Function = struct { pub const Result = struct { top_level_declarations: ArrayList(TopLevelDeclaration), functions: ArrayList(Function), + instructions: struct {} = .{}, pub fn free(result: *Result, allocator: Allocator) void { for (result.functions.items) |*function| { diff --git a/src/lexer.zig b/src/lexer.zig index be18d11..424a6a7 100644 --- a/src/lexer.zig +++ b/src/lexer.zig @@ -3,95 +3,90 @@ const Allocator = std.mem.Allocator; const assert = std.debug.assert; const log = std.log; +const equal = std.mem.eql; + const data_structures = @import("data_structures.zig"); const ArrayList = data_structures.ArrayList; const fs = @import("fs.zig"); +const parser = @import("parser.zig"); -pub inline fn rdtsc() u64 { - var edx: u32 = undefined; - var eax: u32 = undefined; +pub const TokenTypeMap = blk: { + var result: [@typeInfo(TokenId).Enum.fields.len]type = undefined; - asm volatile ( - \\rdtsc - : [eax] "={eax}" (eax), - [edx] "={edx}" (edx), - ); + result[@intFromEnum(TokenId.identifier)] = Identifier; + result[@intFromEnum(TokenId.operator)] = Operator; + result[@intFromEnum(TokenId.number)] = Number; - return @as(u64, edx) << 32 | eax; -} - -inline fn rdtscFast() u32 { - return asm volatile ( - \\rdtsc - : [eax] "={eax}" (-> u32), - : - : "edx" - ); -} - -const vector_byte_count = 16; -// These two actually take less space due to how Zig handles bool as u1 -const VBool = @Vector(vector_byte_count, bool); -const VU1 = @Vector(vector_byte_count, u1); - -const VU8 = @Vector(vector_byte_count, u8); - -inline fn vand(v1: VBool, v2: VBool) VBool { - return @bitCast(@as(VU1, @bitCast(v1)) & @as(VU1, @bitCast(v2))); -} - -inline fn byteMask(n: u8) VU8 { - return @splat(n); -} - -inline fn endOfIdentifier(ch: u8) bool { - // TODO: complete - return ch == ' ' or ch == '(' or ch == ')'; -} - -const Identifier = struct { - start: u32, - end: u32, + break :blk result; }; +pub const Identifier = parser.Node; + pub const TokenId = enum { identifier, - special_character, + operator, + number, }; -pub const SpecialCharacter = enum(u8) { - arrow = 0, +pub const Operator = enum(u8) { left_parenthesis = '(', right_parenthesis = ')', left_brace = '{', right_brace = '}', + equal = '=', + colon = ':', + semicolon = ';', +}; + +pub const Number = struct { + content: union(enum) { + float: f64, + integer: Integer, + }, + + const Integer = struct { + value: u64, + is_negative: bool, + }; }; pub const Result = struct { - identifiers: ArrayList(Identifier), - special_characters: ArrayList(SpecialCharacter), - ids: ArrayList(TokenId), + arrays: struct { + identifier: ArrayList(Identifier), + operator: ArrayList(Operator), + number: ArrayList(Number), + id: ArrayList(TokenId), + }, file: []const u8, time: u64 = 0, pub fn free(result: *Result, allocator: Allocator) void { - result.identifiers.clearAndFree(allocator); - result.special_characters.clearAndFree(allocator); - result.ids.clearAndFree(allocator); - allocator.free(result.file); + inline for (@typeInfo(@TypeOf(result.arrays)).Struct.fields) |field| { + @field(result.arrays, field.name).clearAndFree(allocator); + } + } + + fn appendToken(result: *Result, comptime token_id: TokenId, token_value: 
+        .arrays = .{
+            .identifier = try ArrayList(Identifier).initCapacity(allocator, text.len),
+            .operator = try ArrayList(Operator).initCapacity(allocator, text.len),
+            .number = try ArrayList(Number).initCapacity(allocator, text.len),
+            .id = try ArrayList(TokenId).initCapacity(allocator, text.len),
+        },
         .file = text,
     };
@@ -105,35 +102,47 @@
         switch (first_char) {
             'a'...'z', 'A'...'Z', '_' => {
                 const start = index;
-                // SIMD this
-                while (!endOfIdentifier(text[index])) {
+                while (true) {
+                    const ch = text[index];
+                    if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_' or (ch >= '0' and ch <= '9')) {
+                        index += 1;
+                        continue;
+                    }
+                    break;
+                }
+
+                result.appendToken(.identifier, .{
+                    .left = @intCast(start),
+                    .right = @intCast(index),
+                    .type = .identifier,
+                });
+            },
+            '(', ')', '{', '}', '-', '=', ';' => |operator| {
+                result.appendToken(.operator, @enumFromInt(operator));
+                index += 1;
+            },
+            '0'...'9' => {
+                const start = index;
+
+                while (text[index] >= '0' and text[index] <= '9') {
                     index += 1;
                 }
-
-                result.identifiers.appendAssumeCapacity(.{
-                    .start = @intCast(start),
-                    .end = @intCast(index),
+                const end = index;
+                const number_slice = text[start..end];
+                const number = try std.fmt.parseInt(u64, number_slice, 10);
+                result.appendToken(.number, .{
+                    .content = .{
+                        .integer = .{
+                            .value = number,
+                            .is_negative = false,
+                        },
+                    },
                 });
-
-                result.ids.appendAssumeCapacity(.identifier);
-            },
-            '(', ')', '{', '}' => |special_character| {
-                result.special_characters.appendAssumeCapacity(@enumFromInt(special_character));
-                result.ids.appendAssumeCapacity(.special_character);
-                index += 1;
             },
             ' ', '\n' => index += 1,
-            '-' => {
-                if (text[index + 1] == '>') {
-                    result.special_characters.appendAssumeCapacity(.arrow);
-                    result.ids.appendAssumeCapacity(.special_character);
-                    index += 2;
-                } else {
-                    @panic("TODO");
-                }
-            },
-            else => {
+            else => |foo| {
                 index += 1;
+                std.debug.panic("NI: {c}", .{foo});
             },
         }
     }
@@ -141,16 +150,12 @@
     return result;
 }
 
-pub fn runTest(allocator: Allocator, file: []const u8) !Result {
-    const result = try lex(allocator, file);
-
-    return result;
-}
-
 test "lexer" {
     const allocator = std.testing.allocator;
     const file_path = fs.first;
     const file = try fs.readFile(allocator, file_path);
-    var result = try runTest(allocator, file);
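+    // lex() no longer owns `file` (Result.free only clears the token arrays), so the caller frees the buffer.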
+    defer allocator.free(file);
+    var result = try lex(allocator, file);
     defer result.free(allocator);
 }
diff --git a/src/main.zig b/src/main.zig
index 17ac21d..37ce80e 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -2,44 +2,17 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 
+const compiler = @import("compiler.zig");
+
 const fs = @import("fs.zig");
-const lexer = @import("lexer.zig");
-const parser = @import("parser.zig");
-const ir = @import("ir.zig");
-const emit = @import("emit.zig");
-
 pub const seed = std.math.maxInt(u64);
 
 pub fn main() !void {
     var gpa = std.heap.GeneralPurposeAllocator(.{}){};
     const allocator = gpa.allocator();
-    try behaviorTest(allocator, fs.first);
-}
-
-fn behaviorTest(allocator: Allocator, file_relative_path: []const u8) !void {
-    const file = try fs.readFile(allocator, file_relative_path);
-    var lexer_result = try lexer.runTest(allocator, file);
-    defer lexer_result.free(allocator);
-    var parser_result = parser.runTest(allocator, &lexer_result) catch |err| {
-        std.log.err("Lexer took {} ns", .{lexer_result.time});
-        return err;
-    };
-    defer parser_result.free(allocator);
-    var ir_result = try ir.runTest(allocator, &parser_result);
-    defer ir_result.free(allocator);
-    var emit_result = try emit.runTest(allocator, &ir_result);
-    defer emit_result.free(allocator);
+    try compiler.cycle(allocator, fs.first);
 }
 
 test {
-    _ = lexer;
-    _ = parser;
-    _ = ir;
-    _ = emit;
-}
-
-test "behavior test 1" {
-    const allocator = std.testing.allocator;
-    try behaviorTest(allocator, fs.first);
+    _ = compiler;
 }
diff --git a/src/parser.zig b/src/parser.zig
index 4b56dcc..a64c0ed 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -1,195 +1,440 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
+const log = std.log;
 
 const data_structures = @import("data_structures.zig");
 const ArrayList = data_structures.ArrayList;
+const HashMap = data_structures.HashMap;
 
 const lexer = @import("lexer.zig");
 
 pub const Result = struct {
-    functions: ArrayList(Function),
-    strings: StringMap,
+    function_map: ArrayList(lexer.Identifier),
+    nodes: ArrayList(Node),
 
     pub fn free(result: *Result, allocator: Allocator) void {
-        result.functions.clearAndFree(allocator);
-        result.strings.clearAndFree(allocator);
+        result.function_map.clearAndFree(allocator);
+        result.nodes.clearAndFree(allocator);
     }
 };
 
+pub const Node = packed struct(u64) {
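+    // 10 + 27 + 27 bits: a type tag plus two operand indices pack exactly into one u64.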
+    type: Type,
+    left: Node.Index,
+    right: Node.Index,
+
+    pub const Index = u27;
+
+    pub const Type = enum(u10) {
+        root = 0,
+        identifier = 1,
+        number = 2,
+        @"return" = 3,
+        block_one = 4,
+        function_declaration_no_arguments = 5,
+        container_declaration = 6,
+    };
+};
+
+const Error = error{
+    unexpected_token,
+    not_implemented,
+    OutOfMemory,
+};
+
+pub fn parse(allocator: Allocator, lexer_result: *const lexer.Result) !Result {
+    var parser = Parser{
+        .allocator = allocator,
+        .nodes = ArrayList(Node){},
+        .function_map = ArrayList(lexer.Identifier){},
+        .lexer = .{
+            .result = lexer_result,
+        },
+    };
+    errdefer parser.free();
+
+    const node_index = try parser.appendNode(Node{
+        .type = .root,
+        .left = 0,
+        .right = 0,
+    });
+    _ = node_index;
+
+    const members = try parser.parseContainerMembers();
+    _ = members;
+
+    return Result{
+        .function_map = parser.function_map,
+        .nodes = parser.nodes,
+    };
+}
+
+const ExpressionMutabilityQualifier = enum {
+    @"const",
+    @"var",
+};
+
+const Keyword = enum {
+    @"return",
+    @"fn",
+};
+
 const PeekResult = union(lexer.TokenId) {
-    special_character: lexer.SpecialCharacter,
-    identifier: []const u8,
+    identifier: lexer.Identifier,
+    operator: lexer.Operator,
+    number: lexer.Number,
 };
 
-const Function = struct {
-    name: u32,
-    return_type: u32,
-    arguments: ArrayList(Argument),
-    statements: ArrayList(Statement),
+const Lexer = struct {
+    result: *const lexer.Result,
+    indices: struct {
+        identifier: u32 = 0,
+        operator: u32 = 0,
+        number: u32 = 0,
+        id: u32 = 0,
+    } = .{},
 
-    const Argument = struct {
-        foo: u32 = 0,
-    };
+    fn hasTokens(l: *const Lexer) bool {
+        return l.indices.id < l.result.arrays.id.items.len;
+    }
+
+    fn currentTokenIndex(l: *const Lexer, comptime token_id: lexer.TokenId) u32 {
+        assert(l.isCurrentToken(token_id));
+        return @field(l.indices, @tagName(token_id));
+    }
+
+    fn consume(l: *Lexer, comptime token_id: lexer.TokenId) void {
+        assert(l.isCurrentToken(token_id));
+        l.indices.id += 1;
+        const index_ptr = &@field(l.indices, @tagName(token_id));
+        const index = index_ptr.*;
+        const token_value = @field(l.result.arrays, @tagName(token_id)).items[index];
+        log.err("Consuming {s} ({})...", .{ @tagName(token_id), token_value });
+
+        index_ptr.* += 1;
+    }
+
+    fn isCurrentToken(l: *const Lexer, token_id: lexer.TokenId) bool {
+        return l.result.arrays.id.items[l.indices.id] == token_id;
+    }
+
+    fn getIdentifier(l: *const Lexer, identifier: Node) []const u8 {
+        comptime {
+            assert(lexer.Identifier == Node);
+        }
+
+        assert(identifier.type == .identifier);
+
+        return l.result.file[identifier.left..][0 .. identifier.right - identifier.left];
+    }
+
+    fn expectTokenType(l: *Lexer, comptime expected_token_id: lexer.TokenId) !lexer.TokenTypeMap[@intFromEnum(expected_token_id)] {
+        const peek_result = l.peek() orelse return error.not_implemented;
+        return switch (peek_result) {
+            expected_token_id => |token| blk: {
+                l.consume(expected_token_id);
+                break :blk token;
+            },
+            else => error.not_implemented,
+        };
+    }
+
+    fn expectTokenTypeIndex(l: *Lexer, comptime expected_token_id: lexer.TokenId) !u32 {
+        const peek_result = l.peek() orelse return error.not_implemented;
+        return switch (peek_result) {
+            expected_token_id => blk: {
+                const index = l.currentTokenIndex(expected_token_id);
+                l.consume(expected_token_id);
+                break :blk index;
+            },
+            else => error.not_implemented,
+        };
+    }
+
+    fn expectSpecificToken(l: *Lexer, comptime expected_token_id: lexer.TokenId, expected_token: lexer.TokenTypeMap[@intFromEnum(expected_token_id)]) !void {
+        const peek_result = l.peek() orelse return error.not_implemented;
+        switch (peek_result) {
+            expected_token_id => |token| {
+                if (expected_token != token) {
+                    return error.not_implemented;
+                }
+
+                l.consume(expected_token_id);
+            },
+            else => |token| {
+                std.debug.panic("{s}", .{@tagName(token)});
+            },
+        }
+    }
+
+    fn maybeExpectOperator(l: *Lexer, expected_operator: lexer.Operator) bool {
+        return switch (l.peek() orelse unreachable) {
+            .operator => |operator| {
+                const result = operator == expected_operator;
+                if (result) {
+                    l.consume(.operator);
+                }
+                return result;
+            },
+            else => false,
+        };
+    }
+
+    fn peek(l: *const Lexer) ?PeekResult {
+        if (l.indices.id >= l.result.arrays.id.items.len) {
+            return null;
+        }
+
+        return switch (l.result.arrays.id.items[l.indices.id]) {
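+            // `inline else` instantiates one arm per TokenId, so @tagName and @field see a comptime-known tag.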
+            inline else => |token| blk: {
+                const tag = @tagName(token);
+                const index = @field(l.indices, tag);
+                const array = &@field(l.result.arrays, tag);
+
+                break :blk @unionInit(PeekResult, tag, array.items[index]);
+            },
+        };
+    }
+};
 
-const Statement = struct {
-    foo: u32 = 0,
-};
-
-const StringMap = std.AutoHashMapUnmanaged(u32, []const u8);
-
 const Parser = struct {
-    id_index: u32 = 0,
-    identifier_index: u32 = 0,
-    special_character_index: u32 = 0,
-    strings: StringMap,
+    lexer: Lexer,
+    nodes: ArrayList(Node),
+    function_map: ArrayList(lexer.Identifier),
     allocator: Allocator,
-    functions: ArrayList(Function),
 
-    fn parse(parser: *Parser, lexer_result: *const lexer.Result) !Result {
-        while (parser.id_index < lexer_result.ids.items.len) {
-            try parser.parseTopLevelDeclaration(lexer_result);
-        }
+    fn appendNode(parser: *Parser, node: Node) !Node.Index {
+        const index = parser.nodes.items.len;
+        try parser.nodes.append(parser.allocator, node);
+        return @intCast(index);
+    }
 
-        return Result{
-            .functions = parser.functions,
-            .strings = parser.strings,
+    fn getNode(parser: *Parser, node_index: Node.Index) *Node {
+        return &parser.nodes.items[node_index];
+    }
+
+    fn free(parser: *Parser) void {
+        _ = parser;
+    }
+
+    fn parseTypeExpression(parser: *Parser) !Node.Index {
+        // TODO: make this decent
+        return switch (parser.lexer.peek() orelse unreachable) {
+            .identifier => parser.nodeFromToken(.identifier),
+            else => unreachable,
         };
     }
 
-    fn parseFunction(parser: *Parser, lexer_result: *const lexer.Result, name: u32) !Function {
-        assert(lexer_result.special_characters.items[parser.special_character_index] == .left_parenthesis);
-        parser.consume(lexer_result, .special_character);
-
-        while (true) {
-            if (parser.expectSpecialCharacter(lexer_result, .right_parenthesis)) {
-                break;
-            } else |_| {}
-
+    fn parseFunctionDeclaration(parser: *Parser) !Node.Index {
+        try parser.lexer.expectSpecificToken(.operator, .left_parenthesis);
+        while (!parser.lexer.maybeExpectOperator(.right_parenthesis)) {
             return error.not_implemented;
         }
 
-        try parser.expectSpecialCharacter(lexer_result, .arrow);
-
-        const return_type_identifier = try parser.expectIdentifier(lexer_result);
-
-        try parser.expectSpecialCharacter(lexer_result, .left_brace);
-
-        while (true) {
-            if (parser.expectSpecialCharacter(lexer_result, .right_brace)) {
-                break;
-            } else |_| {}
-
-            return error.not_implemented;
-        }
-
-        return Function{
-            .name = name,
-            .statements = ArrayList(Statement){},
-            .arguments = ArrayList(Function.Argument){},
-            .return_type = return_type_identifier,
-        };
+        const t = try parser.parseTypeExpression();
+        const function_declaration = try parser.appendNode(.{
+            .type = .function_declaration_no_arguments,
+            .left = t,
+            .right = try parser.parseBlock(),
+        });
+        return function_declaration;
     }
 
-    inline fn consume(parser: *Parser, lexer_result: *const lexer.Result, comptime token_id: lexer.TokenId) void {
-        assert(lexer_result.ids.items[parser.id_index] == token_id);
-        parser.id_index += 1;
-        switch (token_id) {
-            .special_character => parser.special_character_index += 1,
-            .identifier => parser.identifier_index += 1,
+    fn parseBlock(parser: *Parser) !Node.Index {
+        try parser.lexer.expectSpecificToken(.operator, .left_brace);
+
+        var statements = ArrayList(Node.Index){};
+
+        while (!parser.lexer.maybeExpectOperator(.right_brace)) {
+            const statement = try parser.parseStatement();
+            try statements.append(parser.allocator, statement);
         }
-    }
 
-    fn parseTopLevelDeclaration(parser: *Parser, lexer_result: *const lexer.Result) !void {
-        const top_level_identifier = try parser.expectIdentifier(lexer_result);
-        const next_token = parser.peek(lexer_result);
-
-        switch (next_token) {
-            .special_character => |special_character| switch (special_character) {
-                .left_parenthesis => {
-                    const function = try parser.parseFunction(lexer_result, top_level_identifier);
-                    try parser.functions.append(parser.allocator, function);
-                },
-                else => return error.not_implemented,
+        const node: Node = switch (statements.items.len) {
+            0 => unreachable,
+            1 => .{
+                .type = .block_one,
+                .left = statements.items[0],
+                .right = 0,
             },
+            else => unreachable,
+        };
+        log.debug("Parsed block!", .{});
+        return parser.appendNode(node);
+    }
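+    // For now a statement is just an expression terminated by ';'.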
+    fn parseStatement(parser: *Parser) !Node.Index {
+        // TODO: more stuff before
+        const expression = try parser.parseAssignExpression();
+        try parser.lexer.expectSpecificToken(.operator, .semicolon);
+
+        return expression;
+    }
+
+    fn parseAssignExpression(parser: *Parser) !Node.Index {
+        const expression = try parser.parseExpression();
+        switch (parser.lexer.peek() orelse unreachable) {
+            .operator => |operator| switch (operator) {
+                .semicolon => return expression,
+                else => unreachable,
+            },
+            else => unreachable,
+        }
+
+        return error.not_implemented;
+    }
+
+    fn parseExpression(parser: *Parser) Error!Node.Index {
+        return parser.parseExpressionPrecedence(0);
+    }
+
+    fn parseExpressionPrecedence(parser: *Parser, minimum_precedence: i32) !Node.Index {
+        var expr_index = try parser.parsePrefixExpression();
+        log.debug("Expr index: {}", .{expr_index});
+
+        var banned_precedence: i32 = -1;
+        while (parser.lexer.hasTokens()) {
+            const precedence: i32 = switch (parser.lexer.peek() orelse unreachable) {
+                .operator => |operator| switch (operator) {
+                    .semicolon => -1,
+                    else => @panic(@tagName(operator)),
+                },
+                else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }),
+            };
+
+            if (precedence < minimum_precedence) {
+                break;
+            }
+
+            if (precedence == banned_precedence) {
+                unreachable;
+            }
+
+            const node_index = try parser.parseExpressionPrecedence(1);
+            _ = node_index;
+
+            unreachable;
+        }
+
+        log.err("Parsed expression precedence", .{});
+
+        return expr_index;
+    }
+
+    fn parsePrefixExpression(parser: *Parser) !Node.Index {
+        switch (parser.lexer.peek() orelse unreachable) {
+            // .bang => .bool_not,
+            // .minus => .negation,
+            // .tilde => .bit_not,
+            // .minus_percent => .negation_wrap,
+            // .ampersand => .address_of,
+            // .keyword_try => .@"try",
+            // .keyword_await => .@"await",
+
+            else => |pref| {
+                log.err("Pref: {s}", .{@tagName(pref)});
+                return parser.parsePrimaryExpression();
+            },
+        }
+
+        return error.not_implemented;
+    }
+
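+    // Leaf nodes store the token's per-type index in `left`; `right` stays 0.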
+    fn nodeFromToken(parser: *Parser, comptime token_id: lexer.TokenId) !Node.Index {
+        const node = try parser.appendNode(.{
+            .type = @field(Node.Type, @tagName(token_id)),
+            .left = @intCast(parser.lexer.currentTokenIndex(token_id)),
+            .right = 0,
+        });
+        parser.lexer.consume(token_id);
+
+        return node;
+    }
+
+    fn parsePrimaryExpression(parser: *Parser) !Node.Index {
+        const result = switch (parser.lexer.peek() orelse unreachable) {
+            .number => try parser.nodeFromToken(.number),
             .identifier => |identifier| {
-                _ = identifier;
-                return error.not_implemented;
-            },
-        }
-    }
+                const identifier_name = parser.lexer.getIdentifier(identifier);
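+                // Keywords are lexed as plain identifiers; compare the name against each Keyword variant at comptime.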
+                inline for (@typeInfo(Keyword).Enum.fields) |keyword| {
+                    if (std.mem.eql(u8, identifier_name, keyword.name)) return switch (@as(Keyword, @enumFromInt(keyword.value))) {
+                        .@"return" => blk: {
+                            parser.lexer.consume(.identifier);
+                            const node_ref = try parser.appendNode(.{
+                                .type = .@"return",
+                                .left = try parser.parseExpression(),
+                                .right = 0,
+                            });
+                            break :blk node_ref;
+                        },
+                        .@"fn" => blk: {
+                            parser.lexer.consume(.identifier);
+                            // TODO: figure out name association
+                            break :blk try parser.parseFunctionDeclaration();
+                        },
+                    };
+                }
 
-    inline fn peek(parser: *const Parser, lexer_result: *const lexer.Result) PeekResult {
-        return switch (lexer_result.ids.items[parser.id_index]) {
-            .special_character => .{
-                .special_character = lexer_result.special_characters.items[parser.special_character_index],
+                unreachable;
             },
-            .identifier => .{
-                .identifier = blk: {
-                    const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
-                    break :blk lexer_result.file[identifier_range.start .. identifier_range.start + identifier_range.end];
-                },
+            else => |foo| {
+                std.debug.panic("foo: {s}. {}", .{ @tagName(foo), foo });
             },
         };
+
+        return result;
     }
 
-    fn expectSpecialCharacter(parser: *Parser, lexer_result: *const lexer.Result, expected: lexer.SpecialCharacter) !void {
-        const token_id = lexer_result.ids.items[parser.id_index];
-        if (token_id != .special_character) {
-            return error.expected_special_character;
+    fn parseContainerMembers(parser: *Parser) !void {
+        var container_nodes = ArrayList(Node.Index){};
+        while (parser.lexer.hasTokens()) {
+            const container_node = switch (parser.lexer.peek() orelse unreachable) {
+                .identifier => |first_identifier_ref| blk: {
+                    parser.lexer.consume(.identifier);
+
+                    const first_identifier = parser.lexer.getIdentifier(first_identifier_ref);
+
+                    if (std.mem.eql(u8, first_identifier, "comptime")) {
+                        unreachable;
+                    } else {
+                        const mutability_qualifier: ExpressionMutabilityQualifier = if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"const"))) .@"const" else if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"var"))) .@"var" else @panic(first_identifier);
+                        _ = mutability_qualifier;
+
+                        const identifier = try parser.appendNode(.{
+                            .type = .identifier,
+                            .left = @intCast(try parser.lexer.expectTokenTypeIndex(.identifier)),
+                            .right = 0,
+                        });
+
+                        switch (parser.lexer.peek() orelse unreachable) {
+                            .operator => |operator| switch (operator) {
+                                .colon => unreachable,
+                                .equal => {
+                                    parser.lexer.consume(.operator);
+
+                                    const expression = try parser.parseExpression();
+                                    break :blk try parser.appendNode(.{
+                                        .type = .container_declaration,
+                                        .left = expression,
+                                        .right = identifier,
+                                    });
+                                },
+                                else => unreachable,
+                            },
+                            else => |foo| std.debug.panic("WTF: {}", .{foo}),
+                        }
+                    }
+                },
+                else => |a| std.debug.panic("{}", .{a}),
+            };
+
+            try container_nodes.append(parser.allocator, container_node);
         }
-
-        defer parser.id_index += 1;
-
-        const special_character = lexer_result.special_characters.items[parser.special_character_index];
-        if (special_character != expected) {
-            return error.expected_different_special_character;
-        }
-
-        parser.special_character_index += 1;
     }
-
-    fn acceptSpecialCharacter() void {}
-
-    fn expectIdentifier(parser: *Parser, lexer_result: *const lexer.Result) !u32 {
-        const token_id = lexer_result.ids.items[parser.id_index];
-        if (token_id != .identifier) {
-            return Error.expected_identifier;
-        }
-
-        parser.id_index += 1;
-
-        const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
-        parser.identifier_index += 1;
-        const identifier = lexer_result.file[identifier_range.start..identifier_range.end];
-        const Hash = std.hash.Wyhash;
-        const seed = @intFromPtr(identifier.ptr);
-        var hasher = Hash.init(seed);
-        std.hash.autoHash(&hasher, identifier.ptr);
-        const hash = hasher.final();
-        const truncated_hash: u32 = @truncate(hash);
-        try parser.strings.put(parser.allocator, truncated_hash, identifier);
-        return truncated_hash;
-    }
-
-    const Error = error{
-        expected_identifier,
-        expected_special_character,
-        expected_different_special_character,
-        not_implemented,
-    };
 };
-
-pub fn runTest(allocator: Allocator, lexer_result: *const lexer.Result) !Result {
-    var parser = Parser{
-        .allocator = allocator,
-        .strings = StringMap{},
-        .functions = ArrayList(Function){},
-    };
-
-    return parser.parse(lexer_result) catch |err| {
-        std.log.err("error: {}", .{err});
-        return err;
-    };
-}
diff --git a/src/test/main.b b/src/test/main.b
index 41c31f9..8847d3f 100644
--- a/src/test/main.b
+++ b/src/test/main.b
@@ -1,3 +1,3 @@
-main() -> void {
-
+const main = fn() i32 {
+    return 0;
 }