diff --git a/.gitignore b/.gitignore index 4c82b07..212371e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ zig-cache zig-out +nat diff --git a/build.zig b/build.zig index dcb261d..453572c 100644 --- a/build.zig +++ b/build.zig @@ -28,6 +28,11 @@ pub fn build(b: *std.Build) void { // standard location when the user invokes the "install" step (the default // step when running `zig build`). b.installArtifact(exe); + b.installDirectory(.{ + .source_dir = std.Build.LazyPath.relative("lib"), + .install_dir = .bin, + .install_subdir = "lib", + }); // This *creates* a Run step in the build graph, to be executed when another // step is evaluated that depends on it. The next line below will establish @@ -62,6 +67,13 @@ pub fn build(b: *std.Build) void { const run_unit_tests = b.addRunArtifact(unit_tests); + const debug_unit_tests_cmd = b.addSystemCommand(&.{"gf2"}); + debug_unit_tests_cmd.addArtifactArg(unit_tests); + debug_unit_tests_cmd.addArgs(&.{ "-ex", "r" }); + + const debug_test_step = b.step("debug_test", "Run the tests through the debugger"); + debug_test_step.dependOn(&debug_unit_tests_cmd.step); + // Similar to creating the run step earlier, this exposes a `test` step to // the `zig build --help` menu, providing a way for the user to request // running the unit tests. diff --git a/lib/std/start.nat b/lib/std/start.nat new file mode 100644 index 0000000..8661ac4 --- /dev/null +++ b/lib/std/start.nat @@ -0,0 +1 @@ +const builtin = #import("builtin"); diff --git a/lib/std/std.nat b/lib/std/std.nat new file mode 100644 index 0000000..3205fe7 --- /dev/null +++ b/lib/std/std.nat @@ -0,0 +1,4 @@ +const start = #import("start.nat"); +comptime { + _ = start; +} diff --git a/src/Compilation.zig b/src/Compilation.zig new file mode 100644 index 0000000..ed8421a --- /dev/null +++ b/src/Compilation.zig @@ -0,0 +1,300 @@ +const Compilation = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const print = std.debug.print; + +const Allocator = std.mem.Allocator; + +const data_structures = @import("data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const StringHashMap = data_structures.StringHashMap; +const StringArrayHashMap = data_structures.StringArrayHashMap; + +const lexical_analyzer = @import("frontend/lexical_analyzer.zig"); +const syntactic_analyzer = @import("frontend/syntactic_analyzer.zig"); +const semantic_analyzer = @import("frontend/semantic_analyzer.zig"); + +base_allocator: Allocator, +cwd_absolute_path: []const u8, +directory_absolute_path: []const u8, +executable_absolute_path: []const u8, +build_directory: std.fs.Dir, + +const cache_dir_name = "cache"; +const installation_dir_name = "installation"; + +pub fn init(allocator: Allocator) !*Compilation { + const compilation: *Compilation = try allocator.create(Compilation); + + const self_exe_path = try std.fs.selfExePathAlloc(allocator); + const self_exe_dir_path = std.fs.path.dirname(self_exe_path).?; + compilation.* = .{ + .base_allocator = allocator, + .cwd_absolute_path = try realpathAlloc(allocator, "."), + .executable_absolute_path = self_exe_path, + .directory_absolute_path = self_exe_dir_path, + .build_directory = try std.fs.cwd().makeOpenPath("nat", .{}), + }; + + try compilation.build_directory.makePath(cache_dir_name); + try compilation.build_directory.makePath(installation_dir_name); + + return compilation; +} + +pub fn deinit(compilation: *Compilation) void { + const allocator = compilation.base_allocator; + allocator.free(compilation.cwd_absolute_path); + 
allocator.free(compilation.executable_absolute_path); + allocator.destroy(compilation); +} + +pub const Module = struct { + main_package: *Package, + import_table: StringArrayHashMap(*File) = .{}, + + pub const Descriptor = struct { + main_package_path: []const u8, + }; + + fn deinit(module: *Module, allocator: Allocator) void { + defer allocator.destroy(module); + + for (module.import_table.values()) |file| { + file.deinit(allocator); + } + + var iterator = module.main_package.dependencies.valueIterator(); + while (iterator.next()) |it| { + const package = it.*; + package.deinit(allocator); + } + + module.main_package.deinit(allocator); + + module.import_table.clearAndFree(allocator); + } + + fn importPackage(module: *Module, compilation: *Compilation, package: *Package) !ImportPackageResult { + const lookup_result = try module.import_table.getOrPut(compilation.base_allocator, package.directory.path); + errdefer _ = module.import_table.pop(); + if (lookup_result.found_existing) { + const file: *File = lookup_result.value_ptr.*; + try file.addPackageReference(compilation.base_allocator, package); + unreachable; + } + const file = try compilation.base_allocator.create(File); + lookup_result.value_ptr.* = file; + file.* = File{ + .relative_path = package.source_path, + .package = package, + }; + try file.addPackageReference(compilation.base_allocator, package); + + return .{ + .file = file, + .is_new = true, + }; + } + + fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { + _ = module; + const source_file = try file.package.directory.handle.openFile(file.relative_path, .{}); + defer source_file.close(); + + const file_size = try source_file.getEndPos(); + var file_buffer = try allocator.alloc(u8, file_size); + + const read_byte_count = try source_file.readAll(file_buffer); + assert(read_byte_count == file_size); + + //TODO: adjust file maximum size + file.source_code = file_buffer[0..read_byte_count]; + file.status = .loaded_into_memory; + + try file.lex(allocator); + try file.parse(allocator); + } +}; + +fn pathFromCwd(compilation: *const Compilation, relative_path: []const u8) ![]const u8 { + return std.fs.path.join(compilation.base_allocator, &.{ compilation.cwd_absolute_path, relative_path }); +} + +fn pathFromCompiler(compilation: *const Compilation, relative_path: []const u8) ![]const u8 { + return std.fs.path.join(compilation.base_allocator, &.{ compilation.directory_absolute_path, relative_path }); +} + +fn realpathAlloc(allocator: Allocator, pathname: []const u8) ![]const u8 { + var path_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const realpathInStack = try std.os.realpath(pathname, &path_buffer); + return allocator.dupe(u8, realpathInStack); +} + +pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) !void { + // TODO: generate an actual file + const builtin_file_name = "builtin.nat"; + var cache_dir = try compilation.build_directory.openDir("cache", .{}); + const builtin_file = try cache_dir.createFile(builtin_file_name, .{ .truncate = false }); + builtin_file.close(); + + const module: *Module = try compilation.base_allocator.create(Module); + defer module.deinit(compilation.base_allocator); + module.* = Module{ + .main_package = blk: { + const result = try compilation.base_allocator.create(Package); + const main_package_absolute_directory_path = try compilation.pathFromCwd(std.fs.path.dirname(descriptor.main_package_path).?); + result.* = .{ + .directory = .{ + .handle = try 
std.fs.openDirAbsolute(main_package_absolute_directory_path, .{}), + .path = main_package_absolute_directory_path, + }, + .source_path = try compilation.base_allocator.dupe(u8, std.fs.path.basename(descriptor.main_package_path)), + }; + break :blk result; + }, + }; + + const std_package_dir = "lib/std"; + const package_descriptors = [2]struct { + name: []const u8, + directory_path: []const u8, + }{ + .{ + .name = "std", + .directory_path = try switch (@import("builtin").is_test) { + true => compilation.pathFromCwd(std_package_dir), + false => compilation.pathFromCompiler(std_package_dir), + }, + }, + .{ + .name = "builtin", + .directory_path = blk: { + const result = try cache_dir.realpathAlloc(compilation.base_allocator, "."); + cache_dir.close(); + break :blk result; + }, + }, + }; + + for (package_descriptors) |package_descriptor| { + const package = try compilation.base_allocator.create(Package); + package.* = .{ + .directory = .{ + .path = package_descriptor.directory_path, + .handle = try std.fs.openDirAbsolute(package_descriptor.directory_path, .{}), + }, + .source_path = try std.mem.concat(compilation.base_allocator, u8, &.{ package_descriptor.name, ".nat" }), + }; + + try module.main_package.addDependency(compilation.base_allocator, package_descriptor.name, package); + } + + assert(module.main_package.dependencies.size == 2); + + _ = try module.importPackage(compilation, module.main_package.dependencies.get("std").?); + + for (module.import_table.values()) |import| { + try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import); + } +} + +const ImportPackageResult = struct { + file: *File, + is_new: bool, +}; + +fn generateAST() !void {} + +pub const Directory = struct { + handle: std.fs.Dir, + path: []const u8, +}; + +pub const Package = struct { + directory: Directory, + /// Relative to the package main directory + source_path: []const u8, + dependencies: StringHashMap(*Package) = .{}, + + fn addDependency(package: *Package, allocator: Allocator, package_name: []const u8, new_dependency: *Package) !void { + try package.dependencies.ensureUnusedCapacity(allocator, 1); + package.dependencies.putAssumeCapacityNoClobber(package_name, new_dependency); + } + + fn deinit(package: *Package, allocator: Allocator) void { + if (package.dependencies.size > 0) { + assert(package.dependencies.size == 2); + } + package.dependencies.clearAndFree(allocator); + allocator.free(package.source_path); + allocator.free(package.directory.path); + package.directory.handle.close(); + allocator.destroy(package); + } +}; + +pub const File = struct { + status: Status = .not_loaded, + source_code: []const u8 = &.{}, + lexical_analyzer_result: lexical_analyzer.Result = undefined, + syntactic_analyzer_result: syntactic_analyzer.Result = undefined, + package_references: ArrayList(*Package) = .{}, + relative_path: []const u8, + package: *Package, + + const Status = enum { + not_loaded, + loaded_into_memory, + lexed, + parsed, + }; + + fn addPackageReference(file: *File, allocator: Allocator, package: *Package) !void { + for (file.package_references.items) |other| { + if (other == package) return; + } + + try file.package_references.insert(allocator, 0, package); + } + + pub fn fromRelativePath(allocator: Allocator, file_relative_path: []const u8) *File { + const file_content = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize)); + _ = file_content; + const file = try allocator.create(File); + file.* = File{}; + + return file; + } + + fn lex(file: *File, 
allocator: Allocator) !void { + assert(file.status == .loaded_into_memory); + file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code); + if (!@import("builtin").is_test) { + print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time}); + } + file.status = .lexed; + } + + fn parse(file: *File, allocator: Allocator) !void { + assert(file.status == .lexed); + file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code); + if (!@import("builtin").is_test) { + print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time}); + } + file.status = .parsed; + } + + fn deinit(file: *File, allocator: Allocator) void { + defer allocator.destroy(file); + if (file.status == .parsed) { + file.syntactic_analyzer_result.free(allocator); + file.lexical_analyzer_result.free(allocator); + file.package_references.clearAndFree(allocator); + allocator.free(file.source_code); + } else { + unreachable; + } + } +}; diff --git a/src/emit.zig b/src/backend/emit.zig similarity index 100% rename from src/emit.zig rename to src/backend/emit.zig diff --git a/src/ir.zig b/src/backend/ir.zig similarity index 100% rename from src/ir.zig rename to src/backend/ir.zig diff --git a/src/compiler.zig b/src/compiler.zig deleted file mode 100644 index b7c8214..0000000 --- a/src/compiler.zig +++ /dev/null @@ -1,21 +0,0 @@ -const std = @import("std"); - -const Allocator = std.mem.Allocator; - -const data_structures = @import("data_structures.zig"); - -const lexer = @import("lexer.zig"); -const parser = @import("parser.zig"); - -test { - _ = lexer; - _ = parser; -} - -pub fn cycle(allocator: Allocator, file_relative_path: []const u8) !void { - const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize)); - std.debug.print("File:\n\n```\n{s}\n```\n", .{file}); - const lexer_result = try lexer.lex(allocator, file); - const parser_result = try parser.parse(allocator, &lexer_result); - _ = parser_result; -} diff --git a/src/data_structures.zig b/src/data_structures.zig index 58fbed7..f6a4bb1 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -1,4 +1,7 @@ const std = @import("std"); +pub const Allocator = std.mem.Allocator; pub const ArrayList = std.ArrayListUnmanaged; -pub const HashMap = std.AutoHashMap; +pub const HashMap = std.AutoHashMapUnmanaged; +pub const StringHashMap = std.StringHashMapUnmanaged; +pub const StringArrayHashMap = std.StringArrayHashMapUnmanaged; diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig new file mode 100644 index 0000000..e485a5c --- /dev/null +++ b/src/frontend/lexical_analyzer.zig @@ -0,0 +1,120 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const log = std.log; + +const equal = std.mem.eql; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; + +const Compilation = @import("../Compilation.zig"); +const fs = @import("../fs.zig"); + +pub const Token = packed struct(u64) { + start: u32, + len: u24, + id: Id, + + pub const Id = enum(u8) { + identifier = 0, + number = 1, + string_literal = 2, + left_parenthesis = '(', + right_parenthesis = ')', + left_brace = '{', + right_brace = '}', + equal = '=', + colon = ':', + semicolon = ';', + hash = '#', + comma = ',', + bang = '!', + }; +}; + +pub const Result = struct { + tokens: ArrayList(Token), + time: u64, + + pub fn free(result: 
*Result, allocator: Allocator) void { + result.tokens.clearAndFree(allocator); + } +}; + +pub fn analyze(allocator: Allocator, text: []const u8) !Result { + const time_start = std.time.Instant.now() catch unreachable; + var tokens = try ArrayList(Token).initCapacity(allocator, text.len / 8); + var index: usize = 0; + + while (index < text.len) { + const start_index = index; + const start_character = text[index]; + const token_id: Token.Id = switch (start_character) { + 'a'...'z', 'A'...'Z', '_' => blk: { + while (true) { + const ch = text[index]; + if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_' or (ch >= '0' and ch <= '9')) { + index += 1; + continue; + } + + break; + } + + break :blk .identifier; + }, + '(', ')', '{', '}', '-', '=', ';', '#' => |operator| blk: { + index += 1; + break :blk @enumFromInt(operator); + }, + '0'...'9' => blk: { + while (text[index] >= '0' and text[index] <= '9') { + index += 1; + } + + break :blk .number; + }, + '"' => blk: { + index += 1; + while (text[index] != '"') { + index += 1; + } + + index += 1; + + break :blk .string_literal; + }, + ' ', '\n', '\r', '\t' => { + index += 1; + continue; + }, + else => |foo| { + std.debug.panic("NI: '{c}'", .{foo}); + }, + }; + + const end_index = index; + + try tokens.append(allocator, .{ + .start = @intCast(start_index), + .len = @intCast(end_index - start_index), + .id = token_id, + }); + } + + const should_log = false; + if (should_log) { + for (tokens.items, 0..) |token, i| { + std.debug.print("#{} {s}\n", .{ i, @tagName(token.id) }); + } + } + + const time_end = std.time.Instant.now() catch unreachable; + const time = time_end.since(time_start); + + return .{ + .tokens = tokens, + .time = time, + }; +} diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig new file mode 100644 index 0000000..e69de29 diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig new file mode 100644 index 0000000..54ae1ac --- /dev/null +++ b/src/frontend/syntactic_analyzer.zig @@ -0,0 +1,474 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const equal = std.mem.eql; +const log = std.log; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const HashMap = data_structures.HashMap; + +const lexical_analyzer = @import("lexical_analyzer.zig"); +const Token = lexical_analyzer.Token; + +pub const Result = struct { + nodes: ArrayList(Node), + time: u64, + + pub fn free(result: *Result, allocator: Allocator) void { + result.nodes.clearAndFree(allocator); + } +}; + +pub const Node = packed struct(u96) { + token: u32, + id: Id, + left: Node.Index, + right: Node.Index, + + pub const Index = u27; + + pub const Range = struct { + start: u32, + end: u32, + }; + + pub const Id = enum(u10) { + main = 0, + identifier = 1, + number = 2, + @"return" = 3, + block_one = 4, + function_declaration_no_arguments = 5, + container_declaration = 6, + string_literal = 7, + compiler_intrinsic_one = 8, + simple_variable_declaration = 9, + assign = 10, + @"comptime" = 11, + }; +}; + +const Error = error{ + unexpected_token, + not_implemented, + OutOfMemory, +}; + +const Analyzer = struct { + tokens: []const Token, + token_i: u32 = 0, + nodes: ArrayList(Node) = .{}, + file: []const u8, + allocator: Allocator, + temporal_node_heap: ArrayList(Node.Index) = .{}, + + fn free(analyzer: *Analyzer) void { + _ = analyzer; + } + + fn expectToken(analyzer: *Analyzer, token_id: Token.Id) 
!u32 { + if (analyzer.tokens[analyzer.token_i].id == token_id) { + const result = analyzer.token_i; + analyzer.token_i += 1; + return result; + } else { + return error.unexpected_token; + } + } + + fn getIdentifier(analyzer: *const Analyzer, token: Token) []const u8 { + assert(token.id == .identifier); + const identifier = analyzer.file[token.start..][0..token.len]; + return identifier; + } + + fn containerMembers(analyzer: *Analyzer) !Members { + const node_heap_top = analyzer.temporal_node_heap.items.len; + defer analyzer.temporal_node_heap.shrinkRetainingCapacity(node_heap_top); + + while (analyzer.token_i < analyzer.tokens.len) { + const first = analyzer.token_i; + const member_node: Node = switch (analyzer.tokens[first].id) { + .identifier => blk: { + const first_identifier_token = analyzer.tokens[first]; + analyzer.token_i += 1; + + const identifier = analyzer.getIdentifier(first_identifier_token); + + if (equal(u8, identifier, "comptime")) { + switch (analyzer.tokens[analyzer.token_i].id) { + .left_brace => { + const comptime_block = try analyzer.block(); + + break :blk .{ + .id = .@"comptime", + .token = first, + .left = comptime_block, + .right = 0, + }; + }, + else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}), + } + } else { + const is_const = equal(u8, identifier, "const"); + const is_var = equal(u8, identifier, "var"); + assert(is_const or is_var); + + _ = try analyzer.expectToken(.identifier); + + // TODO: type + _ = try analyzer.expectToken(.equal); + + // TODO: do this in a function + const init_node = switch (analyzer.tokens[analyzer.token_i].id) { + .identifier => unreachable, + .hash => try analyzer.compilerIntrinsic(), + else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), + }; + + _ = try analyzer.expectToken(.semicolon); + + // TODO: + const type_node = 0; + const top_level_decl = .{ + .id = .simple_variable_declaration, + .token = first, + .left = type_node, + .right = init_node, + }; + + break :blk top_level_decl; + } + }, + else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), + }; + + const member_index = try analyzer.addNode(member_node); + try analyzer.temporal_node_heap.append(analyzer.allocator, member_index); + } + + const members_array = analyzer.temporal_node_heap.items[node_heap_top..]; + const members: Members = switch (members_array.len) { + 2 => .{ + .len = 2, + .left = members_array[0], + .right = members_array[1], + }, + else => |len| std.debug.panic("Len: {}", .{len}), + }; + + return members; + } + + fn block(analyzer: *Analyzer) !Node.Index { + const left_brace = try analyzer.expectToken(.left_brace); + const node_heap_top = analyzer.temporal_node_heap.items.len; + defer analyzer.temporal_node_heap.shrinkRetainingCapacity(node_heap_top); + + while (analyzer.tokens[analyzer.token_i].id != .right_brace) { + const statement_index = try analyzer.statement(); + try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index); + } + _ = try analyzer.expectToken(.right_brace); + + const statement_array = analyzer.temporal_node_heap.items[node_heap_top..]; + const node: Node = switch (statement_array.len) { + 1 => .{ + .id = .block_one, + .token = left_brace, + .left = statement_array[0], + .right = 0, + }, + else => |len| std.debug.panic("len: {}", .{len}), + }; + return analyzer.addNode(node); + } + + fn statement(analyzer: *Analyzer) !Node.Index { + // TODO: more stuff before + const result = try analyzer.assignExpression(); + _ = try analyzer.expectToken(.semicolon); + + return result; + } + + fn assignExpression(analyzer: 
*Analyzer) !Node.Index { + const expr = try analyzer.expression(); + const expression_id: Node.Id = switch (analyzer.tokens[analyzer.token_i].id) { + .semicolon => return expr, + .equal => .assign, + else => unreachable, + }; + + return analyzer.addNode(.{ + .id = expression_id, + .token = blk: { + const token_i = analyzer.token_i; + analyzer.token_i += 1; + break :blk token_i; + }, + .left = expr, + .right = try analyzer.expression(), + }); + } + + fn compilerIntrinsic(analyzer: *Analyzer) !Node.Index { + const hash = try analyzer.expectToken(.hash); + _ = try analyzer.expectToken(.identifier); + _ = try analyzer.expectToken(.left_parenthesis); + + const temporal_heap_top = analyzer.temporal_node_heap.items.len; + defer analyzer.temporal_node_heap.shrinkRetainingCapacity(temporal_heap_top); + + while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { + const parameter = try analyzer.expression(); + try analyzer.temporal_node_heap.append(analyzer.allocator, parameter); + + switch (analyzer.tokens[analyzer.token_i].id) { + .comma => analyzer.token_i += 1, + .right_parenthesis => continue, + else => unreachable, + } + } + + // Consume the right parenthesis + analyzer.token_i += 1; + + const parameters = analyzer.temporal_node_heap.items[temporal_heap_top..]; + + return switch (parameters.len) { + 1 => analyzer.addNode(.{ + .id = .compiler_intrinsic_one, + .token = hash, + .left = parameters[0], + .right = 0, + }), + else => unreachable, + }; + } + + fn expression(analyzer: *Analyzer) !Node.Index { + return analyzer.expressionPrecedence(0); + } + + fn expressionPrecedence(analyzer: *Analyzer, minimum_precedence: i32) !Node.Index { + var result = try analyzer.prefixExpression(); + + var banned_precedence: i32 = -1; + + while (analyzer.token_i < analyzer.tokens.len) { + const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) { + .equal, .semicolon, .right_parenthesis => -1, + else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }), + }; + + if (precedence < minimum_precedence) { + break; + } + + if (precedence == banned_precedence) { + break; + } + + // TODO: fix this + const node_index = try analyzer.expressionPrecedence(1); + _ = node_index; + unreachable; + } + + return result; + } + + fn prefixExpression(analyzer: *Analyzer) !Node.Index { + switch (analyzer.tokens[analyzer.token_i].id) { + // .bang => .bool_not, + // .minus => .negation, + // .tilde => .bit_not, + // .minus_percent => .negation_wrap, + // .ampersand => .address_of, + // .keyword_try => .@"try", + // .keyword_await => .@"await", + + else => |pref| { + _ = pref; + return analyzer.primaryExpression(); + }, + } + + return error.not_implemented; + } + + fn primaryExpression(analyzer: *Analyzer) !Node.Index { + const result = switch (analyzer.tokens[analyzer.token_i].id) { + .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .colon => unreachable, + else => try analyzer.curlySuffixExpression(), + }, + .string_literal => try analyzer.curlySuffixExpression(), + else => |id| { + log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)}); + unreachable; + }, + }; + + return result; + } + + fn curlySuffixExpression(analyzer: *Analyzer) !Node.Index { + const left = try analyzer.typeExpression(); + + return switch (analyzer.tokens[analyzer.token_i].id) { + .left_brace => unreachable, + else => left, + }; + } + + fn typeExpression(analyzer: *Analyzer) !Node.Index { + return switch (analyzer.tokens[analyzer.token_i].id) { + .string_literal, .identifier => try 
analyzer.errorUnionExpression(), + else => |id| blk: { + log.warn("By default, calling errorUnionExpression with {s}", .{@tagName(id)}); + + const result = try analyzer.errorUnionExpression(); + + break :blk result; + }, + }; + } + + fn errorUnionExpression(analyzer: *Analyzer) !Node.Index { + const suffix_expression = try analyzer.suffixExpression(); + + return switch (analyzer.tokens[analyzer.token_i].id) { + .bang => unreachable, + else => suffix_expression, + }; + } + + fn suffixExpression(analyzer: *Analyzer) !Node.Index { + var result = try analyzer.primaryTypeExpression(); + + while (true) { + if (analyzer.suffixOperator()) |_| { + unreachable; + } else { + if (analyzer.tokens[analyzer.token_i].id == .left_parenthesis) { + unreachable; + } else { + return result; + } + } + } + + unreachable; + } + + fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index { + const token_i = analyzer.token_i; + return switch (analyzer.tokens[token_i].id) { + .string_literal => blk: { + analyzer.token_i += 1; + break :blk analyzer.addNode(.{ + .id = .string_literal, + .token = token_i, + .left = 0, + .right = 0, + }); + }, + .identifier => switch (analyzer.tokens[token_i + 1].id) { + .colon => unreachable, + else => analyzer.addNode(.{ + .id = .identifier, + .token = blk: { + analyzer.token_i += 1; + break :blk token_i; + }, + .left = 0, + .right = 0, + }), + }, + else => |foo| { + switch (foo) { + .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.getIdentifier(analyzer.tokens[token_i]) }), + else => std.debug.panic("{s}", .{@tagName(foo)}), + } + }, + }; + } + + // TODO: + fn suffixOperator(analyzer: *Analyzer) ?bool { + _ = analyzer; + + return null; + } + + fn addNode(analyzer: *Analyzer, node: Node) !Node.Index { + const index = analyzer.nodes.items.len; + try analyzer.nodes.append(analyzer.allocator, node); + return @intCast(index); + } +}; + +const Members = struct { + len: usize, + left: Node.Index, + right: Node.Index, + + pub fn toRange(members: Members) Node.Range { + return switch (members.len) { + 0 => unreachable, + 1 => .{ + .start = members.left, + .end = members.left, + }, + 2 => .{ + .start = members.left, + .end = members.right, + }, + else => unreachable, + }; + } +}; + +pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !Result { + const start = std.time.Instant.now() catch unreachable; + var analyzer = Analyzer{ + .tokens = tokens, + .file = file, + .allocator = allocator, + }; + errdefer analyzer.free(); + const node_index = try analyzer.addNode(.{ + .id = .main, + .token = 0, + .left = 0, + .right = 0, + }); + + assert(node_index == 0); + const members = try analyzer.containerMembers(); + const member_range = members.toRange(); + analyzer.nodes.items[0].left = @intCast(member_range.start); + analyzer.nodes.items[0].right = @intCast(member_range.end); + + const end = std.time.Instant.now() catch unreachable; + + analyzer.temporal_node_heap.clearAndFree(allocator); + + return .{ + .nodes = analyzer.nodes, + .time = end.since(start), + }; +} + +const ExpressionMutabilityQualifier = enum { + @"const", + @"var", +}; + +const Keyword = enum { + @"return", + @"fn", +}; diff --git a/src/lexer.zig b/src/lexer.zig deleted file mode 100644 index ad9b440..0000000 --- a/src/lexer.zig +++ /dev/null @@ -1,158 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const log = std.log; - -const equal = std.mem.eql; - -const data_structures = @import("data_structures.zig"); -const ArrayList = 
data_structures.ArrayList; - -const fs = @import("fs.zig"); -const parser = @import("parser.zig"); - -pub const TokenTypeMap = blk: { - var result: [@typeInfo(TokenId).Enum.fields.len]type = undefined; - - result[@intFromEnum(TokenId.identifier)] = Identifier; - result[@intFromEnum(TokenId.operator)] = Operator; - result[@intFromEnum(TokenId.number)] = Number; - - break :blk result; -}; - -pub const Identifier = parser.Node; - -pub const TokenId = enum { - identifier, - operator, - number, -}; - -pub const Operator = enum(u8) { - left_parenthesis = '(', - right_parenthesis = ')', - left_brace = '{', - right_brace = '}', - equal = '=', - colon = ':', - semicolon = ';', -}; - -pub const Number = struct { - content: union(enum) { - float: f64, - integer: Integer, - }, - - const Integer = struct { - value: u64, - is_negative: bool, - }; -}; - -pub const Result = struct { - arrays: struct { - identifier: ArrayList(Identifier), - operator: ArrayList(Operator), - number: ArrayList(Number), - id: ArrayList(TokenId), - }, - file: []const u8, - time: u64 = 0, - - pub fn free(result: *Result, allocator: Allocator) void { - inline for (@typeInfo(@TypeOf(result.arrays)).Struct.fields) |field| { - @field(result.arrays, field.name).clearAndFree(allocator); - } - } - - fn appendToken(result: *Result, comptime token_id: TokenId, token_value: TokenTypeMap[@intFromEnum(token_id)]) void { - // const index = result.arrays.id.items.len; - @field(result.arrays, @tagName(token_id)).appendAssumeCapacity(token_value); - result.arrays.id.appendAssumeCapacity(token_id); - // log.err("Token #{}: {s} {}", .{ index, @tagName(token_id), token_value }); - } -}; - -pub fn lex(allocator: Allocator, text: []const u8) !Result { - const time_start = std.time.Instant.now() catch unreachable; - - var index: usize = 0; - - var result = Result{ - .arrays = .{ - .identifier = try ArrayList(Identifier).initCapacity(allocator, text.len), - .operator = try ArrayList(Operator).initCapacity(allocator, text.len), - .number = try ArrayList(Number).initCapacity(allocator, text.len), - .id = try ArrayList(TokenId).initCapacity(allocator, text.len), - }, - .file = text, - }; - - defer { - const time_end = std.time.Instant.now() catch unreachable; - result.time = time_end.since(time_start); - } - - while (index < text.len) { - const first_char = text[index]; - switch (first_char) { - 'a'...'z', 'A'...'Z', '_' => { - const start = index; - while (true) { - const ch = text[index]; - if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_' or (ch >= '0' and ch <= '9')) { - index += 1; - continue; - } - break; - } - - result.appendToken(.identifier, .{ - .left = @intCast(start), - .right = @intCast(index), - .type = .identifier, - }); - }, - '(', ')', '{', '}', '-', '=', ';' => |operator| { - result.appendToken(.operator, @enumFromInt(operator)); - index += 1; - }, - '0'...'9' => { - const start = index; - - while (text[index] >= '0' and text[index] <= '9') { - index += 1; - } - const end = index; - const number_slice = text[start..end]; - const number = try std.fmt.parseInt(u64, number_slice, 10); - result.appendToken(.number, .{ - .content = .{ - .integer = .{ - .value = number, - .is_negative = false, - }, - }, - }); - }, - ' ', '\n', '\r', '\t' => index += 1, - else => |foo| { - index += 1; - std.debug.panic("NI: {c} 0x{x}", .{ foo, foo }); - }, - } - } - - return result; -} - -test "lexer" { - const allocator = std.testing.allocator; - const file_path = fs.first; - const file = try fs.readFile(allocator, file_path); - defer 
allocator.free(file); - var result = try lex(allocator, file); - defer result.free(allocator); -} diff --git a/src/main.zig b/src/main.zig index 37ce80e..ce0e6df 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2,17 +2,28 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; -const compiler = @import("compiler.zig"); +const Compilation = @import("Compilation.zig"); const fs = @import("fs.zig"); pub const seed = std.math.maxInt(u64); +const default_src_file = "src/test/main.b"; pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const allocator = gpa.allocator(); - try compiler.cycle(allocator, fs.first); + try singleCompilation(default_src_file); } -test { - _ = compiler; +fn singleCompilation(main_file_path: []const u8) !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + + const compilation = try Compilation.init(gpa.allocator()); + defer compilation.deinit(); + + try compilation.compileModule(.{ + .main_package_path = main_file_path, + }); +} + +test "basic" { + try singleCompilation(default_src_file); } diff --git a/src/parser.zig b/src/parser.zig deleted file mode 100644 index a64c0ed..0000000 --- a/src/parser.zig +++ /dev/null @@ -1,434 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const log = std.log; - -const data_structures = @import("data_structures.zig"); -const ArrayList = data_structures.ArrayList; -const HashMap = data_structures.HashMap; - -const lexer = @import("lexer.zig"); - -pub const Result = struct { - function_map: ArrayList(lexer.Identifier), - nodes: ArrayList(Node), - - pub fn free(result: *Result, allocator: Allocator) void { - result.functions.clearAndFree(allocator); - } -}; - -pub const Node = packed struct(u64) { - type: Type, - left: Node.Index, - right: Node.Index, - - pub const Index = u27; - - pub const Type = enum(u10) { - root = 0, - identifier = 1, - number = 2, - @"return" = 3, - block_one = 4, - function_declaration_no_arguments = 5, - container_declaration = 6, - }; -}; - -const Error = error{ - unexpected_token, - not_implemented, - OutOfMemory, -}; - -pub fn parse(allocator: Allocator, lexer_result: *const lexer.Result) !Result { - var parser = Parser{ - .allocator = allocator, - .nodes = ArrayList(Node){}, - .function_map = ArrayList(lexer.Identifier){}, - .lexer = .{ - .result = lexer_result, - }, - }; - errdefer parser.free(); - - const node_index = try parser.appendNode(Node{ - .type = .root, - .left = 0, - .right = 0, - }); - _ = node_index; - - const members = try parser.parseContainerMembers(); - _ = members; - - return Result{ - .function_map = parser.function_map, - .nodes = parser.nodes, - }; -} - -const ExpressionMutabilityQualifier = enum { - @"const", - @"var", -}; - -const Keyword = enum { - @"return", - @"fn", -}; - -const PeekResult = union(lexer.TokenId) { - identifier: lexer.Identifier, - operator: lexer.Operator, - number: lexer.Number, -}; - -const Lexer = struct { - result: *const lexer.Result, - indices: struct { - identifier: u32 = 0, - operator: u32 = 0, - number: u32 = 0, - id: u32 = 0, - } = .{}, - - fn hasTokens(l: *const Lexer) bool { - return l.indices.id < l.result.arrays.id.items.len; - } - - fn currentTokenIndex(l: *const Lexer, comptime token_id: lexer.TokenId) u32 { - assert(l.isCurrentToken(token_id)); - return @field(l.indices, @tagName(token_id)); - } - - fn consume(l: *Lexer, comptime token_id: lexer.TokenId) void { - 
assert(l.isCurrentToken(token_id)); - l.indices.id += 1; - const index_ptr = &@field(l.indices, @tagName(token_id)); - const index = index_ptr.*; - const token_value = @field(l.result.arrays, @tagName(token_id)).items[index]; - log.err("Consuming {s} ({})...", .{ @tagName(token_id), token_value }); - - index_ptr.* += 1; - } - - fn isCurrentToken(l: *const Lexer, token_id: lexer.TokenId) bool { - return l.result.arrays.id.items[l.indices.id] == token_id; - } - - fn getIdentifier(l: *const Lexer, identifier: Node) []const u8 { - comptime { - assert(lexer.Identifier == Node); - } - - assert(identifier.type == .identifier); - - return l.result.file[identifier.left..][0 .. identifier.right - identifier.left]; - } - - fn expectTokenType(l: *Lexer, comptime expected_token_id: lexer.TokenId) !lexer.TokenTypeMap[@intFromEnum(expected_token_id)] { - const peek_result = l.peek() orelse return error.not_implemented; - return switch (peek_result) { - expected_token_id => |token| blk: { - l.consume(expected_token_id); - break :blk token; - }, - else => error.not_implemented, - }; - } - - fn expectTokenTypeIndex(l: *Lexer, comptime expected_token_id: lexer.TokenId) !u32 { - const peek_result = l.peek() orelse return error.not_implemented; - return switch (peek_result) { - expected_token_id => blk: { - const index = l.currentTokenIndex(expected_token_id); - l.consume(expected_token_id); - break :blk index; - }, - else => error.not_implemented, - }; - } - - fn expectSpecificToken(l: *Lexer, comptime expected_token_id: lexer.TokenId, expected_token: lexer.TokenTypeMap[@intFromEnum(expected_token_id)]) !void { - const peek_result = l.peek() orelse return error.not_implemented; - switch (peek_result) { - expected_token_id => |token| { - if (expected_token != token) { - return error.not_implemented; - } - - l.consume(expected_token_id); - }, - else => |token| { - std.debug.panic("{s}", .{@tagName(token)}); - }, - } - } - - fn maybeExpectOperator(l: *Lexer, expected_operator: lexer.Operator) bool { - return switch (l.peek() orelse unreachable) { - .operator => |operator| { - const result = operator == expected_operator; - if (result) { - l.consume(.operator); - } - return result; - }, - else => false, - }; - } - - fn peek(l: *const Lexer) ?PeekResult { - if (l.indices.id >= l.result.arrays.id.items.len) { - return null; - } - - return switch (l.result.arrays.id.items[l.indices.id]) { - inline else => |token| blk: { - const tag = @tagName(token); - const index = @field(l.indices, tag); - const array = &@field(l.result.arrays, tag); - - break :blk @unionInit(PeekResult, tag, array.items[index]); - }, - }; - } -}; - -const Parser = struct { - lexer: Lexer, - nodes: ArrayList(Node), - function_map: ArrayList(lexer.Identifier), - allocator: Allocator, - - fn appendNode(parser: *Parser, node: Node) !Node.Index { - const index = parser.nodes.items.len; - try parser.nodes.append(parser.allocator, node); - return @intCast(index); - } - - fn getNode(parser: *Parser, node_index: Node.Index) *Node { - return &parser.nodes.items[node_index]; - } - - fn free(parser: *Parser) void { - _ = parser; - } - - fn parseTypeExpression(parser: *Parser) !Node.Index { - // TODO: make this decent - return switch (parser.lexer.peek() orelse unreachable) { - .identifier => parser.nodeFromToken(.identifier), - else => unreachable, - }; - } - - fn parseFunctionDeclaration(parser: *Parser) !Node.Index { - try parser.lexer.expectSpecificToken(.operator, .left_parenthesis); - while (!parser.lexer.maybeExpectOperator(.right_parenthesis)) { - 
return error.not_implemented; - } - - const t = try parser.parseTypeExpression(); - const function_declaration = try parser.appendNode(.{ - .type = .function_declaration_no_arguments, - .left = t, - .right = try parser.parseBlock(), - }); - return function_declaration; - } - - fn parseBlock(parser: *Parser) !Node.Index { - try parser.lexer.expectSpecificToken(.operator, .left_brace); - - var statements = ArrayList(Node.Index){}; - - while (!parser.lexer.maybeExpectOperator(.right_brace)) { - const statement = try parser.parseStatement(); - try statements.append(parser.allocator, statement); - } - - const node: Node = switch (statements.items.len) { - 0 => unreachable, - 1 => .{ - .type = .block_one, - .left = statements.items[0], - .right = 0, - }, - else => unreachable, - }; - log.debug("Parsed block!", .{}); - return parser.appendNode(node); - } - - fn parseStatement(parser: *Parser) !Node.Index { - // TODO: more stuff before - const expression = try parser.parseAssignExpression(); - try parser.lexer.expectSpecificToken(.operator, .semicolon); - - return expression; - } - - fn parseAssignExpression(parser: *Parser) !Node.Index { - const expression = try parser.parseExpression(); - switch (parser.lexer.peek() orelse unreachable) { - .operator => |operator| switch (operator) { - .semicolon => return expression, - else => unreachable, - }, - else => unreachable, - } - - return error.not_implemented; - } - - fn parseExpression(parser: *Parser) Error!Node.Index { - return parser.parseExpressionPrecedence(0); - } - - fn parseExpressionPrecedence(parser: *Parser, minimum_precedence: i32) !Node.Index { - var expr_index = try parser.parsePrefixExpression(); - log.debug("Expr index: {}", .{expr_index}); - - var banned_precedence: i32 = -1; - while (parser.lexer.hasTokens()) { - const precedence: i32 = switch (parser.lexer.peek() orelse unreachable) { - .operator => |operator| switch (operator) { - .semicolon => -1, - else => @panic(@tagName(operator)), - }, - else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }), - }; - - if (precedence < minimum_precedence) { - break; - } - - if (precedence == banned_precedence) { - unreachable; - } - - const node_index = try parser.parseExpressionPrecedence(1); - _ = node_index; - - unreachable; - } - - log.err("Parsed expression precedence", .{}); - - return expr_index; - } - - fn parsePrefixExpression(parser: *Parser) !Node.Index { - switch (parser.lexer.peek() orelse unreachable) { - // .bang => .bool_not, - // .minus => .negation, - // .tilde => .bit_not, - // .minus_percent => .negation_wrap, - // .ampersand => .address_of, - // .keyword_try => .@"try", - // .keyword_await => .@"await", - - else => |pref| { - log.err("Pref: {s}", .{@tagName(pref)}); - return parser.parsePrimaryExpression(); - }, - } - - return error.not_implemented; - } - - fn nodeFromToken(parser: *Parser, comptime token_id: lexer.TokenId) !Node.Index { - const node = try parser.appendNode(.{ - .type = @field(Node.Type, @tagName(token_id)), - .left = @intCast(parser.lexer.currentTokenIndex(token_id)), - .right = 0, - }); - parser.lexer.consume(token_id); - - return node; - } - - fn parsePrimaryExpression(parser: *Parser) !Node.Index { - const result = switch (parser.lexer.peek() orelse unreachable) { - .number => try parser.nodeFromToken(.number), - .identifier => |identifier| { - const identifier_name = parser.lexer.getIdentifier(identifier); - inline for (@typeInfo(Keyword).Enum.fields) |keyword| { - if (std.mem.eql(u8, identifier_name, keyword.name)) return switch 
(@as(Keyword, @enumFromInt(keyword.value))) { - .@"return" => blk: { - parser.lexer.consume(.identifier); - const node_ref = try parser.appendNode(.{ - .type = .@"return", - .left = try parser.parseExpression(), - .right = 0, - }); - break :blk node_ref; - }, - .@"fn" => blk: { - parser.lexer.consume(.identifier); - // TODO: figure out name association - break :blk try parser.parseFunctionDeclaration(); - }, - }; - } - - unreachable; - }, - else => |foo| { - std.debug.panic("foo: {s}. {}", .{ @tagName(foo), foo }); - }, - }; - - return result; - } - - fn parseContainerMembers(parser: *Parser) !void { - var container_nodes = ArrayList(Node.Index){}; - while (parser.lexer.hasTokens()) { - const container_node = switch (parser.lexer.peek() orelse unreachable) { - .identifier => |first_identifier_ref| blk: { - parser.lexer.consume(.identifier); - - const first_identifier = parser.lexer.getIdentifier(first_identifier_ref); - - if (std.mem.eql(u8, first_identifier, "comptime")) { - unreachable; - } else { - const mutability_qualifier: ExpressionMutabilityQualifier = if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"const"))) .@"const" else if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"var"))) .@"var" else @panic(first_identifier); - _ = mutability_qualifier; - - const identifier = try parser.appendNode(.{ - .type = .identifier, - .left = @intCast(try parser.lexer.expectTokenTypeIndex(.identifier)), - .right = 0, - }); - - switch (parser.lexer.peek() orelse unreachable) { - .operator => |operator| switch (operator) { - .colon => unreachable, - .equal => { - parser.lexer.consume(.operator); - - const expression = try parser.parseExpression(); - break :blk try parser.appendNode(.{ - .type = .container_declaration, - .left = expression, - .right = identifier, - }); - }, - else => unreachable, - }, - else => |foo| std.debug.panic("WTF: {}", .{foo}), - } - } - }, - else => |a| std.debug.panic("{}", .{a}), - }; - - try container_nodes.append(parser.allocator, container_node); - } - } -}; diff --git a/src/test/main.b b/src/test/main.nat similarity index 92% rename from src/test/main.b rename to src/test/main.nat index 8847d3f..157bd8a 100644 --- a/src/test/main.b +++ b/src/test/main.nat @@ -1,3 +1,3 @@ const main = fn() i32 { return 0; -} +};
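
Side sketch (not part of the diff, standalone and illustrative only): the bit budget behind the packed layouts introduced in the new frontend. Field widths are copied from the Token and Node definitions in the patch; the constant names and the comptime assertions here are hypothetical and only exist to show why Node.Index is a u27 and Node.Id an enum(u10).

const std = @import("std");

// Mirrors lexical_analyzer.Token: 32 + 24 + 8 bits pack exactly into a u64.
const Token = packed struct(u64) {
    start: u32,
    len: u24,
    id: u8,
};

// Mirrors syntactic_analyzer.Node: 32 + 10 + 27 + 27 bits pack exactly into a u96,
// which is what leaves 27 bits for each child index once the token and id are stored.
const Node = packed struct(u96) {
    token: u32,
    id: u10,
    left: u27,
    right: u27,
};

comptime {
    std.debug.assert(@bitSizeOf(Token) == 64);
    std.debug.assert(@bitSizeOf(Node) == 96);
}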