From 4e99ae0bf7a1efe03c15ddcb28112fb9e0597819 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Wed, 6 Sep 2023 15:22:32 -0600 Subject: [PATCH 1/6] rework parser and lexer --- .gitignore | 1 + build.zig | 12 + lib/std/start.nat | 1 + lib/std/std.nat | 4 + src/Compilation.zig | 300 ++++++++++++++++++ src/{ => backend}/emit.zig | 0 src/{ => backend}/ir.zig | 0 src/compiler.zig | 21 -- src/data_structures.zig | 5 +- src/frontend/lexical_analyzer.zig | 120 +++++++ src/frontend/semantic_analyzer.zig | 0 src/frontend/syntactic_analyzer.zig | 474 ++++++++++++++++++++++++++++ src/lexer.zig | 158 ---------- src/main.zig | 23 +- src/parser.zig | 434 ------------------------- src/test/{main.b => main.nat} | 2 +- 16 files changed, 934 insertions(+), 621 deletions(-) create mode 100644 lib/std/start.nat create mode 100644 lib/std/std.nat create mode 100644 src/Compilation.zig rename src/{ => backend}/emit.zig (100%) rename src/{ => backend}/ir.zig (100%) delete mode 100644 src/compiler.zig create mode 100644 src/frontend/lexical_analyzer.zig create mode 100644 src/frontend/semantic_analyzer.zig create mode 100644 src/frontend/syntactic_analyzer.zig delete mode 100644 src/lexer.zig delete mode 100644 src/parser.zig rename src/test/{main.b => main.nat} (92%) diff --git a/.gitignore b/.gitignore index 4c82b07..212371e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ zig-cache zig-out +nat diff --git a/build.zig b/build.zig index dcb261d..453572c 100644 --- a/build.zig +++ b/build.zig @@ -28,6 +28,11 @@ pub fn build(b: *std.Build) void { // standard location when the user invokes the "install" step (the default // step when running `zig build`). b.installArtifact(exe); + b.installDirectory(.{ + .source_dir = std.Build.LazyPath.relative("lib"), + .install_dir = .bin, + .install_subdir = "lib", + }); // This *creates* a Run step in the build graph, to be executed when another // step is evaluated that depends on it. The next line below will establish @@ -62,6 +67,13 @@ pub fn build(b: *std.Build) void { const run_unit_tests = b.addRunArtifact(unit_tests); + const debug_unit_tests_cmd = b.addSystemCommand(&.{"gf2"}); + debug_unit_tests_cmd.addArtifactArg(unit_tests); + debug_unit_tests_cmd.addArgs(&.{ "-ex", "r" }); + + const debug_test_step = b.step("debug_test", "Run the tests through the debugger"); + debug_test_step.dependOn(&debug_unit_tests_cmd.step); + // Similar to creating the run step earlier, this exposes a `test` step to // the `zig build --help` menu, providing a way for the user to request // running the unit tests. 
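For reference, the build steps wired up above can be exercised as follows (a usage sketch, not part of the patch; the debug step assumes gf2, a GDB frontend, is installed and on PATH):

    $ zig build              # installs the compiler and the lib/ directory next to it
    $ zig build test         # builds and runs the unit tests
    $ zig build debug_test   # runs the unit tests under gf2, with "-ex r" forwarded so the run starts immediately
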
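The two lib/std files introduced below lean on the same lazy-analysis convention as Zig: a declaration is only analyzed once something references it, which is why std.nat pins its import inside a comptime block. A minimal Zig analogue of the idiom (illustrative only, not from the patch):

    const start = @import("start.zig");
    comptime {
        _ = start; // referencing the import forces it to be semantically analyzed
    }
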
diff --git a/lib/std/start.nat b/lib/std/start.nat new file mode 100644 index 0000000..8661ac4 --- /dev/null +++ b/lib/std/start.nat @@ -0,0 +1 @@ +const builtin = #import("builtin"); diff --git a/lib/std/std.nat b/lib/std/std.nat new file mode 100644 index 0000000..3205fe7 --- /dev/null +++ b/lib/std/std.nat @@ -0,0 +1,4 @@ +const start = #import("start.nat"); +comptime { + _ = start; +} diff --git a/src/Compilation.zig b/src/Compilation.zig new file mode 100644 index 0000000..ed8421a --- /dev/null +++ b/src/Compilation.zig @@ -0,0 +1,300 @@ +const Compilation = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const print = std.debug.print; + +const Allocator = std.mem.Allocator; + +const data_structures = @import("data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const StringHashMap = data_structures.StringHashMap; +const StringArrayHashMap = data_structures.StringArrayHashMap; + +const lexical_analyzer = @import("frontend/lexical_analyzer.zig"); +const syntactic_analyzer = @import("frontend/syntactic_analyzer.zig"); +const semantic_analyzer = @import("frontend/semantic_analyzer.zig"); + +base_allocator: Allocator, +cwd_absolute_path: []const u8, +directory_absolute_path: []const u8, +executable_absolute_path: []const u8, +build_directory: std.fs.Dir, + +const cache_dir_name = "cache"; +const installation_dir_name = "installation"; + +pub fn init(allocator: Allocator) !*Compilation { + const compilation: *Compilation = try allocator.create(Compilation); + + const self_exe_path = try std.fs.selfExePathAlloc(allocator); + const self_exe_dir_path = std.fs.path.dirname(self_exe_path).?; + compilation.* = .{ + .base_allocator = allocator, + .cwd_absolute_path = try realpathAlloc(allocator, "."), + .executable_absolute_path = self_exe_path, + .directory_absolute_path = self_exe_dir_path, + .build_directory = try std.fs.cwd().makeOpenPath("nat", .{}), + }; + + try compilation.build_directory.makePath(cache_dir_name); + try compilation.build_directory.makePath(installation_dir_name); + + return compilation; +} + +pub fn deinit(compilation: *Compilation) void { + const allocator = compilation.base_allocator; + allocator.free(compilation.cwd_absolute_path); + allocator.free(compilation.executable_absolute_path); + allocator.destroy(compilation); +} + +pub const Module = struct { + main_package: *Package, + import_table: StringArrayHashMap(*File) = .{}, + + pub const Descriptor = struct { + main_package_path: []const u8, + }; + + fn deinit(module: *Module, allocator: Allocator) void { + defer allocator.destroy(module); + + for (module.import_table.values()) |file| { + file.deinit(allocator); + } + + var iterator = module.main_package.dependencies.valueIterator(); + while (iterator.next()) |it| { + const package = it.*; + package.deinit(allocator); + } + + module.main_package.deinit(allocator); + + module.import_table.clearAndFree(allocator); + } + + fn importPackage(module: *Module, compilation: *Compilation, package: *Package) !ImportPackageResult { + const lookup_result = try module.import_table.getOrPut(compilation.base_allocator, package.directory.path); + errdefer _ = module.import_table.pop(); + if (lookup_result.found_existing) { + const file: *File = lookup_result.value_ptr.*; + try file.addPackageReference(compilation.base_allocator, package); + unreachable; + } + const file = try compilation.base_allocator.create(File); + lookup_result.value_ptr.* = file; + file.* = File{ + .relative_path = package.source_path, + .package = package, + }; + try 
file.addPackageReference(compilation.base_allocator, package); + + return .{ + .file = file, + .is_new = true, + }; + } + + fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { + _ = module; + const source_file = try file.package.directory.handle.openFile(file.relative_path, .{}); + defer source_file.close(); + + const file_size = try source_file.getEndPos(); + var file_buffer = try allocator.alloc(u8, file_size); + + const read_byte_count = try source_file.readAll(file_buffer); + assert(read_byte_count == file_size); + + //TODO: adjust file maximum size + file.source_code = file_buffer[0..read_byte_count]; + file.status = .loaded_into_memory; + + try file.lex(allocator); + try file.parse(allocator); + } +}; + +fn pathFromCwd(compilation: *const Compilation, relative_path: []const u8) ![]const u8 { + return std.fs.path.join(compilation.base_allocator, &.{ compilation.cwd_absolute_path, relative_path }); +} + +fn pathFromCompiler(compilation: *const Compilation, relative_path: []const u8) ![]const u8 { + return std.fs.path.join(compilation.base_allocator, &.{ compilation.directory_absolute_path, relative_path }); +} + +fn realpathAlloc(allocator: Allocator, pathname: []const u8) ![]const u8 { + var path_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const realpathInStack = try std.os.realpath(pathname, &path_buffer); + return allocator.dupe(u8, realpathInStack); +} + +pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) !void { + // TODO: generate an actual file + const builtin_file_name = "builtin.nat"; + var cache_dir = try compilation.build_directory.openDir("cache", .{}); + const builtin_file = try cache_dir.createFile(builtin_file_name, .{ .truncate = false }); + builtin_file.close(); + + const module: *Module = try compilation.base_allocator.create(Module); + defer module.deinit(compilation.base_allocator); + module.* = Module{ + .main_package = blk: { + const result = try compilation.base_allocator.create(Package); + const main_package_absolute_directory_path = try compilation.pathFromCwd(std.fs.path.dirname(descriptor.main_package_path).?); + result.* = .{ + .directory = .{ + .handle = try std.fs.openDirAbsolute(main_package_absolute_directory_path, .{}), + .path = main_package_absolute_directory_path, + }, + .source_path = try compilation.base_allocator.dupe(u8, std.fs.path.basename(descriptor.main_package_path)), + }; + break :blk result; + }, + }; + + const std_package_dir = "lib/std"; + const package_descriptors = [2]struct { + name: []const u8, + directory_path: []const u8, + }{ + .{ + .name = "std", + .directory_path = try switch (@import("builtin").is_test) { + true => compilation.pathFromCwd(std_package_dir), + false => compilation.pathFromCompiler(std_package_dir), + }, + }, + .{ + .name = "builtin", + .directory_path = blk: { + const result = try cache_dir.realpathAlloc(compilation.base_allocator, "."); + cache_dir.close(); + break :blk result; + }, + }, + }; + + for (package_descriptors) |package_descriptor| { + const package = try compilation.base_allocator.create(Package); + package.* = .{ + .directory = .{ + .path = package_descriptor.directory_path, + .handle = try std.fs.openDirAbsolute(package_descriptor.directory_path, .{}), + }, + .source_path = try std.mem.concat(compilation.base_allocator, u8, &.{ package_descriptor.name, ".nat" }), + }; + + try module.main_package.addDependency(compilation.base_allocator, package_descriptor.name, package); + } + + 
assert(module.main_package.dependencies.size == 2); + + _ = try module.importPackage(compilation, module.main_package.dependencies.get("std").?); + + for (module.import_table.values()) |import| { + try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import); + } +} + +const ImportPackageResult = struct { + file: *File, + is_new: bool, +}; + +fn generateAST() !void {} + +pub const Directory = struct { + handle: std.fs.Dir, + path: []const u8, +}; + +pub const Package = struct { + directory: Directory, + /// Relative to the package main directory + source_path: []const u8, + dependencies: StringHashMap(*Package) = .{}, + + fn addDependency(package: *Package, allocator: Allocator, package_name: []const u8, new_dependency: *Package) !void { + try package.dependencies.ensureUnusedCapacity(allocator, 1); + package.dependencies.putAssumeCapacityNoClobber(package_name, new_dependency); + } + + fn deinit(package: *Package, allocator: Allocator) void { + if (package.dependencies.size > 0) { + assert(package.dependencies.size == 2); + } + package.dependencies.clearAndFree(allocator); + allocator.free(package.source_path); + allocator.free(package.directory.path); + package.directory.handle.close(); + allocator.destroy(package); + } +}; + +pub const File = struct { + status: Status = .not_loaded, + source_code: []const u8 = &.{}, + lexical_analyzer_result: lexical_analyzer.Result = undefined, + syntactic_analyzer_result: syntactic_analyzer.Result = undefined, + package_references: ArrayList(*Package) = .{}, + relative_path: []const u8, + package: *Package, + + const Status = enum { + not_loaded, + loaded_into_memory, + lexed, + parsed, + }; + + fn addPackageReference(file: *File, allocator: Allocator, package: *Package) !void { + for (file.package_references.items) |other| { + if (other == package) return; + } + + try file.package_references.insert(allocator, 0, package); + } + + pub fn fromRelativePath(allocator: Allocator, file_relative_path: []const u8) *File { + const file_content = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize)); + _ = file_content; + const file = try allocator.create(File); + file.* = File{}; + + return file; + } + + fn lex(file: *File, allocator: Allocator) !void { + assert(file.status == .loaded_into_memory); + file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code); + if (!@import("builtin").is_test) { + print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time}); + } + file.status = .lexed; + } + + fn parse(file: *File, allocator: Allocator) !void { + assert(file.status == .lexed); + file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code); + if (!@import("builtin").is_test) { + print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time}); + } + file.status = .parsed; + } + + fn deinit(file: *File, allocator: Allocator) void { + defer allocator.destroy(file); + if (file.status == .parsed) { + file.syntactic_analyzer_result.free(allocator); + file.lexical_analyzer_result.free(allocator); + file.package_references.clearAndFree(allocator); + allocator.free(file.source_code); + } else { + unreachable; + } + } +}; diff --git a/src/emit.zig b/src/backend/emit.zig similarity index 100% rename from src/emit.zig rename to src/backend/emit.zig diff --git a/src/ir.zig b/src/backend/ir.zig similarity index 100% rename from src/ir.zig rename to src/backend/ir.zig diff --git 
a/src/compiler.zig b/src/compiler.zig deleted file mode 100644 index b7c8214..0000000 --- a/src/compiler.zig +++ /dev/null @@ -1,21 +0,0 @@ -const std = @import("std"); - -const Allocator = std.mem.Allocator; - -const data_structures = @import("data_structures.zig"); - -const lexer = @import("lexer.zig"); -const parser = @import("parser.zig"); - -test { - _ = lexer; - _ = parser; -} - -pub fn cycle(allocator: Allocator, file_relative_path: []const u8) !void { - const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize)); - std.debug.print("File:\n\n```\n{s}\n```\n", .{file}); - const lexer_result = try lexer.lex(allocator, file); - const parser_result = try parser.parse(allocator, &lexer_result); - _ = parser_result; -} diff --git a/src/data_structures.zig b/src/data_structures.zig index 58fbed7..f6a4bb1 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -1,4 +1,7 @@ const std = @import("std"); +pub const Allocator = std.mem.Allocator; pub const ArrayList = std.ArrayListUnmanaged; -pub const HashMap = std.AutoHashMap; +pub const HashMap = std.AutoHashMapUnmanaged; +pub const StringHashMap = std.StringHashMapUnmanaged; +pub const StringArrayHashMap = std.StringArrayHashMapUnmanaged; diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig new file mode 100644 index 0000000..e485a5c --- /dev/null +++ b/src/frontend/lexical_analyzer.zig @@ -0,0 +1,120 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const log = std.log; + +const equal = std.mem.eql; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; + +const Compilation = @import("../Compilation.zig"); +const fs = @import("../fs.zig"); + +pub const Token = packed struct(u64) { + start: u32, + len: u24, + id: Id, + + pub const Id = enum(u8) { + identifier = 0, + number = 1, + string_literal = 2, + left_parenthesis = '(', + right_parenthesis = ')', + left_brace = '{', + right_brace = '}', + equal = '=', + colon = ':', + semicolon = ';', + hash = '#', + comma = ',', + bang = '!', + }; +}; + +pub const Result = struct { + tokens: ArrayList(Token), + time: u64, + + pub fn free(result: *Result, allocator: Allocator) void { + result.tokens.clearAndFree(allocator); + } +}; + +pub fn analyze(allocator: Allocator, text: []const u8) !Result { + const time_start = std.time.Instant.now() catch unreachable; + var tokens = try ArrayList(Token).initCapacity(allocator, text.len / 8); + var index: usize = 0; + + while (index < text.len) { + const start_index = index; + const start_character = text[index]; + const token_id: Token.Id = switch (start_character) { + 'a'...'z', 'A'...'Z', '_' => blk: { + while (true) { + const ch = text[index]; + if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_' or (ch >= '0' and ch <= '9')) { + index += 1; + continue; + } + + break; + } + + break :blk .identifier; + }, + '(', ')', '{', '}', '-', '=', ';', '#' => |operator| blk: { + index += 1; + break :blk @enumFromInt(operator); + }, + '0'...'9' => blk: { + while (text[index] >= '0' and text[index] <= '9') { + index += 1; + } + + break :blk .number; + }, + '"' => blk: { + index += 1; + while (text[index] != '"') { + index += 1; + } + + index += 1; + + break :blk .string_literal; + }, + ' ', '\n', '\r', '\t' => { + index += 1; + continue; + }, + else => |foo| { + std.debug.panic("NI: '{c}'", .{foo}); + }, + }; + + const end_index = index; + + try 
tokens.append(allocator, .{ + .start = @intCast(start_index), + .len = @intCast(end_index - start_index), + .id = token_id, + }); + } + + const should_log = false; + if (should_log) { + for (tokens.items, 0..) |token, i| { + std.debug.print("#{} {s}\n", .{ i, @tagName(token.id) }); + } + } + + const time_end = std.time.Instant.now() catch unreachable; + const time = time_end.since(time_start); + + return .{ + .tokens = tokens, + .time = time, + }; +} diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig new file mode 100644 index 0000000..e69de29 diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig new file mode 100644 index 0000000..54ae1ac --- /dev/null +++ b/src/frontend/syntactic_analyzer.zig @@ -0,0 +1,474 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const equal = std.mem.eql; +const log = std.log; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const HashMap = data_structures.HashMap; + +const lexical_analyzer = @import("lexical_analyzer.zig"); +const Token = lexical_analyzer.Token; + +pub const Result = struct { + nodes: ArrayList(Node), + time: u64, + + pub fn free(result: *Result, allocator: Allocator) void { + result.nodes.clearAndFree(allocator); + } +}; + +pub const Node = packed struct(u96) { + token: u32, + id: Id, + left: Node.Index, + right: Node.Index, + + pub const Index = u27; + + pub const Range = struct { + start: u32, + end: u32, + }; + + pub const Id = enum(u10) { + main = 0, + identifier = 1, + number = 2, + @"return" = 3, + block_one = 4, + function_declaration_no_arguments = 5, + container_declaration = 6, + string_literal = 7, + compiler_intrinsic_one = 8, + simple_variable_declaration = 9, + assign = 10, + @"comptime" = 11, + }; +}; + +const Error = error{ + unexpected_token, + not_implemented, + OutOfMemory, +}; + +const Analyzer = struct { + tokens: []const Token, + token_i: u32 = 0, + nodes: ArrayList(Node) = .{}, + file: []const u8, + allocator: Allocator, + temporal_node_heap: ArrayList(Node.Index) = .{}, + + fn free(analyzer: *Analyzer) void { + _ = analyzer; + } + + fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 { + if (analyzer.tokens[analyzer.token_i].id == token_id) { + const result = analyzer.token_i; + analyzer.token_i += 1; + return result; + } else { + return error.unexpected_token; + } + } + + fn getIdentifier(analyzer: *const Analyzer, token: Token) []const u8 { + assert(token.id == .identifier); + const identifier = analyzer.file[token.start..][0..token.len]; + return identifier; + } + + fn containerMembers(analyzer: *Analyzer) !Members { + const node_heap_top = analyzer.temporal_node_heap.items.len; + defer analyzer.temporal_node_heap.shrinkRetainingCapacity(node_heap_top); + + while (analyzer.token_i < analyzer.tokens.len) { + const first = analyzer.token_i; + const member_node: Node = switch (analyzer.tokens[first].id) { + .identifier => blk: { + const first_identifier_token = analyzer.tokens[first]; + analyzer.token_i += 1; + + const identifier = analyzer.getIdentifier(first_identifier_token); + + if (equal(u8, identifier, "comptime")) { + switch (analyzer.tokens[analyzer.token_i].id) { + .left_brace => { + const comptime_block = try analyzer.block(); + + break :blk .{ + .id = .@"comptime", + .token = first, + .left = comptime_block, + .right = 0, + }; + }, + else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}), + } + } else { + const 
is_const = equal(u8, identifier, "const"); + const is_var = equal(u8, identifier, "var"); + assert(is_const or is_var); + + _ = try analyzer.expectToken(.identifier); + + // TODO: type + _ = try analyzer.expectToken(.equal); + + // TODO: do this in a function + const init_node = switch (analyzer.tokens[analyzer.token_i].id) { + .identifier => unreachable, + .hash => try analyzer.compilerIntrinsic(), + else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), + }; + + _ = try analyzer.expectToken(.semicolon); + + // TODO: + const type_node = 0; + const top_level_decl = .{ + .id = .simple_variable_declaration, + .token = first, + .left = type_node, + .right = init_node, + }; + + break :blk top_level_decl; + } + }, + else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), + }; + + const member_index = try analyzer.addNode(member_node); + try analyzer.temporal_node_heap.append(analyzer.allocator, member_index); + } + + const members_array = analyzer.temporal_node_heap.items[node_heap_top..]; + const members: Members = switch (members_array.len) { + 2 => .{ + .len = 2, + .left = members_array[0], + .right = members_array[1], + }, + else => |len| std.debug.panic("Len: {}", .{len}), + }; + + return members; + } + + fn block(analyzer: *Analyzer) !Node.Index { + const left_brace = try analyzer.expectToken(.left_brace); + const node_heap_top = analyzer.temporal_node_heap.items.len; + defer analyzer.temporal_node_heap.shrinkRetainingCapacity(node_heap_top); + + while (analyzer.tokens[analyzer.token_i].id != .right_brace) { + const statement_index = try analyzer.statement(); + try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index); + } + _ = try analyzer.expectToken(.right_brace); + + const statement_array = analyzer.temporal_node_heap.items[node_heap_top..]; + const node: Node = switch (statement_array.len) { + 1 => .{ + .id = .block_one, + .token = left_brace, + .left = statement_array[0], + .right = 0, + }, + else => |len| std.debug.panic("len: {}", .{len}), + }; + return analyzer.addNode(node); + } + + fn statement(analyzer: *Analyzer) !Node.Index { + // TODO: more stuff before + const result = try analyzer.assignExpression(); + _ = try analyzer.expectToken(.semicolon); + + return result; + } + + fn assignExpression(analyzer: *Analyzer) !Node.Index { + const expr = try analyzer.expression(); + const expression_id: Node.Id = switch (analyzer.tokens[analyzer.token_i].id) { + .semicolon => return expr, + .equal => .assign, + else => unreachable, + }; + + return analyzer.addNode(.{ + .id = expression_id, + .token = blk: { + const token_i = analyzer.token_i; + analyzer.token_i += 1; + break :blk token_i; + }, + .left = expr, + .right = try analyzer.expression(), + }); + } + + fn compilerIntrinsic(analyzer: *Analyzer) !Node.Index { + const hash = try analyzer.expectToken(.hash); + _ = try analyzer.expectToken(.identifier); + _ = try analyzer.expectToken(.left_parenthesis); + + const temporal_heap_top = analyzer.temporal_node_heap.items.len; + defer analyzer.temporal_node_heap.shrinkRetainingCapacity(temporal_heap_top); + + while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { + const parameter = try analyzer.expression(); + try analyzer.temporal_node_heap.append(analyzer.allocator, parameter); + + switch (analyzer.tokens[analyzer.token_i].id) { + .comma => analyzer.token_i += 1, + .right_parenthesis => continue, + else => unreachable, + } + } + + // Consume the right parenthesis + analyzer.token_i += 1; + + const parameters = 
analyzer.temporal_node_heap.items[temporal_heap_top..]; + + return switch (parameters.len) { + 1 => analyzer.addNode(.{ + .id = .compiler_intrinsic_one, + .token = hash, + .left = parameters[0], + .right = 0, + }), + else => unreachable, + }; + } + + fn expression(analyzer: *Analyzer) !Node.Index { + return analyzer.expressionPrecedence(0); + } + + fn expressionPrecedence(analyzer: *Analyzer, minimum_precedence: i32) !Node.Index { + var result = try analyzer.prefixExpression(); + + var banned_precedence: i32 = -1; + + while (analyzer.token_i < analyzer.tokens.len) { + const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) { + .equal, .semicolon, .right_parenthesis => -1, + else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }), + }; + + if (precedence < minimum_precedence) { + break; + } + + if (precedence == banned_precedence) { + break; + } + + // TODO: fix this + const node_index = try analyzer.expressionPrecedence(1); + _ = node_index; + unreachable; + } + + return result; + } + + fn prefixExpression(analyzer: *Analyzer) !Node.Index { + switch (analyzer.tokens[analyzer.token_i].id) { + // .bang => .bool_not, + // .minus => .negation, + // .tilde => .bit_not, + // .minus_percent => .negation_wrap, + // .ampersand => .address_of, + // .keyword_try => .@"try", + // .keyword_await => .@"await", + + else => |pref| { + _ = pref; + return analyzer.primaryExpression(); + }, + } + + return error.not_implemented; + } + + fn primaryExpression(analyzer: *Analyzer) !Node.Index { + const result = switch (analyzer.tokens[analyzer.token_i].id) { + .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .colon => unreachable, + else => try analyzer.curlySuffixExpression(), + }, + .string_literal => try analyzer.curlySuffixExpression(), + else => |id| { + log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)}); + unreachable; + }, + }; + + return result; + } + + fn curlySuffixExpression(analyzer: *Analyzer) !Node.Index { + const left = try analyzer.typeExpression(); + + return switch (analyzer.tokens[analyzer.token_i].id) { + .left_brace => unreachable, + else => left, + }; + } + + fn typeExpression(analyzer: *Analyzer) !Node.Index { + return switch (analyzer.tokens[analyzer.token_i].id) { + .string_literal, .identifier => try analyzer.errorUnionExpression(), + else => |id| blk: { + log.warn("By default, calling errorUnionExpression with {s}", .{@tagName(id)}); + + const result = try analyzer.errorUnionExpression(); + + break :blk result; + }, + }; + } + + fn errorUnionExpression(analyzer: *Analyzer) !Node.Index { + const suffix_expression = try analyzer.suffixExpression(); + + return switch (analyzer.tokens[analyzer.token_i].id) { + .bang => unreachable, + else => suffix_expression, + }; + } + + fn suffixExpression(analyzer: *Analyzer) !Node.Index { + var result = try analyzer.primaryTypeExpression(); + + while (true) { + if (analyzer.suffixOperator()) |_| { + unreachable; + } else { + if (analyzer.tokens[analyzer.token_i].id == .left_parenthesis) { + unreachable; + } else { + return result; + } + } + } + + unreachable; + } + + fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index { + const token_i = analyzer.token_i; + return switch (analyzer.tokens[token_i].id) { + .string_literal => blk: { + analyzer.token_i += 1; + break :blk analyzer.addNode(.{ + .id = .string_literal, + .token = token_i, + .left = 0, + .right = 0, + }); + }, + .identifier => switch (analyzer.tokens[token_i + 1].id) { + .colon => unreachable, + else => 
analyzer.addNode(.{ + .id = .identifier, + .token = blk: { + analyzer.token_i += 1; + break :blk token_i; + }, + .left = 0, + .right = 0, + }), + }, + else => |foo| { + switch (foo) { + .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.getIdentifier(analyzer.tokens[token_i]) }), + else => std.debug.panic("{s}", .{@tagName(foo)}), + } + }, + }; + } + + // TODO: + fn suffixOperator(analyzer: *Analyzer) ?bool { + _ = analyzer; + + return null; + } + + fn addNode(analyzer: *Analyzer, node: Node) !Node.Index { + const index = analyzer.nodes.items.len; + try analyzer.nodes.append(analyzer.allocator, node); + return @intCast(index); + } +}; + +const Members = struct { + len: usize, + left: Node.Index, + right: Node.Index, + + pub fn toRange(members: Members) Node.Range { + return switch (members.len) { + 0 => unreachable, + 1 => .{ + .start = members.left, + .end = members.left, + }, + 2 => .{ + .start = members.left, + .end = members.right, + }, + else => unreachable, + }; + } +}; + +pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !Result { + const start = std.time.Instant.now() catch unreachable; + var analyzer = Analyzer{ + .tokens = tokens, + .file = file, + .allocator = allocator, + }; + errdefer analyzer.free(); + const node_index = try analyzer.addNode(.{ + .id = .main, + .token = 0, + .left = 0, + .right = 0, + }); + + assert(node_index == 0); + const members = try analyzer.containerMembers(); + const member_range = members.toRange(); + analyzer.nodes.items[0].left = @intCast(member_range.start); + analyzer.nodes.items[0].right = @intCast(member_range.end); + + const end = std.time.Instant.now() catch unreachable; + + analyzer.temporal_node_heap.clearAndFree(allocator); + + return .{ + .nodes = analyzer.nodes, + .time = end.since(start), + }; +} + +const ExpressionMutabilityQualifier = enum { + @"const", + @"var", +}; + +const Keyword = enum { + @"return", + @"fn", +}; diff --git a/src/lexer.zig b/src/lexer.zig deleted file mode 100644 index ad9b440..0000000 --- a/src/lexer.zig +++ /dev/null @@ -1,158 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const log = std.log; - -const equal = std.mem.eql; - -const data_structures = @import("data_structures.zig"); -const ArrayList = data_structures.ArrayList; - -const fs = @import("fs.zig"); -const parser = @import("parser.zig"); - -pub const TokenTypeMap = blk: { - var result: [@typeInfo(TokenId).Enum.fields.len]type = undefined; - - result[@intFromEnum(TokenId.identifier)] = Identifier; - result[@intFromEnum(TokenId.operator)] = Operator; - result[@intFromEnum(TokenId.number)] = Number; - - break :blk result; -}; - -pub const Identifier = parser.Node; - -pub const TokenId = enum { - identifier, - operator, - number, -}; - -pub const Operator = enum(u8) { - left_parenthesis = '(', - right_parenthesis = ')', - left_brace = '{', - right_brace = '}', - equal = '=', - colon = ':', - semicolon = ';', -}; - -pub const Number = struct { - content: union(enum) { - float: f64, - integer: Integer, - }, - - const Integer = struct { - value: u64, - is_negative: bool, - }; -}; - -pub const Result = struct { - arrays: struct { - identifier: ArrayList(Identifier), - operator: ArrayList(Operator), - number: ArrayList(Number), - id: ArrayList(TokenId), - }, - file: []const u8, - time: u64 = 0, - - pub fn free(result: *Result, allocator: Allocator) void { - inline for (@typeInfo(@TypeOf(result.arrays)).Struct.fields) |field| { - @field(result.arrays, 
field.name).clearAndFree(allocator); - } - } - - fn appendToken(result: *Result, comptime token_id: TokenId, token_value: TokenTypeMap[@intFromEnum(token_id)]) void { - // const index = result.arrays.id.items.len; - @field(result.arrays, @tagName(token_id)).appendAssumeCapacity(token_value); - result.arrays.id.appendAssumeCapacity(token_id); - // log.err("Token #{}: {s} {}", .{ index, @tagName(token_id), token_value }); - } -}; - -pub fn lex(allocator: Allocator, text: []const u8) !Result { - const time_start = std.time.Instant.now() catch unreachable; - - var index: usize = 0; - - var result = Result{ - .arrays = .{ - .identifier = try ArrayList(Identifier).initCapacity(allocator, text.len), - .operator = try ArrayList(Operator).initCapacity(allocator, text.len), - .number = try ArrayList(Number).initCapacity(allocator, text.len), - .id = try ArrayList(TokenId).initCapacity(allocator, text.len), - }, - .file = text, - }; - - defer { - const time_end = std.time.Instant.now() catch unreachable; - result.time = time_end.since(time_start); - } - - while (index < text.len) { - const first_char = text[index]; - switch (first_char) { - 'a'...'z', 'A'...'Z', '_' => { - const start = index; - while (true) { - const ch = text[index]; - if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_' or (ch >= '0' and ch <= '9')) { - index += 1; - continue; - } - break; - } - - result.appendToken(.identifier, .{ - .left = @intCast(start), - .right = @intCast(index), - .type = .identifier, - }); - }, - '(', ')', '{', '}', '-', '=', ';' => |operator| { - result.appendToken(.operator, @enumFromInt(operator)); - index += 1; - }, - '0'...'9' => { - const start = index; - - while (text[index] >= '0' and text[index] <= '9') { - index += 1; - } - const end = index; - const number_slice = text[start..end]; - const number = try std.fmt.parseInt(u64, number_slice, 10); - result.appendToken(.number, .{ - .content = .{ - .integer = .{ - .value = number, - .is_negative = false, - }, - }, - }); - }, - ' ', '\n', '\r', '\t' => index += 1, - else => |foo| { - index += 1; - std.debug.panic("NI: {c} 0x{x}", .{ foo, foo }); - }, - } - } - - return result; -} - -test "lexer" { - const allocator = std.testing.allocator; - const file_path = fs.first; - const file = try fs.readFile(allocator, file_path); - defer allocator.free(file); - var result = try lex(allocator, file); - defer result.free(allocator); -} diff --git a/src/main.zig b/src/main.zig index 37ce80e..ce0e6df 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2,17 +2,28 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; -const compiler = @import("compiler.zig"); +const Compilation = @import("Compilation.zig"); const fs = @import("fs.zig"); pub const seed = std.math.maxInt(u64); +const default_src_file = "src/test/main.b"; pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const allocator = gpa.allocator(); - try compiler.cycle(allocator, fs.first); + try singleCompilation(default_src_file); } -test { - _ = compiler; +fn singleCompilation(main_file_path: []const u8) !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + + const compilation = try Compilation.init(gpa.allocator()); + defer compilation.deinit(); + + try compilation.compileModule(.{ + .main_package_path = main_file_path, + }); +} + +test "basic" { + try singleCompilation(default_src_file); } diff --git a/src/parser.zig b/src/parser.zig deleted file mode 100644 index a64c0ed..0000000 --- 
a/src/parser.zig +++ /dev/null @@ -1,434 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const log = std.log; - -const data_structures = @import("data_structures.zig"); -const ArrayList = data_structures.ArrayList; -const HashMap = data_structures.HashMap; - -const lexer = @import("lexer.zig"); - -pub const Result = struct { - function_map: ArrayList(lexer.Identifier), - nodes: ArrayList(Node), - - pub fn free(result: *Result, allocator: Allocator) void { - result.functions.clearAndFree(allocator); - } -}; - -pub const Node = packed struct(u64) { - type: Type, - left: Node.Index, - right: Node.Index, - - pub const Index = u27; - - pub const Type = enum(u10) { - root = 0, - identifier = 1, - number = 2, - @"return" = 3, - block_one = 4, - function_declaration_no_arguments = 5, - container_declaration = 6, - }; -}; - -const Error = error{ - unexpected_token, - not_implemented, - OutOfMemory, -}; - -pub fn parse(allocator: Allocator, lexer_result: *const lexer.Result) !Result { - var parser = Parser{ - .allocator = allocator, - .nodes = ArrayList(Node){}, - .function_map = ArrayList(lexer.Identifier){}, - .lexer = .{ - .result = lexer_result, - }, - }; - errdefer parser.free(); - - const node_index = try parser.appendNode(Node{ - .type = .root, - .left = 0, - .right = 0, - }); - _ = node_index; - - const members = try parser.parseContainerMembers(); - _ = members; - - return Result{ - .function_map = parser.function_map, - .nodes = parser.nodes, - }; -} - -const ExpressionMutabilityQualifier = enum { - @"const", - @"var", -}; - -const Keyword = enum { - @"return", - @"fn", -}; - -const PeekResult = union(lexer.TokenId) { - identifier: lexer.Identifier, - operator: lexer.Operator, - number: lexer.Number, -}; - -const Lexer = struct { - result: *const lexer.Result, - indices: struct { - identifier: u32 = 0, - operator: u32 = 0, - number: u32 = 0, - id: u32 = 0, - } = .{}, - - fn hasTokens(l: *const Lexer) bool { - return l.indices.id < l.result.arrays.id.items.len; - } - - fn currentTokenIndex(l: *const Lexer, comptime token_id: lexer.TokenId) u32 { - assert(l.isCurrentToken(token_id)); - return @field(l.indices, @tagName(token_id)); - } - - fn consume(l: *Lexer, comptime token_id: lexer.TokenId) void { - assert(l.isCurrentToken(token_id)); - l.indices.id += 1; - const index_ptr = &@field(l.indices, @tagName(token_id)); - const index = index_ptr.*; - const token_value = @field(l.result.arrays, @tagName(token_id)).items[index]; - log.err("Consuming {s} ({})...", .{ @tagName(token_id), token_value }); - - index_ptr.* += 1; - } - - fn isCurrentToken(l: *const Lexer, token_id: lexer.TokenId) bool { - return l.result.arrays.id.items[l.indices.id] == token_id; - } - - fn getIdentifier(l: *const Lexer, identifier: Node) []const u8 { - comptime { - assert(lexer.Identifier == Node); - } - - assert(identifier.type == .identifier); - - return l.result.file[identifier.left..][0 .. 
identifier.right - identifier.left]; - } - - fn expectTokenType(l: *Lexer, comptime expected_token_id: lexer.TokenId) !lexer.TokenTypeMap[@intFromEnum(expected_token_id)] { - const peek_result = l.peek() orelse return error.not_implemented; - return switch (peek_result) { - expected_token_id => |token| blk: { - l.consume(expected_token_id); - break :blk token; - }, - else => error.not_implemented, - }; - } - - fn expectTokenTypeIndex(l: *Lexer, comptime expected_token_id: lexer.TokenId) !u32 { - const peek_result = l.peek() orelse return error.not_implemented; - return switch (peek_result) { - expected_token_id => blk: { - const index = l.currentTokenIndex(expected_token_id); - l.consume(expected_token_id); - break :blk index; - }, - else => error.not_implemented, - }; - } - - fn expectSpecificToken(l: *Lexer, comptime expected_token_id: lexer.TokenId, expected_token: lexer.TokenTypeMap[@intFromEnum(expected_token_id)]) !void { - const peek_result = l.peek() orelse return error.not_implemented; - switch (peek_result) { - expected_token_id => |token| { - if (expected_token != token) { - return error.not_implemented; - } - - l.consume(expected_token_id); - }, - else => |token| { - std.debug.panic("{s}", .{@tagName(token)}); - }, - } - } - - fn maybeExpectOperator(l: *Lexer, expected_operator: lexer.Operator) bool { - return switch (l.peek() orelse unreachable) { - .operator => |operator| { - const result = operator == expected_operator; - if (result) { - l.consume(.operator); - } - return result; - }, - else => false, - }; - } - - fn peek(l: *const Lexer) ?PeekResult { - if (l.indices.id >= l.result.arrays.id.items.len) { - return null; - } - - return switch (l.result.arrays.id.items[l.indices.id]) { - inline else => |token| blk: { - const tag = @tagName(token); - const index = @field(l.indices, tag); - const array = &@field(l.result.arrays, tag); - - break :blk @unionInit(PeekResult, tag, array.items[index]); - }, - }; - } -}; - -const Parser = struct { - lexer: Lexer, - nodes: ArrayList(Node), - function_map: ArrayList(lexer.Identifier), - allocator: Allocator, - - fn appendNode(parser: *Parser, node: Node) !Node.Index { - const index = parser.nodes.items.len; - try parser.nodes.append(parser.allocator, node); - return @intCast(index); - } - - fn getNode(parser: *Parser, node_index: Node.Index) *Node { - return &parser.nodes.items[node_index]; - } - - fn free(parser: *Parser) void { - _ = parser; - } - - fn parseTypeExpression(parser: *Parser) !Node.Index { - // TODO: make this decent - return switch (parser.lexer.peek() orelse unreachable) { - .identifier => parser.nodeFromToken(.identifier), - else => unreachable, - }; - } - - fn parseFunctionDeclaration(parser: *Parser) !Node.Index { - try parser.lexer.expectSpecificToken(.operator, .left_parenthesis); - while (!parser.lexer.maybeExpectOperator(.right_parenthesis)) { - return error.not_implemented; - } - - const t = try parser.parseTypeExpression(); - const function_declaration = try parser.appendNode(.{ - .type = .function_declaration_no_arguments, - .left = t, - .right = try parser.parseBlock(), - }); - return function_declaration; - } - - fn parseBlock(parser: *Parser) !Node.Index { - try parser.lexer.expectSpecificToken(.operator, .left_brace); - - var statements = ArrayList(Node.Index){}; - - while (!parser.lexer.maybeExpectOperator(.right_brace)) { - const statement = try parser.parseStatement(); - try statements.append(parser.allocator, statement); - } - - const node: Node = switch (statements.items.len) { - 0 => unreachable, - 1 
=> .{ - .type = .block_one, - .left = statements.items[0], - .right = 0, - }, - else => unreachable, - }; - log.debug("Parsed block!", .{}); - return parser.appendNode(node); - } - - fn parseStatement(parser: *Parser) !Node.Index { - // TODO: more stuff before - const expression = try parser.parseAssignExpression(); - try parser.lexer.expectSpecificToken(.operator, .semicolon); - - return expression; - } - - fn parseAssignExpression(parser: *Parser) !Node.Index { - const expression = try parser.parseExpression(); - switch (parser.lexer.peek() orelse unreachable) { - .operator => |operator| switch (operator) { - .semicolon => return expression, - else => unreachable, - }, - else => unreachable, - } - - return error.not_implemented; - } - - fn parseExpression(parser: *Parser) Error!Node.Index { - return parser.parseExpressionPrecedence(0); - } - - fn parseExpressionPrecedence(parser: *Parser, minimum_precedence: i32) !Node.Index { - var expr_index = try parser.parsePrefixExpression(); - log.debug("Expr index: {}", .{expr_index}); - - var banned_precedence: i32 = -1; - while (parser.lexer.hasTokens()) { - const precedence: i32 = switch (parser.lexer.peek() orelse unreachable) { - .operator => |operator| switch (operator) { - .semicolon => -1, - else => @panic(@tagName(operator)), - }, - else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }), - }; - - if (precedence < minimum_precedence) { - break; - } - - if (precedence == banned_precedence) { - unreachable; - } - - const node_index = try parser.parseExpressionPrecedence(1); - _ = node_index; - - unreachable; - } - - log.err("Parsed expression precedence", .{}); - - return expr_index; - } - - fn parsePrefixExpression(parser: *Parser) !Node.Index { - switch (parser.lexer.peek() orelse unreachable) { - // .bang => .bool_not, - // .minus => .negation, - // .tilde => .bit_not, - // .minus_percent => .negation_wrap, - // .ampersand => .address_of, - // .keyword_try => .@"try", - // .keyword_await => .@"await", - - else => |pref| { - log.err("Pref: {s}", .{@tagName(pref)}); - return parser.parsePrimaryExpression(); - }, - } - - return error.not_implemented; - } - - fn nodeFromToken(parser: *Parser, comptime token_id: lexer.TokenId) !Node.Index { - const node = try parser.appendNode(.{ - .type = @field(Node.Type, @tagName(token_id)), - .left = @intCast(parser.lexer.currentTokenIndex(token_id)), - .right = 0, - }); - parser.lexer.consume(token_id); - - return node; - } - - fn parsePrimaryExpression(parser: *Parser) !Node.Index { - const result = switch (parser.lexer.peek() orelse unreachable) { - .number => try parser.nodeFromToken(.number), - .identifier => |identifier| { - const identifier_name = parser.lexer.getIdentifier(identifier); - inline for (@typeInfo(Keyword).Enum.fields) |keyword| { - if (std.mem.eql(u8, identifier_name, keyword.name)) return switch (@as(Keyword, @enumFromInt(keyword.value))) { - .@"return" => blk: { - parser.lexer.consume(.identifier); - const node_ref = try parser.appendNode(.{ - .type = .@"return", - .left = try parser.parseExpression(), - .right = 0, - }); - break :blk node_ref; - }, - .@"fn" => blk: { - parser.lexer.consume(.identifier); - // TODO: figure out name association - break :blk try parser.parseFunctionDeclaration(); - }, - }; - } - - unreachable; - }, - else => |foo| { - std.debug.panic("foo: {s}. 
{}", .{ @tagName(foo), foo }); - }, - }; - - return result; - } - - fn parseContainerMembers(parser: *Parser) !void { - var container_nodes = ArrayList(Node.Index){}; - while (parser.lexer.hasTokens()) { - const container_node = switch (parser.lexer.peek() orelse unreachable) { - .identifier => |first_identifier_ref| blk: { - parser.lexer.consume(.identifier); - - const first_identifier = parser.lexer.getIdentifier(first_identifier_ref); - - if (std.mem.eql(u8, first_identifier, "comptime")) { - unreachable; - } else { - const mutability_qualifier: ExpressionMutabilityQualifier = if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"const"))) .@"const" else if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"var"))) .@"var" else @panic(first_identifier); - _ = mutability_qualifier; - - const identifier = try parser.appendNode(.{ - .type = .identifier, - .left = @intCast(try parser.lexer.expectTokenTypeIndex(.identifier)), - .right = 0, - }); - - switch (parser.lexer.peek() orelse unreachable) { - .operator => |operator| switch (operator) { - .colon => unreachable, - .equal => { - parser.lexer.consume(.operator); - - const expression = try parser.parseExpression(); - break :blk try parser.appendNode(.{ - .type = .container_declaration, - .left = expression, - .right = identifier, - }); - }, - else => unreachable, - }, - else => |foo| std.debug.panic("WTF: {}", .{foo}), - } - } - }, - else => |a| std.debug.panic("{}", .{a}), - }; - - try container_nodes.append(parser.allocator, container_node); - } - } -}; diff --git a/src/test/main.b b/src/test/main.nat similarity index 92% rename from src/test/main.b rename to src/test/main.nat index 8847d3f..157bd8a 100644 --- a/src/test/main.b +++ b/src/test/main.nat @@ -1,3 +1,3 @@ const main = fn() i32 { return 0; -} +}; From 953c3faf064cab88d6275970f90f24a64ef561b5 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Wed, 6 Sep 2023 15:26:37 -0600 Subject: [PATCH 2/6] enable macos and windows --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 76c5590..9cef9f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,8 @@ jobs: matrix: os: [ ubuntu-latest, + windows-latest, + macos-latest ] runs-on: ${{ matrix.os }} timeout-minutes: 15 From cd3260313793cc89cd051d8349d582704cda832b Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Sat, 9 Sep 2023 13:08:51 -0600 Subject: [PATCH 3/6] add debugger support for windows --- build.zig | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/build.zig b/build.zig index 453572c..c98b398 100644 --- a/build.zig +++ b/build.zig @@ -67,9 +67,28 @@ pub fn build(b: *std.Build) void { const run_unit_tests = b.addRunArtifact(unit_tests); - const debug_unit_tests_cmd = b.addSystemCommand(&.{"gf2"}); - debug_unit_tests_cmd.addArtifactArg(unit_tests); - debug_unit_tests_cmd.addArgs(&.{ "-ex", "r" }); + const debug_unit_tests_cmd = switch (@import("builtin").os.tag) { + .linux => blk: { + const result = b.addSystemCommand(&.{"gf2"}); + result.addArtifactArg(unit_tests); + result.addArgs(&.{ "-ex", "r" }); + break :blk result; + }, + .windows => blk: { + const result = b.addSystemCommand(&.{"remedybg"}); + result.addArg("-g"); + result.addArtifactArg(unit_tests); + + break :blk result; + }, + .macos => blk: { + // Broken, but it compiles + const result = b.addSystemCommand(&.{"gdb"}); + 
result.addArtifactArg(unit_tests); + break :blk result; + }, + else => @compileError("Operating system not supported"), + }; const debug_test_step = b.step("debug_test", "Run the tests through the debugger"); debug_test_step.dependOn(&debug_unit_tests_cmd.step); From 15a7df3f14f3e820c78e19fd3942abf675c9c391 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Sat, 9 Sep 2023 20:57:12 -0600 Subject: [PATCH 4/6] Barebones semantic analysis --- build.zig | 67 +- lib/std/start.nat | 8 +- lib/std/std.nat | 3 +- src/Compilation.zig | 280 +++++--- src/backend/emit.zig | 34 - src/backend/intermediate_representation.zig | 9 + src/backend/ir.zig | 143 ----- src/data_structures.zig | 105 ++- src/frontend/lexical_analyzer.zig | 107 +++- src/frontend/semantic_analyzer.zig | 668 ++++++++++++++++++++ src/frontend/syntactic_analyzer.zig | 342 +++++++--- src/main.zig | 12 +- 12 files changed, 1364 insertions(+), 414 deletions(-) create mode 100644 src/backend/intermediate_representation.zig delete mode 100644 src/backend/ir.zig diff --git a/build.zig b/build.zig index c98b398..cd090e8 100644 --- a/build.zig +++ b/build.zig @@ -1,32 +1,16 @@ const std = @import("std"); -// Although this function looks imperative, note that its job is to -// declaratively construct a build graph that will be executed by an external -// runner. pub fn build(b: *std.Build) void { - // Standard target options allows the person running `zig build` to choose - // what target to build for. Here we do not override the defaults, which - // means any target is allowed, and the default is native. Other options - // for restricting supported target set are available. const target = b.standardTargetOptions(.{}); - - // Standard optimization options allow the person running `zig build` to select - // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not - // set a preferred release mode, allowing the user to decide how to optimize. const optimize = b.standardOptimizeOption(.{}); const exe = b.addExecutable(.{ .name = "compiler", - // In this case the main source file is merely a path, however, in more - // complicated build scripts, this could be a generated file. .root_source_file = .{ .path = "src/main.zig" }, .target = target, .optimize = optimize, }); - // This declares intent for the executable to be installed into the - // standard location when the user invokes the "install" step (the default - // step when running `zig build`). b.installArtifact(exe); b.installDirectory(.{ .source_dir = std.Build.LazyPath.relative("lib"), @@ -34,31 +18,21 @@ pub fn build(b: *std.Build) void { .install_subdir = "lib", }); - // This *creates* a Run step in the build graph, to be executed when another - // step is evaluated that depends on it. The next line below will establish - // such a dependency. const run_cmd = b.addRunArtifact(exe); - // By making the run step depend on the install step, it will be run from the - // installation directory rather than directly from within the cache directory. - // This is not necessary, however, if the application depends on other installed - // files, this ensures they will be present and in the expected location. run_cmd.step.dependOn(b.getInstallStep()); - // This allows the user to pass arguments to the application in the build - // command itself, like this: `zig build run -- arg1 arg2 etc` if (b.args) |args| { run_cmd.addArgs(args); } - // This creates a build step. 
It will be visible in the `zig build --help` menu, - // and can be selected like this: `zig build run` - // This will evaluate the `run` step rather than the default, which is "install". const run_step = b.step("run", "Run the app"); run_step.dependOn(&run_cmd.step); - // Creates a step for unit testing. This only builds the test executable - // but does not run it. + const debug_command = addDebugCommand(b, exe); + const debug_step = b.step("debug", "Debug the app"); + debug_step.dependOn(&debug_command.step); + const unit_tests = b.addTest(.{ .root_source_file = .{ .path = "src/main.zig" }, .target = target, @@ -66,36 +40,39 @@ pub fn build(b: *std.Build) void { }); const run_unit_tests = b.addRunArtifact(unit_tests); + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_unit_tests.step); - const debug_unit_tests_cmd = switch (@import("builtin").os.tag) { + const debug_unit_tests_cmd = addDebugCommand(b, unit_tests); + const debug_test_step = b.step("debug_test", "Run the tests through the debugger"); + debug_test_step.dependOn(&debug_unit_tests_cmd.step); +} + +fn addDebugCommand(b: *std.Build, artifact: *std.Build.Step.Compile) *std.Build.Step.Run { + return switch (@import("builtin").os.tag) { .linux => blk: { const result = b.addSystemCommand(&.{"gf2"}); - result.addArtifactArg(unit_tests); - result.addArgs(&.{ "-ex", "r" }); + result.addArtifactArg(artifact); + + if (artifact.kind == .@"test") { + result.addArgs(&.{ "-ex", "r" }); + } + break :blk result; }, .windows => blk: { const result = b.addSystemCommand(&.{"remedybg"}); result.addArg("-g"); - result.addArtifactArg(unit_tests); + result.addArtifactArg(artifact); break :blk result; }, .macos => blk: { - // Broken, but it compiles + // not tested const result = b.addSystemCommand(&.{"gdb"}); - result.addArtifactArg(unit_tests); + result.addArtifactArg(artifact); break :blk result; }, else => @compileError("Operating system not supported"), }; - - const debug_test_step = b.step("debug_test", "Run the tests through the debugger"); - debug_test_step.dependOn(&debug_unit_tests_cmd.step); - - // Similar to creating the run step earlier, this exposes a `test` step to - // the `zig build --help` menu, providing a way for the user to request - // running the unit tests. 
- const test_step = b.step("test", "Run unit tests"); - test_step.dependOn(&run_unit_tests.step); } diff --git a/lib/std/start.nat b/lib/std/start.nat index 8661ac4..199fd13 100644 --- a/lib/std/start.nat +++ b/lib/std/start.nat @@ -1 +1,7 @@ -const builtin = #import("builtin"); +comptime { + _ = _start; +} + +const _start = () noreturn { + while (true) {} +}; diff --git a/lib/std/std.nat b/lib/std/std.nat index 3205fe7..3ce1556 100644 --- a/lib/std/std.nat +++ b/lib/std/std.nat @@ -1,4 +1,5 @@ -const start = #import("start.nat"); comptime { _ = start; } + +const start = #import("start.nat"); diff --git a/src/Compilation.zig b/src/Compilation.zig index ed8421a..8c4f3e9 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -2,18 +2,32 @@ const Compilation = @This(); const std = @import("std"); const assert = std.debug.assert; +const equal = std.mem.eql; const print = std.debug.print; const Allocator = std.mem.Allocator; const data_structures = @import("data_structures.zig"); const ArrayList = data_structures.ArrayList; +const AutoHashMap = data_structures.AutoHashMap; +const BlockList = data_structures.BlockList; +const HashMap = data_structures.HashMap; +const SegmentedList = data_structures.SegmentedList; const StringHashMap = data_structures.StringHashMap; const StringArrayHashMap = data_structures.StringArrayHashMap; const lexical_analyzer = @import("frontend/lexical_analyzer.zig"); const syntactic_analyzer = @import("frontend/syntactic_analyzer.zig"); +const Node = syntactic_analyzer.Node; const semantic_analyzer = @import("frontend/semantic_analyzer.zig"); +const intermediate_representation = @import("backend/intermediate_representation.zig"); + +test { + _ = lexical_analyzer; + _ = syntactic_analyzer; + _ = semantic_analyzer; + _ = data_structures; +} base_allocator: Allocator, cwd_absolute_path: []const u8, @@ -43,71 +57,210 @@ pub fn init(allocator: Allocator) !*Compilation { return compilation; } -pub fn deinit(compilation: *Compilation) void { - const allocator = compilation.base_allocator; - allocator.free(compilation.cwd_absolute_path); - allocator.free(compilation.executable_absolute_path); - allocator.destroy(compilation); -} +pub const Struct = struct { + scope: Scope.Index, + initialization: Value.Index, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Type = union(enum) { + void, + noreturn, + bool, + integer: Integer, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Integer = struct { + bit_count: u16, + signedness: Signedness, + pub const Signedness = enum(u1) { + unsigned = 0, + signed = 1, + }; +}; + +/// A scope contains a bunch of declarations +pub const Scope = struct { + parent: Scope.Index, + type: Type.Index, + declarations: AutoHashMap(u32, Declaration.Index) = .{}, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Declaration = union(enum) { + unresolved: Node.Index, + struct_type: Struct, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Function = struct { + body: Block.Index, + prototype: Prototype.Index, + + pub const Prototype = struct { + arguments: ?[]const Field.Index, + return_type: Type.Index, + + pub const List = BlockList(@This()); + pub const Index = Prototype.List.Index; + }; + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Block = struct { + foo: u32 = 0, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const 
Field = struct { + foo: u32 = 0, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Loop = struct { + foo: u32 = 0, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Value = struct { + type: union(enum) { + declaration: Declaration.Index, + bool_true, + bool_false, + loop: Loop.Index, + function: Function.Index, + }, + is_const: bool, + is_comptime: bool, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; pub const Module = struct { main_package: *Package, import_table: StringArrayHashMap(*File) = .{}, + string_table: AutoHashMap(u32, []const u8) = .{}, + declarations: BlockList(Declaration) = .{}, + structs: BlockList(Struct) = .{}, + scopes: BlockList(Scope) = .{}, + files: BlockList(File) = .{}, + values: BlockList(Value) = .{}, + functions: BlockList(Function) = .{}, + fields: BlockList(Field) = .{}, + function_prototypes: BlockList(Function.Prototype) = .{}, + types: BlockList(Type) = .{}, + blocks: BlockList(Block) = .{}, + loops: BlockList(Loop) = .{}, pub const Descriptor = struct { main_package_path: []const u8, }; - fn deinit(module: *Module, allocator: Allocator) void { - defer allocator.destroy(module); + const ImportFileResult = struct { + file: *File, + is_new: bool, + }; - for (module.import_table.values()) |file| { - file.deinit(allocator); + const ImportPackageResult = struct { + file: *File, + is_new: bool, + is_package: bool, + }; + + pub fn importFile(module: *Module, allocator: Allocator, current_file: *File, import_name: []const u8) !ImportPackageResult { + if (equal(u8, import_name, "std")) { + return module.importPackage(allocator, module.main_package.dependencies.get("std").?); } - var iterator = module.main_package.dependencies.valueIterator(); - while (iterator.next()) |it| { - const package = it.*; - package.deinit(allocator); + if (equal(u8, import_name, "builtin")) { + return module.importPackage(allocator, module.main_package.dependencies.get("builtin").?); } - module.main_package.deinit(allocator); + if (equal(u8, import_name, "main")) { + return module.importPackage(allocator, module.main_package); + } - module.import_table.clearAndFree(allocator); - } + if (current_file.package.dependencies.get(import_name)) |package| { + return module.importPackage(allocator, package); + } - fn importPackage(module: *Module, compilation: *Compilation, package: *Package) !ImportPackageResult { - const lookup_result = try module.import_table.getOrPut(compilation.base_allocator, package.directory.path); - errdefer _ = module.import_table.pop(); - if (lookup_result.found_existing) { - const file: *File = lookup_result.value_ptr.*; - try file.addPackageReference(compilation.base_allocator, package); + if (!std.mem.endsWith(u8, import_name, ".nat")) { unreachable; } - const file = try compilation.base_allocator.create(File); - lookup_result.value_ptr.* = file; - file.* = File{ - .relative_path = package.source_path, - .package = package, + + const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_name }); + const file_relative_path = std.fs.path.basename(full_path); + const package = current_file.package; + const import = try module.getFile(allocator, full_path, file_relative_path, package); + + try import.file.addFileReference(allocator, current_file); + + const result = ImportPackageResult{ + .file = import.file, + .is_new = import.is_new, + .is_package = false, + }; + + return result; + } + + fn getFile(module: *Module, allocator: 
Allocator, full_path: []const u8, relative_path: []const u8, package: *Package) !ImportFileResult { + const path_lookup = try module.import_table.getOrPut(allocator, full_path); + const file: *File = switch (path_lookup.found_existing) { + true => path_lookup.value_ptr.*, + false => blk: { + const new_file_index = try module.files.append(allocator, File{ + .relative_path = relative_path, + .package = package, + }); + const file = module.files.get(new_file_index); + path_lookup.value_ptr.* = file; + break :blk file; + }, }; - try file.addPackageReference(compilation.base_allocator, package); return .{ .file = file, - .is_new = true, + .is_new = !path_lookup.found_existing, }; } - fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { + pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult { + const full_path = try std.fs.path.resolve(allocator, &.{ package.directory.path, package.source_path }); + const import = try module.getFile(allocator, full_path, package.source_path, package); + try import.file.addPackageReference(allocator, package); + + return .{ + .file = import.file, + .is_new = import.is_new, + .is_package = true, + }; + } + + pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { _ = module; const source_file = try file.package.directory.handle.openFile(file.relative_path, .{}); - defer source_file.close(); const file_size = try source_file.getEndPos(); var file_buffer = try allocator.alloc(u8, file_size); const read_byte_count = try source_file.readAll(file_buffer); assert(read_byte_count == file_size); + source_file.close(); //TODO: adjust file maximum size file.source_code = file_buffer[0..read_byte_count]; @@ -140,7 +293,6 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! builtin_file.close(); const module: *Module = try compilation.base_allocator.create(Module); - defer module.deinit(compilation.base_allocator); module.* = Module{ .main_package = blk: { const result = try compilation.base_allocator.create(Package); @@ -157,16 +309,14 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! }; const std_package_dir = "lib/std"; + const package_descriptors = [2]struct { name: []const u8, directory_path: []const u8, }{ .{ .name = "std", - .directory_path = try switch (@import("builtin").is_test) { - true => compilation.pathFromCwd(std_package_dir), - false => compilation.pathFromCompiler(std_package_dir), - }, + .directory_path = try compilation.pathFromCwd(std_package_dir), }, .{ .name = "builtin", @@ -178,7 +328,8 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! }, }; - for (package_descriptors) |package_descriptor| { + var packages: [package_descriptors.len]*Package = undefined; + for (package_descriptors, &packages) |package_descriptor, *package_ptr| { const package = try compilation.base_allocator.create(Package); package.* = .{ .directory = .{ @@ -189,21 +340,22 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! 
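// The getFile/importPackage pair above deduplicates source files by resolved
// path: import_table.getOrPut either returns the already-created *File or
// allocates a slot for a new one in module.files, and is_new tells the caller
// whether lexing and parsing still have to run. A condensed sketch of the
// pattern, with names taken from the code above:
//
//     const gop = try module.import_table.getOrPut(allocator, full_path);
//     if (!gop.found_existing) {
//         const new_file_index = try module.files.append(allocator, .{
//             .relative_path = relative_path,
//             .package = package,
//         });
//         gop.value_ptr.* = module.files.get(new_file_index);
//     }
//     return .{ .file = gop.value_ptr.*, .is_new = !gop.found_existing };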
}; try module.main_package.addDependency(compilation.base_allocator, package_descriptor.name, package); + + package_ptr.* = package; } assert(module.main_package.dependencies.size == 2); - _ = try module.importPackage(compilation, module.main_package.dependencies.get("std").?); + _ = try module.importPackage(compilation.base_allocator, module.main_package.dependencies.get("std").?); for (module.import_table.values()) |import| { try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import); } -} -const ImportPackageResult = struct { - file: *File, - is_new: bool, -}; + const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0]); + + try intermediate_representation.initialize(compilation, module, packages[0], main_declaration); +} fn generateAST() !void {} @@ -222,17 +374,6 @@ pub const Package = struct { try package.dependencies.ensureUnusedCapacity(allocator, 1); package.dependencies.putAssumeCapacityNoClobber(package_name, new_dependency); } - - fn deinit(package: *Package, allocator: Allocator) void { - if (package.dependencies.size > 0) { - assert(package.dependencies.size == 2); - } - package.dependencies.clearAndFree(allocator); - allocator.free(package.source_path); - allocator.free(package.directory.path); - package.directory.handle.close(); - allocator.destroy(package); - } }; pub const File = struct { @@ -241,6 +382,7 @@ pub const File = struct { lexical_analyzer_result: lexical_analyzer.Result = undefined, syntactic_analyzer_result: syntactic_analyzer.Result = undefined, package_references: ArrayList(*Package) = .{}, + file_references: ArrayList(*File) = .{}, relative_path: []const u8, package: *Package, @@ -259,6 +401,10 @@ pub const File = struct { try file.package_references.insert(allocator, 0, package); } + fn addFileReference(file: *File, allocator: Allocator, affected: *File) !void { + try file.file_references.append(allocator, affected); + } + pub fn fromRelativePath(allocator: Allocator, file_relative_path: []const u8) *File { const file_content = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize)); _ = file_content; @@ -271,30 +417,18 @@ pub const File = struct { fn lex(file: *File, allocator: Allocator) !void { assert(file.status == .loaded_into_memory); file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code); - if (!@import("builtin").is_test) { - print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time}); - } + // if (!@import("builtin").is_test) { + // print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time}); + // } file.status = .lexed; } fn parse(file: *File, allocator: Allocator) !void { assert(file.status == .lexed); file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code); - if (!@import("builtin").is_test) { - print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time}); - } + // if (!@import("builtin").is_test) { + // print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time}); + // } file.status = .parsed; } - - fn deinit(file: *File, allocator: Allocator) void { - defer allocator.destroy(file); - if (file.status == .parsed) { - file.syntactic_analyzer_result.free(allocator); - file.lexical_analyzer_result.free(allocator); - file.package_references.clearAndFree(allocator); - allocator.free(file.source_code); - } else { - unreachable; - } - } }; diff --git a/src/backend/emit.zig 
b/src/backend/emit.zig index b5d64e3..17b708d 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -31,17 +31,6 @@ const Result = struct { }; } - fn destroy(image: *Result) void { - inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| { - const section_bytes = @field(image.sections, field_name).content; - switch (@import("builtin").os.tag) { - .linux => std.os.munmap(section_bytes), - .windows => std.os.windows.VirtualFree(section_bytes.ptr, 0, std.os.windows.MEM_RELEASE), - else => @compileError("OS not supported"), - } - } - } - fn mmap(size: usize, flags: packed struct { executable: bool, }) ![]align(page_size) u8 { @@ -79,16 +68,6 @@ const Result = struct { assert(image.sections.text.content.len > 0); return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point])); } - - pub fn free(result: *Result, allocator: Allocator) void { - _ = allocator; - inline for (comptime std.meta.fieldNames(@TypeOf(result.sections))) |field_name| { - switch (@import("builtin").os.tag) { - .windows => unreachable, - else => std.os.munmap(@field(result.sections, field_name).content), - } - } - } }; const Rex = enum(u8) { @@ -160,9 +139,7 @@ fn movAImm(image: *Result, integer: anytype) void { } test "ret void" { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); image.appendCodeByte(ret); const function_pointer = image.getEntryPoint(fn () callconv(.C) void); @@ -185,7 +162,6 @@ fn getMaxInteger(comptime T: type) T { test "ret integer" { inline for (integer_types_to_test) |Int| { var image = try Result.create(); - defer image.free(std.testing.allocator); const expected_number = getMaxInteger(Int); movAImm(&image, expected_number); @@ -234,9 +210,7 @@ fn dstRmSrcR(image: *Result, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRe test "ret integer argument" { inline for (integer_types_to_test) |Int| { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); const number = getMaxInteger(Int); movRmR(&image, Int, .a, .di); @@ -264,9 +238,7 @@ fn subRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRe test "ret sub arguments" { inline for (integer_types_to_test) |Int| { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2); const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a); @@ -348,10 +320,8 @@ fn TestIntegerBinaryOperation(comptime T: type) type { opcode: OpcodeRmR, pub fn runTest(test_case: @This()) !void { - const allocator = std.testing.allocator; for (0..10) |_| { var image = try Result.create(); - defer image.free(allocator); const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2); const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a); movRmR(&image, T, .a, .di); @@ -371,9 +341,7 @@ fn TestIntegerBinaryOperation(comptime T: type) type { } test "call after" { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); const jump_patch_offset = image.sections.text.index + 1; image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 }); const jump_source = image.sections.text.index; @@ -387,9 +355,7 @@ test "call after" { } test "call before" { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); const first_jump_patch_offset = 
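// Result.create above maps writable/executable sections with mmap, and
// getEntryPoint reinterprets the text section as a function pointer. A sketch
// of the call path the tests rely on, assuming the `ret` opcode constant
// defined in this file:
//
//     var image = try Result.create();
//     image.appendCodeByte(ret); // x86_64 `ret`, 0xc3
//     const f = image.getEntryPoint(fn () callconv(.C) void);
//     f(); // executes the freshly emitted machine code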
image.sections.text.index + 1; const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 }; image.appendCode(&first_call); diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig new file mode 100644 index 0000000..15ed936 --- /dev/null +++ b/src/backend/intermediate_representation.zig @@ -0,0 +1,9 @@ +const Compilation = @import("../Compilation.zig"); +const Module = Compilation.Module; +const Package = Compilation.Package; +pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_declaration: Compilation.Declaration.Index) !void { + _ = main_declaration; + _ = package; + _ = module; + _ = compilation; +} diff --git a/src/backend/ir.zig b/src/backend/ir.zig deleted file mode 100644 index 20b0eba..0000000 --- a/src/backend/ir.zig +++ /dev/null @@ -1,143 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const equal = std.mem.eql; - -const data_structures = @import("data_structures.zig"); -const ArrayList = data_structures.ArrayList; -const parser = @import("parser.zig"); - -const void_type = Type{ - .id = .void, -}; - -const Type = struct { - id: Id, - - fn isPrimitive(T: Type) bool { - return switch (T.id) { - .void => true, - }; - } - const Id = enum { - void, - }; -}; - -const Error = error{ - type_mismatch, - internal, - arguments_not_used, -}; - -const TopLevelDeclaration = struct { - type: Id, - index: u31, - - const Id = enum { - function, - expression, - }; -}; - -const Instruction = struct { - id: Id, - index: u16, - - const Id = enum { - ret_void, - }; -}; - -const ret_void = Instruction{ - .id = .ret_void, - .index = 0, -}; - -const ret = struct { - is_type: bool, -}; - -const Function = struct { - instructions: ArrayList(Instruction), - return_type: Type, -}; - -pub const Result = struct { - top_level_declarations: ArrayList(TopLevelDeclaration), - functions: ArrayList(Function), - instructions: struct {} = .{}, - - pub fn free(result: *Result, allocator: Allocator) void { - for (result.functions.items) |*function| { - function.instructions.clearAndFree(allocator); - } - result.functions.clearAndFree(allocator); - result.top_level_declarations.clearAndFree(allocator); - } -}; - -const Analyzer = struct { - parser: *const parser.Result, - top_level_declarations: ArrayList(TopLevelDeclaration), - functions: ArrayList(Function), - allocator: Allocator, - - fn analyze(allocator: Allocator, parser_result: *const parser.Result) Error!Result { - var analyzer = Analyzer{ - .parser = parser_result, - .top_level_declarations = ArrayList(TopLevelDeclaration){}, - .allocator = allocator, - .functions = ArrayList(Function){}, - }; - - for (parser_result.functions.items) |ast_function| { - if (ast_function.statements.items.len != 0) { - for (ast_function.statements.items) |statement| { - _ = statement; - @panic("TODO: statement"); - } - } else { - if (ast_function.arguments.items.len != 0) { - return Error.arguments_not_used; - } - - try analyzer.expectPrimitiveType(void_type, ast_function.return_type); - - const function_index = analyzer.functions.items.len; - - var function = Function{ - .instructions = ArrayList(Instruction){}, - .return_type = void_type, - }; - - function.instructions.append(allocator, ret_void) catch return Error.internal; - - analyzer.top_level_declarations.append(allocator, TopLevelDeclaration{ - .type = .function, - .index = @intCast(function_index), - }) catch return Error.internal; - - analyzer.functions.append(allocator, function) 
catch return Error.internal; - } - } - - return .{ - .top_level_declarations = analyzer.top_level_declarations, - .functions = analyzer.functions, - }; - } - - fn expectPrimitiveType(analyzer: *Analyzer, comptime type_value: Type, type_identifier_id: u32) Error!void { - assert(type_value.isPrimitive()); - const type_identifier = analyzer.parser.strings.get(type_identifier_id) orelse return Error.internal; - - if (!equal(u8, @tagName(type_value.id), type_identifier)) { - return Error.type_mismatch; - } - } -}; - -pub fn runTest(allocator: Allocator, parser_result: *const parser.Result) !Result { - return Analyzer.analyze(allocator, parser_result); -} diff --git a/src/data_structures.zig b/src/data_structures.zig index f6a4bb1..6edf4d2 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -1,7 +1,110 @@ const std = @import("std"); +const assert = std.debug.assert; pub const Allocator = std.mem.Allocator; pub const ArrayList = std.ArrayListUnmanaged; -pub const HashMap = std.AutoHashMapUnmanaged; +pub const AutoHashMap = std.AutoHashMapUnmanaged; +pub const HashMap = std.HashMapUnmanaged; +pub const SegmentedList = std.SegmentedList; pub const StringHashMap = std.StringHashMapUnmanaged; pub const StringArrayHashMap = std.StringArrayHashMapUnmanaged; + +pub fn BlockList(comptime T: type) type { + const item_count = 64; + const Block = struct { + items: [item_count]T = undefined, + bitset: Bitset = Bitset.initEmpty(), + + const Bitset = std.StaticBitSet(item_count); + + fn allocateIndex(block: *@This()) !u6 { + if (block.bitset.mask != std.math.maxInt(@TypeOf(block.bitset.mask))) { + const index = @ctz(~block.bitset.mask); + block.bitset.set(index); + return @intCast(index); + } else { + return error.OutOfMemory; + } + } + }; + + return struct { + blocks: ArrayList(Block) = .{}, + len: usize = 0, + first_block: u32 = 0, + + const List = @This(); + + pub const Index = packed struct(u32) { + valid: bool = true, + index: u6, + block: u25, + + pub const invalid = Index{ + .valid = false, + .index = 0, + .block = 0, + }; + }; + + pub fn get(list: *List, index: Index) *T { + assert(index.valid); + return &list.blocks.items[index.block].items[index.index]; + } + + pub fn append(list: *List, allocator: Allocator, element: T) !Index { + try list.ensureCapacity(allocator, list.len + 1); + const max_allocation = list.blocks.items.len * item_count; + if (list.len < max_allocation) { + // Follow the guess + if (list.blocks.items[list.first_block].allocateIndex()) |index| { + list.blocks.items[list.first_block].items[index] = element; + return .{ + .index = index, + .block = @intCast(list.first_block), + }; + } else |_| { + @panic("TODO"); + } + } else { + const block_index = list.blocks.items.len; + const new_block = list.blocks.addOneAssumeCapacity(); + const index = new_block.allocateIndex() catch unreachable; + new_block.items[index] = element; + return .{ + .index = index, + .block = @intCast(block_index), + }; + } + } + + pub fn ensureCapacity(list: *List, allocator: Allocator, new_capacity: usize) !void { + const max_allocation = list.blocks.items.len * item_count; + if (max_allocation < new_capacity) { + const block_count = new_capacity / item_count + @intFromBool(new_capacity % item_count != 0); + try list.blocks.ensureTotalCapacity(allocator, block_count); + } + } + + test "Bitset index allocation" { + const expect = std.testing.expect; + var block = Block{}; + for (0..item_count) |expected_index| { + const new_index = try block.allocateIndex(); + try expect(new_index == 
expected_index);
+            }
+
+            _ = block.allocateIndex() catch return;
+
+            return error.TestUnexpectedResult;
+        }
+    };
+}
+
+pub fn enumFromString(comptime E: type, string: []const u8) ?E {
+    return inline for (@typeInfo(E).Enum.fields) |enum_field| {
+        if (std.mem.eql(u8, string, enum_field.name)) {
+            break @field(E, enum_field.name);
+        }
+    } else null;
+}
diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig
index e485a5c..a5f26ce 100644
--- a/src/frontend/lexical_analyzer.zig
+++ b/src/frontend/lexical_analyzer.zig
@@ -7,6 +7,7 @@ const equal = std.mem.eql;
 
 const data_structures = @import("../data_structures.zig");
 const ArrayList = data_structures.ArrayList;
+const enumFromString = data_structures.enumFromString;
 
 const Compilation = @import("../Compilation.zig");
 const fs = @import("../fs.zig");
@@ -17,29 +18,71 @@ pub const Token = packed struct(u64) {
     id: Id,
 
     pub const Id = enum(u8) {
-        identifier = 0,
-        number = 1,
-        string_literal = 2,
-        left_parenthesis = '(',
-        right_parenthesis = ')',
-        left_brace = '{',
-        right_brace = '}',
-        equal = '=',
-        colon = ':',
-        semicolon = ';',
-        hash = '#',
-        comma = ',',
-        bang = '!',
+        eof = 0x00,
+        identifier = 0x01,
+        number = 0x02,
+        string_literal = 0x03,
+        fixed_keyword_function = 0x04,
+        fixed_keyword_const = 0x05,
+        fixed_keyword_var = 0x06,
+        fixed_keyword_void = 0x07,
+        fixed_keyword_noreturn = 0x08,
+        fixed_keyword_comptime = 0x09,
+        fixed_keyword_while = 0x0a,
+        fixed_keyword_bool = 0x0b,
+        fixed_keyword_true = 0x0c,
+        fixed_keyword_false = 0x0d,
+        bang = '!', // 0x21
+        hash = '#', // 0x23
+        dollar_sign = '$', // 0x24
+        modulus = '%', // 0x25
+        ampersand = '&', // 0x26
+        left_parenthesis = '(', // 0x28
+        right_parenthesis = ')', // 0x29
+        asterisk = '*', // 0x2a
+        plus = '+', // 0x2b
+        comma = ',', // 0x2c
+        minus = '-', // 0x2d
+        period = '.', // 0x2e
+        slash = '/', // 0x2f
+        colon = ':', // 0x3a
+        semicolon = ';', // 0x3b
+        less = '<', // 0x3c
+        equal = '=', // 0x3d
+        greater = '>', // 0x3e
+        question_mark = '?', // 0x3f
+        at = '@', // 0x40
+        left_bracket = '[', // 0x5b
+        backslash = '\\', // 0x5c
+        right_bracket = ']', // 0x5d
+        caret = '^', // 0x5e
+        underscore = '_', // 0x5f
+        grave = '`', // 0x60
+        left_brace = '{', // 0x7b
+        vertical_bar = '|', // 0x7c
+        right_brace = '}', // 0x7d
+        tilde = '~', // 0x7e
     };
+
+    pub const Index = u32;
+};
+
+pub const FixedKeyword = enum {
+    @"comptime",
+    @"const",
+    @"var",
+    void,
+    noreturn,
+    function,
+    @"while",
+    bool,
+    true,
+    false,
 };
 
 pub const Result = struct {
     tokens: ArrayList(Token),
     time: u64,
-
-    pub fn free(result: *Result, allocator: Allocator) void {
-        result.tokens.clearAndFree(allocator);
-    }
 };
 
 pub fn analyze(allocator: Allocator, text: []const u8) !Result {
@@ -62,7 +105,23 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result {
                     break;
                 }
 
-                break :blk .identifier;
+                const identifier = text[start_index..][0 .. index - start_index];
+                std.debug.print("Identifier: {s}\n", .{identifier});
+
+                if (start_character == 'u' or start_character == 's') {
+                    var index_integer = start_index + 1;
+                    while (text[index_integer] >= '0' and text[index_integer] <= '9') {
+                        index_integer += 1;
+                    }
+
+                    if (index_integer == index) {
+                        unreachable;
+                    }
+                }
+
+                break :blk if (enumFromString(FixedKeyword, text[start_index..][0 ..
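// BlockList(T), added to data_structures.zig above, stores elements in fixed
// 64-slot blocks and hands out packed 32-bit indices (valid bit + slot +
// block number) that remain valid as the list grows. A minimal usage sketch,
// assuming an element type and an allocator in scope:
//
//     var list = data_structures.BlockList(struct { x: u32 }){};
//     const index = try list.append(allocator, .{ .x = 42 });
//     list.get(index).x += 1; // re-resolve through the index after appends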
index - start_index])) |fixed_keyword| switch (fixed_keyword) { + inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)), + } else .identifier; }, '(', ')', '{', '}', '-', '=', ';', '#' => |operator| blk: { index += 1; @@ -75,9 +134,17 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { break :blk .number; }, + '\'' => { + unreachable; + }, '"' => blk: { index += 1; - while (text[index] != '"') { + + while (true) { + if (text[index] == '"' and text[index - 1] != '"') { + break; + } + index += 1; } @@ -103,7 +170,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { }); } - const should_log = false; + const should_log = true; if (should_log) { for (tokens.items, 0..) |token, i| { std.debug.print("#{} {s}\n", .{ i, @tagName(token.id) }); diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index e69de29..761054b 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -0,0 +1,668 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const equal = std.mem.eql; +const Compilation = @import("../Compilation.zig"); +const File = Compilation.File; +const Module = Compilation.Module; +const Package = Compilation.Package; + +const Block = Compilation.Block; +const Declaration = Compilation.Declaration; +const Field = Compilation.Field; +const Function = Compilation.Function; +const Scope = Compilation.Scope; +const Struct = Compilation.Struct; +const Type = Compilation.Type; +const Value = Compilation.Value; + +const lexical_analyzer = @import("lexical_analyzer.zig"); +const Token = lexical_analyzer.Token; + +const syntactic_analyzer = @import("syntactic_analyzer.zig"); +const ContainerDeclaration = syntactic_analyzer.ContainerDeclaration; +const Node = syntactic_analyzer.Node; +const SymbolDeclaration = syntactic_analyzer.SymbolDeclaration; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const HashMap = data_structures.AutoHashMap; + +const print = std.debug.print; + +const Analyzer = struct { + source_code: []const u8, + nodes: []const Node, + tokens: []const Token, + file: *File, + allocator: Allocator, + module: *Module, + + fn lazyGlobalDeclaration(analyzer: *Analyzer, node_index: Node.Index) void { + print("Global: {}", .{analyzer.nodes[node_index.unwrap()]}); + } + + fn comptimeBlock(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index { + const comptime_node = analyzer.nodes[node_index.unwrap()]; + + const comptime_block_node = analyzer.nodes[comptime_node.left.unwrap()]; + var statement_node_indices = ArrayList(Node.Index){}; + switch (comptime_block_node.id) { + .block_one => { + try statement_node_indices.append(analyzer.allocator, comptime_block_node.left); + }, + else => |t| @panic(@tagName(t)), + } + + var statement_values = ArrayList(Value.Index){}; + + for (statement_node_indices.items) |statement_node_index| { + const statement_node = analyzer.nodes[statement_node_index.unwrap()]; + switch (statement_node.id) { + .assign => { + const assign_expression = try analyzer.assign(scope, statement_node_index); + try statement_values.append(analyzer.allocator, assign_expression); + }, + else => |t| @panic(@tagName(t)), + } + } + + // TODO + + return Value.Index.invalid; + } + + fn assign(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index { + const node = 
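// The `inline else` arm completed above maps each FixedKeyword to its
// Token.Id at compile time by concatenating "fixed_keyword_" with the tag
// name. The same trick in isolation, as a sketch assuming the two enums from
// the lexer:
//
//     fn keywordTokenId(keyword: FixedKeyword) Token.Id {
//         return switch (keyword) {
//             inline else => |k| @field(Token.Id, "fixed_keyword_" ++ @tagName(k)),
//         };
//     }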
analyzer.nodes[node_index.unwrap()]; + + print("\nAssign. Left: {}. Right: {}\n", .{ node.left, node.right }); + // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` + if (node.left.valid) { + @panic("Not discard"); + } else { + return try analyzer.expression(scope, ExpectType{ .none = {} }, node.right); + } + } + + fn block(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) !Block.Index { + const block_node = analyzer.nodes[node_index.unwrap()]; + var statements = ArrayList(Node.Index){}; + switch (block_node.id) { + .block_one => { + try statements.append(analyzer.allocator, block_node.left); + }, + .block_zero => {}, + else => |t| @panic(@tagName(t)), + } + + for (statements.items) |statement_node_index| { + _ = try analyzer.expression(scope, expect_type, statement_node_index); + // const statement_node = analyzer.nodes[statement_node_index.unwrap()]; + // + // switch (statement_node.id) { + // try .simple_while => { + // const while_condition = try analyzer.expression(scope, ExpectType.boolean, statement_node.left); + // _ = while_condition; + // const while_block = try analyzer.block(scope, expect_type, statement_node.right); + // _ = while_block; + // unreachable; + // }, + // else => |t| @panic(@tagName(t)), + // } + } + + return try analyzer.module.blocks.append(analyzer.allocator, .{}); + } + + fn expression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) error{OutOfMemory}!Value.Index { + const node = analyzer.nodes[node_index.unwrap()]; + return switch (node.id) { + .identifier => blk: { + const identifier_hash = try analyzer.identifierFromToken(node.token); + // TODO: search in upper scopes too + const identifier_scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_hash); + if (identifier_scope_lookup.found_existing) { + const declaration_index = identifier_scope_lookup.value_ptr.*; + const declaration = analyzer.module.declarations.get(declaration_index); + break :blk try analyzer.analyzeDeclaration(scope, declaration); + } else { + @panic("TODO: not found"); + } + }, + .compiler_intrinsic_one => blk: { + const intrinsic_name = analyzer.tokenIdentifier(node.token + 1); + const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable; + print("Intrinsic: {s}", .{@tagName(intrinsic)}); + switch (intrinsic) { + .import => { + const import_argument = analyzer.nodes[node.left.unwrap()]; + switch (import_argument.id) { + .string_literal => { + const import_name = analyzer.tokenStringLiteral(import_argument.token); + const imported_file = try analyzer.module.importFile(analyzer.allocator, analyzer.file, import_name); + + if (imported_file.is_new) { + // TODO: fix error + analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, imported_file.file) catch return error.OutOfMemory; + } else { + unreachable; + } + + const file_struct_declaration_index = try analyzeFile(analyzer.allocator, analyzer.module, imported_file.file); + break :blk try analyzer.module.values.append(analyzer.allocator, .{ + .type = .{ + .declaration = file_struct_declaration_index, + }, + .is_const = true, + .is_comptime = true, + }); + }, + else => unreachable, + } + }, + } + unreachable; + }, + .function_definition => blk: { + const function_prototype_index = try analyzer.functionPrototype(node.left); + + const function_body = try analyzer.block(scope, .{ + .type_index = 
analyzer.functionPrototypeReturnType(function_prototype_index), + }, node.right); + + const function_index = try analyzer.module.functions.append(analyzer.allocator, .{ + .prototype = function_prototype_index, + .body = function_body, + }); + const value_index = try analyzer.module.values.append(analyzer.allocator, .{ + .type = .{ + .function = function_index, + }, + .is_const = true, + .is_comptime = true, + }); + break :blk value_index; + }, + .keyword_true => blk: { + switch (expect_type) { + .none => {}, + .type_index => |expected_type| { + if (@as(u32, @bitCast(type_boolean)) != @as(u32, @bitCast(expected_type))) { + @panic("TODO: compile error"); + } + }, + } + + break :blk bool_true; + }, + .simple_while => blk: { + const while_condition = try analyzer.expression(scope, ExpectType.boolean, node.left); + _ = while_condition; + const while_body = try analyzer.block(scope, expect_type, node.right); + _ = while_body; + const loop_index = try analyzer.module.loops.append(analyzer.allocator, .{}); + const value_index = try analyzer.module.values.append(analyzer.allocator, .{ + .type = .{ + .loop = loop_index, + }, + // TODO: + .is_const = false, + .is_comptime = false, + }); + break :blk value_index; + }, + else => |t| @panic(@tagName(t)), + }; + } + + fn functionPrototypeReturnType(analyzer: *Analyzer, function_prototype_index: Function.Prototype.Index) Type.Index { + const function_prototype = analyzer.module.function_prototypes.get(function_prototype_index); + return function_prototype.return_type; + } + + fn functionPrototype(analyzer: *Analyzer, node_index: Node.Index) !Function.Prototype.Index { + const node = analyzer.nodes[node_index.unwrap()]; + switch (node.id) { + .simple_function_prototype => { + const arguments: ?[]const Field.Index = blk: { + const argument_node = analyzer.nodes[node.left.get() orelse break :blk null]; + switch (argument_node.id) { + else => |t| @panic(@tagName(t)), + } + }; + const return_type_node = analyzer.nodes[node.right.unwrap()]; + const return_type: Type.Index = switch (return_type_node.id) { + .identifier => { + unreachable; + }, + .keyword_noreturn => .{ .block = 0, .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.noreturn) }, + else => |t| @panic(@tagName(t)), + }; + + return try analyzer.module.function_prototypes.append(analyzer.allocator, .{ + .arguments = arguments, + .return_type = return_type, + }); + }, + else => |t| @panic(@tagName(t)), + } + } + + fn analyzeDeclaration(analyzer: *Analyzer, scope: *Scope, declaration: *Declaration) !Value.Index { + switch (declaration.*) { + .unresolved => |node_index| { + const declaration_node = analyzer.nodes[node_index.unwrap()]; + return switch (declaration_node.id) { + .simple_variable_declaration => blk: { + const expect_type = switch (declaration_node.left.valid) { + true => unreachable, + false => @unionInit(ExpectType, "none", {}), + }; + + const initialization_expression = try analyzer.expression(scope, expect_type, declaration_node.right); + const value = analyzer.module.values.get(initialization_expression); + if (value.is_comptime and value.is_const) { + break :blk initialization_expression; + } + + unreachable; + }, + else => |t| @panic(@tagName(t)), + }; + }, + .struct_type => unreachable, + } + + @panic("TODO: analyzeDeclaration"); + } + + fn containerMember(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !void { + const node = analyzer.nodes[node_index.unwrap()]; + switch (node.id) { + .simple_variable_declaration => {}, + .@"comptime" => { + _ = try 
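// Since Type.Index is a packed struct(u32), the keyword_true arm above can
// compare two type indices by reinterpreting their bits; a sketch of that
// comparison as a hypothetical helper, not part of the patch:
//
//     fn sameTypeIndex(a: Type.Index, b: Type.Index) bool {
//         return @as(u32, @bitCast(a)) == @as(u32, @bitCast(b));
//     }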
analyzer.comptimeBlock(scope, node_index); + }, + else => std.debug.panic("Tag: {}", .{node.id}), + } + } + + fn globalSymbolDeclaration(analyzer: *Analyzer, symbol_declaration: SymbolDeclaration) !void { + if (symbol_declaration.type_node.get()) |type_node_index| { + _ = type_node_index; + @panic("TODO: type node"); + } + const initialization_node = analyzer.nodes[symbol_declaration.initialization_node.unwrap()]; + switch (initialization_node.id) { + .compiler_intrinsic_one => { + const intrinsic_name = analyzer.tokenIdentifier(initialization_node.token + 1); + const intrinsic = inline for (@typeInfo(Intrinsic).Enum.fields) |intrinsic_enum_field| { + if (equal(u8, intrinsic_name, intrinsic_enum_field.name)) { + break @field(Intrinsic, intrinsic_enum_field.name); + } + } else unreachable; + print("Intrinsic: {s}", .{@tagName(intrinsic)}); + switch (intrinsic) { + .import => { + const import_argument = analyzer.nodes[initialization_node.left.get()]; + switch (import_argument.id) { + .string_literal => unreachable, + else => unreachable, + } + }, + } + // const intrinsic_node_index = initialization_node.left.unwrap(); + // const intrinsic_node = analyzer.nodes[intrinsic_node_index]; + // + // switch (intrinsic_node.id) { + // .string_literal => + // } + // print("intrinsic: {}", .{intrinsic_node.id}); + + // _ = a; + }, + else => unreachable, + } + print("Init node: {}\n", .{initialization_node}); + @panic("TODO"); + } + + fn symbolDeclaration(analyzer: *Analyzer, node_index: Node.Index) SymbolDeclaration { + const node = analyzer.nodes[node_index.unwrap()]; + return switch (node.id) { + .simple_variable_declaration => .{ + .type_node = node.left, + .initialization_node = node.right, + .mutability_token = node.token, + }, + else => unreachable, + }; + } + + fn structDeclaration(analyzer: *Analyzer, parent_scope: Scope.Index, container_declaration: syntactic_analyzer.ContainerDeclaration, index: Node.Index) !Declaration.Index { + _ = index; + const new_scope = try analyzer.allocateScope(parent_scope, Type.Index.invalid); + const scope = new_scope.ptr; + + const is_file = !parent_scope.valid; + assert(is_file); + // TODO: do it properly + const declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{ + .struct_type = .{ + .scope = new_scope.index, + .initialization = if (is_file) Value.Index.invalid else unreachable, + }, + }); + // TODO: + assert(container_declaration.members.len > 0); + + const count = blk: { + var result: struct { + fields: u32 = 0, + declarations: u32 = 0, + } = .{}; + for (container_declaration.members) |member_index| { + const member = analyzer.nodes[member_index.unwrap()]; + const member_type = getContainerMemberType(member.id); + + switch (member_type) { + .declaration => result.declarations += 1, + .field => result.fields += 1, + } + } + break :blk result; + }; + + var declaration_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.declarations); + var field_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.fields); + + for (container_declaration.members) |member_index| { + const member = analyzer.nodes[member_index.unwrap()]; + const member_type = getContainerMemberType(member.id); + const array_list = switch (member_type) { + .declaration => &declaration_nodes, + .field => &field_nodes, + }; + array_list.appendAssumeCapacity(member_index); + } + + for (declaration_nodes.items) |declaration_node_index| { + const declaration_node = analyzer.nodes[declaration_node_index.unwrap()]; + switch 
(declaration_node.id) { + .@"comptime" => {}, + .simple_variable_declaration => { + const expected_identifier_token_index = declaration_node.token + 1; + const expected_identifier_token = analyzer.tokens[expected_identifier_token_index]; + if (expected_identifier_token.id != .identifier) { + print("Error: found: {}", .{expected_identifier_token.id}); + @panic("Expected identifier"); + } + // TODO: Check if it is a keyword + + const identifier_index = try analyzer.identifierFromToken(expected_identifier_token_index); + + const declaration_name = analyzer.tokenIdentifier(expected_identifier_token_index); + // Check if the symbol name is already occupied in the same scope + const scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_index); + if (scope_lookup.found_existing) { + std.debug.panic("Existing name in lookup: {s}", .{declaration_name}); + } + + // Check if the symbol name is already occupied in parent scopes + var upper_scope_index = scope.parent; + + while (upper_scope_index.valid) { + @panic("TODO: upper scope"); + } + + const container_declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{ + .unresolved = declaration_node_index, + }); + + scope_lookup.value_ptr.* = container_declaration_index; + }, + else => unreachable, + } + } + + // TODO: consider iterating over scope declarations instead? + for (declaration_nodes.items) |declaration_node_index| { + const declaration_node = analyzer.nodes[declaration_node_index.unwrap()]; + switch (declaration_node.id) { + .@"comptime", .simple_variable_declaration => try analyzer.containerMember(scope, declaration_node_index), + else => unreachable, + } + } + + for (field_nodes.items) |field_index| { + const field_node = analyzer.nodes[field_index.unwrap()]; + _ = field_node; + + @panic("TODO: fields"); + } + + return declaration_index; + } + + const MemberType = enum { + declaration, + field, + }; + + fn getContainerMemberType(member_id: Node.Id) MemberType { + return switch (member_id) { + .@"comptime" => .declaration, + .simple_variable_declaration => .declaration, + else => unreachable, + }; + } + + fn identifierFromToken(analyzer: *Analyzer, token_index: Token.Index) !u32 { + const identifier = analyzer.tokenIdentifier(token_index); + const key: u32 = @truncate(std.hash.Wyhash.hash(0, identifier)); + + const lookup_result = try analyzer.module.string_table.getOrPut(analyzer.allocator, key); + + if (lookup_result.found_existing) { + return lookup_result.key_ptr.*; + } else { + return key; + } + } + + fn tokenIdentifier(analyzer: *Analyzer, token_index: Token.Index) []const u8 { + const token = analyzer.tokens[token_index]; + assert(token.id == .identifier); + const identifier = analyzer.source_code[token.start..][0..token.len]; + + return identifier; + } + + fn tokenStringLiteral(analyzer: *Analyzer, token_index: Token.Index) []const u8 { + const token = analyzer.tokens[token_index]; + assert(token.id == .string_literal); + // Eat double quotes + const start = token.start + 1; + const len = token.len - 2; + const string_literal = analyzer.source_code[start..][0..len]; + + return string_literal; + } + + const ScopeAllocation = struct { + ptr: *Scope, + index: Scope.Index, + }; + + fn allocateScope(analyzer: *Analyzer, parent_scope: Scope.Index, scope_type: Type.Index) !ScopeAllocation { + const scope_index = try analyzer.module.scopes.append(analyzer.allocator, .{ + .parent = parent_scope, + .type = scope_type, + }); + const scope = analyzer.module.scopes.get(scope_index); + + return .{ 
+            .ptr = scope,
+            .index = scope_index,
+        };
+    }
+};
+
+const ExpectType = union(enum) {
+    none,
+    type_index: Type.Index,
+
+    pub const boolean = ExpectType{
+        .type_index = type_boolean,
+    };
+};
+
+const type_boolean = Type.Index{
+    .block = 0,
+    .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.bool),
+};
+
+const bool_false = Value.Index{
+    .block = 0,
+    .index = 0,
+};
+
+const bool_true = Value.Index{
+    .block = 0,
+    .index = 1,
+};
+
+const Intrinsic = enum {
+    import,
+};
+
+const FixedTypeKeyword = enum {
+    void,
+    noreturn,
+    bool,
+
+    const offset = 0;
+};
+
+const HardwareUnsignedIntegerType = enum {
+    u8,
+    u16,
+    u32,
+    u64,
+
+    const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len;
+};
+
+const HardwareSignedIntegerType = enum {
+    s8,
+    s16,
+    s32,
+    s64,
+
+    const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len;
+};
+
+pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) !Declaration.Index {
+    inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| {
+        _ = try module.types.append(compilation.base_allocator, @unionInit(Type, enum_field.name, {}));
+    }
+
+    inline for (@typeInfo(HardwareUnsignedIntegerType).Enum.fields) |enum_field| {
+        _ = try module.types.append(compilation.base_allocator, .{
+            .integer = .{
+                .signedness = .unsigned,
+                .bit_count = switch (@field(HardwareUnsignedIntegerType, enum_field.name)) {
+                    .u8 => 8,
+                    .u16 => 16,
+                    .u32 => 32,
+                    .u64 => 64,
+                },
+            },
+        });
+    }
+
+    inline for (@typeInfo(HardwareSignedIntegerType).Enum.fields) |enum_field| {
+        _ = try module.types.append(compilation.base_allocator, .{
+            .integer = .{
+                .signedness = .signed,
+                .bit_count = switch (@field(HardwareSignedIntegerType, enum_field.name)) {
+                    .s8 => 8,
+                    .s16 => 16,
+                    .s32 => 32,
+                    .s64 => 64,
+                },
+            },
+        });
+    }
+
+    _ = try module.values.append(compilation.base_allocator, .{
+        .type = .{
+            .bool_false = {},
+        },
+        .is_const = true,
+        .is_comptime = true,
+    });
+
+    _ = try module.values.append(compilation.base_allocator, .{
+        .type = .{
+            .bool_true = {},
+        },
+        .is_const = true,
+        .is_comptime = true,
+    });
+
+    return analyzeExistingPackage(compilation, module, package);
+}
+
+pub fn analyzeExistingPackage(compilation: *Compilation, module: *Module, package: *Package) !Declaration.Index {
+    const package_import = try module.importPackage(compilation.base_allocator, package);
+    assert(!package_import.is_new);
+    const package_file = package_import.file;
+
+    return try analyzeFile(compilation.base_allocator, module, package_file);
+}
+
+pub fn analyzeFile(allocator: Allocator, module: *Module, file: *File) !Declaration.Index {
+    assert(file.status == .parsed);
+
+    var analyzer = Analyzer{
+        .source_code = file.source_code,
+        .nodes = file.syntactic_analyzer_result.nodes.items,
+        .tokens = file.lexical_analyzer_result.tokens.items,
+        .file = file,
+        .allocator = allocator,
+        .module = module,
+    };
+
+    const result = try analyzer.structDeclaration(Scope.Index.invalid, try mainNodeToContainerDeclaration(allocator, file), .{ .value = 0 });
+    return result;
+}
+
+fn mainNodeToContainerDeclaration(allocator: Allocator, file: *File) !ContainerDeclaration {
+    const main_node = getNode(file, 0);
+    var list_buffer: [2]Node.Index = undefined;
+    const left_node = getNode(file, main_node.left.value);
+    const node_list: []const Node.Index = blk: {
+        if (left_node.id != .node_list) {
+            const len = @as(u2, @intFromBool(main_node.left.valid)) + @as(u2,
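// Because initialize() above appends types in a fixed order, primitive types
// get well-known indices in block 0: void/noreturn/bool start at
// FixedTypeKeyword.offset (0), u8..u64 at HardwareUnsignedIntegerType.offset
// (3), s8..s64 at HardwareSignedIntegerType.offset (7), and the first two
// values appended are the canonical bools. A sketch of deriving one such
// index, following the pattern of type_boolean above:
//
//     const type_u32 = Type.Index{
//         .block = 0,
//         .index = HardwareUnsignedIntegerType.offset + @intFromEnum(HardwareUnsignedIntegerType.u32),
//     };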
@intFromBool(main_node.right.valid)) - @as(u2, @intFromBool(main_node.left.valid and main_node.right.valid and main_node.left.value == main_node.right.value)); + assert(len > 0); + list_buffer[0] = main_node.left; + list_buffer[1] = main_node.right; + break :blk list_buffer[0..len]; + } else { + @panic("TODO: get list"); + } + }; + + const owned_node_list = try allocator.alloc(Node.Index, node_list.len); + @memcpy(owned_node_list, node_list); + + // Deal properly with this allocation + return .{ + .members = owned_node_list, + }; +} + +fn getNode(file: *const File, index: u32) *Node { + return &file.syntactic_analyzer_result.nodes.items[index]; +} diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig index 54ae1ac..8bfbe81 100644 --- a/src/frontend/syntactic_analyzer.zig +++ b/src/frontend/syntactic_analyzer.zig @@ -6,6 +6,7 @@ const log = std.log; const data_structures = @import("../data_structures.zig"); const ArrayList = data_structures.ArrayList; +const enumFromString = data_structures.enumFromString; const HashMap = data_structures.HashMap; const lexical_analyzer = @import("lexical_analyzer.zig"); @@ -14,26 +15,40 @@ const Token = lexical_analyzer.Token; pub const Result = struct { nodes: ArrayList(Node), time: u64, - - pub fn free(result: *Result, allocator: Allocator) void { - result.nodes.clearAndFree(allocator); - } }; -pub const Node = packed struct(u96) { +// TODO: pack it to be more efficient +pub const Node = packed struct(u128) { token: u32, id: Id, left: Node.Index, right: Node.Index, - pub const Index = u27; + pub const Index = packed struct(u32) { + value: u31, + valid: bool = true, + + pub const invalid = Index{ + .value = 0, + .valid = false, + }; + + pub fn get(index: Index) ?u32 { + return if (index.valid) index.value else null; + } + + pub fn unwrap(index: Index) u32 { + assert(index.valid); + return index.value; + } + }; pub const Range = struct { start: u32, end: u32, }; - pub const Id = enum(u10) { + pub const Id = enum(u32) { main = 0, identifier = 1, number = 2, @@ -46,6 +61,13 @@ pub const Node = packed struct(u96) { simple_variable_declaration = 9, assign = 10, @"comptime" = 11, + node_list = 12, + block_zero = 13, + simple_while = 14, + simple_function_prototype = 15, + function_definition = 16, + keyword_noreturn = 17, + keyword_true = 18, }; }; @@ -63,10 +85,6 @@ const Analyzer = struct { allocator: Allocator, temporal_node_heap: ArrayList(Node.Index) = .{}, - fn free(analyzer: *Analyzer) void { - _ = analyzer; - } - fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 { if (analyzer.tokens[analyzer.token_i].id == token_id) { const result = analyzer.token_i; @@ -90,56 +108,50 @@ const Analyzer = struct { while (analyzer.token_i < analyzer.tokens.len) { const first = analyzer.token_i; const member_node: Node = switch (analyzer.tokens[first].id) { - .identifier => blk: { - const first_identifier_token = analyzer.tokens[first]; - analyzer.token_i += 1; + .fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .left_brace => blk: { + analyzer.token_i += 1; + const comptime_block = try analyzer.block(); - const identifier = analyzer.getIdentifier(first_identifier_token); - - if (equal(u8, identifier, "comptime")) { - switch (analyzer.tokens[analyzer.token_i].id) { - .left_brace => { - const comptime_block = try analyzer.block(); - - break :blk .{ - .id = .@"comptime", - .token = first, - .left = comptime_block, - .right = 0, - }; - }, - else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}), - 
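// Node.Index, reworked above, packs an optional 31-bit node reference plus a
// validity flag into 32 bits; get() exposes it as a Zig optional and unwrap()
// asserts validity. A consuming sketch, with `node` a hypothetical Node:
//
//     if (node.left.get()) |left_value| {
//         _ = left_value; // child node present
//     } else {
//         // Node.Index.invalid: no child
//     }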
} - } else { - const is_const = equal(u8, identifier, "const"); - const is_var = equal(u8, identifier, "var"); - assert(is_const or is_var); - - _ = try analyzer.expectToken(.identifier); - - // TODO: type - _ = try analyzer.expectToken(.equal); - - // TODO: do this in a function - const init_node = switch (analyzer.tokens[analyzer.token_i].id) { - .identifier => unreachable, - .hash => try analyzer.compilerIntrinsic(), - else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), - }; - - _ = try analyzer.expectToken(.semicolon); - - // TODO: - const type_node = 0; - const top_level_decl = .{ - .id = .simple_variable_declaration, + break :blk .{ + .id = .@"comptime", .token = first, - .left = type_node, - .right = init_node, + .left = comptime_block, + .right = Node.Index.invalid, }; + }, + else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}), + }, + .fixed_keyword_const, .fixed_keyword_var => blk: { + analyzer.token_i += 1; + _ = try analyzer.expectToken(.identifier); - break :blk top_level_decl; - } + // TODO: type + _ = try analyzer.expectToken(.equal); + + // TODO: do this in a function + const init_node = switch (analyzer.tokens[analyzer.token_i].id) { + .identifier => unreachable, + .hash => try analyzer.compilerIntrinsic(), + .left_parenthesis => try analyzer.function(), + else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), + }; + + _ = try analyzer.expectToken(.semicolon); + + // TODO: + const type_node = Node.Index.invalid; + const top_level_decl = .{ + .id = .simple_variable_declaration, + .token = first, + .left = type_node, + .right = init_node, + }; + + break :blk top_level_decl; + }, + .identifier => { + unreachable; }, else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), }; @@ -150,6 +162,11 @@ const Analyzer = struct { const members_array = analyzer.temporal_node_heap.items[node_heap_top..]; const members: Members = switch (members_array.len) { + 1 => .{ + .len = 1, + .left = members_array[0], + .right = Node.Index.invalid, + }, 2 => .{ .len = 2, .left = members_array[0], @@ -161,6 +178,51 @@ const Analyzer = struct { return members; } + fn function(analyzer: *Analyzer) !Node.Index { + const token = analyzer.token_i; + const function_prototype = try analyzer.functionPrototype(); + const function_body = try analyzer.block(); + return analyzer.addNode(.{ + .id = .function_definition, + .token = token, + .left = function_prototype, + .right = function_body, + }); + } + + fn functionPrototype(analyzer: *Analyzer) !Node.Index { + const token = analyzer.token_i; + const arguments = try analyzer.argumentList(.left_parenthesis, .right_parenthesis); + const return_type = try analyzer.typeExpression(); + + return analyzer.addNode(.{ + .id = .simple_function_prototype, + .token = token, + .left = arguments, + .right = return_type, + }); + } + + fn argumentList(analyzer: *Analyzer, maybe_start_token: ?Token.Id, end_token: Token.Id) !Node.Index { + if (maybe_start_token) |start_token| { + _ = try analyzer.expectToken(start_token); + } + + var list = ArrayList(Node.Index){}; + + while (analyzer.tokens[analyzer.token_i].id != end_token) { + @panic("TODO: argument list"); + } + + _ = try analyzer.expectToken(end_token); + + if (list.items.len != 0) { + @panic("TODO: arguments"); + } else { + return Node.Index.invalid; + } + } + fn block(analyzer: *Analyzer) !Node.Index { const left_brace = try analyzer.expectToken(.left_brace); const node_heap_top = analyzer.temporal_node_heap.items.len; @@ -174,11 +236,17 @@ const Analyzer = struct { const statement_array = 
analyzer.temporal_node_heap.items[node_heap_top..]; const node: Node = switch (statement_array.len) { + 0 => .{ + .id = .block_zero, + .token = left_brace, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }, 1 => .{ .id = .block_one, .token = left_brace, .left = statement_array[0], - .right = 0, + .right = Node.Index.invalid, }, else => |len| std.debug.panic("len: {}", .{len}), }; @@ -187,10 +255,41 @@ const Analyzer = struct { fn statement(analyzer: *Analyzer) !Node.Index { // TODO: more stuff before - const result = try analyzer.assignExpression(); - _ = try analyzer.expectToken(.semicolon); + const first_statement_token = analyzer.tokens[analyzer.token_i]; + return switch (first_statement_token.id) { + .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .colon => { + unreachable; + }, + else => blk: { + const identifier = analyzer.getIdentifier(first_statement_token); + std.debug.print("Starting statement with identifier: {s}\n", .{identifier}); + const result = try analyzer.assignExpression(); + _ = try analyzer.expectToken(.semicolon); + break :blk result; + }, + }, + .fixed_keyword_while => try analyzer.whileStatement(), + else => unreachable, + }; + } - return result; + fn whileStatement(analyzer: *Analyzer) error{ OutOfMemory, unexpected_token, not_implemented }!Node.Index { + const while_identifier_index = try analyzer.expectToken(.fixed_keyword_while); + + _ = try analyzer.expectToken(.left_parenthesis); + // TODO: + const while_condition = try analyzer.expression(); + _ = try analyzer.expectToken(.right_parenthesis); + + const while_block = try analyzer.block(); + + return analyzer.addNode(.{ + .id = .simple_while, + .token = while_identifier_index, + .left = while_condition, + .right = while_block, + }); } fn assignExpression(analyzer: *Analyzer) !Node.Index { @@ -242,13 +341,13 @@ const Analyzer = struct { .id = .compiler_intrinsic_one, .token = hash, .left = parameters[0], - .right = 0, + .right = Node.Index.invalid, }), else => unreachable, }; } - fn expression(analyzer: *Analyzer) !Node.Index { + fn expression(analyzer: *Analyzer) error{ OutOfMemory, not_implemented, unexpected_token }!Node.Index { return analyzer.expressionPrecedence(0); } @@ -259,7 +358,7 @@ const Analyzer = struct { while (analyzer.token_i < analyzer.tokens.len) { const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) { - .equal, .semicolon, .right_parenthesis => -1, + .equal, .semicolon, .right_parenthesis, .right_brace => -1, else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }), }; @@ -305,7 +404,8 @@ const Analyzer = struct { .colon => unreachable, else => try analyzer.curlySuffixExpression(), }, - .string_literal => try analyzer.curlySuffixExpression(), + .string_literal, .fixed_keyword_true, .fixed_keyword_false => try analyzer.curlySuffixExpression(), + .left_brace => try analyzer.block(), else => |id| { log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)}); unreachable; @@ -324,9 +424,33 @@ const Analyzer = struct { }; } + fn noReturn(analyzer: *Analyzer) !Node.Index { + const token_i = analyzer.token_i; + assert(analyzer.tokens[token_i].id == .fixed_keyword_noreturn); + analyzer.token_i += 1; + return analyzer.addNode(.{ + .id = .keyword_noreturn, + .token = token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + } + + fn boolTrue(analyzer: *Analyzer) !Node.Index { + const token_i = analyzer.token_i; + assert(analyzer.tokens[token_i].id == .fixed_keyword_true); + analyzer.token_i += 1; 
+ return analyzer.addNode(.{ + .id = .keyword_true, + .token = token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + } + fn typeExpression(analyzer: *Analyzer) !Node.Index { return switch (analyzer.tokens[analyzer.token_i].id) { - .string_literal, .identifier => try analyzer.errorUnionExpression(), + .identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false => try analyzer.errorUnionExpression(), else => |id| blk: { log.warn("By default, calling errorUnionExpression with {s}", .{@tagName(id)}); @@ -354,7 +478,21 @@ const Analyzer = struct { unreachable; } else { if (analyzer.tokens[analyzer.token_i].id == .left_parenthesis) { - unreachable; + analyzer.token_i += 1; + + var expression_list = ArrayList(Node.Index){}; + while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { + const parameter = try analyzer.expression(); + try expression_list.append(analyzer.allocator, parameter); + analyzer.token_i += @intFromBool(switch (analyzer.tokens[analyzer.token_i].id) { + .comma, .right_parenthesis => true, + .colon, .right_brace, .right_bracket => unreachable, + else => unreachable, + }); + } + + _ = try analyzer.expectToken(.right_parenthesis); + @panic("TODO"); } else { return result; } @@ -366,28 +504,34 @@ const Analyzer = struct { fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index { const token_i = analyzer.token_i; - return switch (analyzer.tokens[token_i].id) { + const token = analyzer.tokens[token_i]; + return switch (token.id) { .string_literal => blk: { analyzer.token_i += 1; break :blk analyzer.addNode(.{ .id = .string_literal, .token = token_i, - .left = 0, - .right = 0, + .left = Node.Index.invalid, + .right = Node.Index.invalid, }); }, .identifier => switch (analyzer.tokens[token_i + 1].id) { .colon => unreachable, - else => analyzer.addNode(.{ - .id = .identifier, - .token = blk: { - analyzer.token_i += 1; - break :blk token_i; - }, - .left = 0, - .right = 0, - }), + else => blk: { + const identifier = analyzer.getIdentifier(token); + analyzer.token_i += 1; + if (equal(u8, identifier, "_")) { + break :blk Node.Index.invalid; + } else break :blk analyzer.addNode(.{ + .id = .identifier, + .token = token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + }, }, + .fixed_keyword_noreturn => try analyzer.noReturn(), + .fixed_keyword_true => try analyzer.boolTrue(), else => |foo| { switch (foo) { .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.getIdentifier(analyzer.tokens[token_i]) }), @@ -405,9 +549,13 @@ const Analyzer = struct { } fn addNode(analyzer: *Analyzer, node: Node) !Node.Index { + std.debug.print("Adding node {s}\n", .{@tagName(node.id)}); const index = analyzer.nodes.items.len; try analyzer.nodes.append(analyzer.allocator, node); - return @intCast(index); + + return Node.Index{ + .value = @intCast(index), + }; } }; @@ -420,12 +568,12 @@ const Members = struct { return switch (members.len) { 0 => unreachable, 1 => .{ - .start = members.left, - .end = members.left, + .start = members.left.value, + .end = members.left.value, }, 2 => .{ - .start = members.left, - .end = members.right, + .start = members.left.value, + .end = members.right.value, }, else => unreachable, }; @@ -439,19 +587,19 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R .file = file, .allocator = allocator, }; - errdefer analyzer.free(); const node_index = try analyzer.addNode(.{ .id = .main, .token = 0, - .left = 0, - .right = 0, + .left = Node.Index.invalid, + .right = 
Node.Index.invalid, }); - assert(node_index == 0); + assert(node_index.value == 0); + assert(node_index.valid); const members = try analyzer.containerMembers(); const member_range = members.toRange(); - analyzer.nodes.items[0].left = @intCast(member_range.start); - analyzer.nodes.items[0].right = @intCast(member_range.end); + analyzer.nodes.items[0].left = .{ .value = @intCast(member_range.start) }; + analyzer.nodes.items[0].right = .{ .value = @intCast(member_range.end) }; const end = std.time.Instant.now() catch unreachable; @@ -471,4 +619,18 @@ const ExpressionMutabilityQualifier = enum { const Keyword = enum { @"return", @"fn", + @"while", + void, + noreturn, +}; + +// These types are meant to be used by the semantic analyzer +pub const ContainerDeclaration = struct { + members: []const Node.Index, +}; + +pub const SymbolDeclaration = struct { + type_node: Node.Index, + initialization_node: Node.Index, + mutability_token: Token.Index, }; diff --git a/src/main.zig b/src/main.zig index ce0e6df..93052c3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3,7 +3,6 @@ const Allocator = std.mem.Allocator; const assert = std.debug.assert; const Compilation = @import("Compilation.zig"); -const fs = @import("fs.zig"); pub const seed = std.math.maxInt(u64); const default_src_file = "src/test/main.b"; @@ -13,17 +12,18 @@ pub fn main() !void { } fn singleCompilation(main_file_path: []const u8) !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - - const compilation = try Compilation.init(gpa.allocator()); - defer compilation.deinit(); + const allocator = std.heap.page_allocator; + const compilation = try Compilation.init(allocator); try compilation.compileModule(.{ .main_package_path = main_file_path, }); } +test { + _ = Compilation; +} + test "basic" { try singleCompilation(default_src_file); } From 48c3b5e2241f2558e150aba50faf71dff0bb2175 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Mon, 18 Sep 2023 07:31:26 -0600 Subject: [PATCH 5/6] ir --- .vscode/launch.json | 17 +- src/Compilation.zig | 97 ++++- src/backend/emit.zig | 185 +++++++++- src/backend/intermediate_representation.zig | 242 +++++++++++- src/backend/x86_64.zig | 0 src/data_structures.zig | 49 ++- src/frontend/semantic_analyzer.zig | 389 ++++++++++++-------- src/frontend/syntactic_analyzer.zig | 81 ++-- 8 files changed, 822 insertions(+), 238 deletions(-) create mode 100644 src/backend/x86_64.zig diff --git a/.vscode/launch.json b/.vscode/launch.json index 27965bb..46a561c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,13 +5,22 @@ "version": "0.2.0", "configurations": [ { - "type": "cppvsdbg", + "type": "lldb", "request": "launch", - "name": "Debug", - "program": "${workspaceFolder}/zig-out/bin/compiler.exe", + "name": "Launch", + "program": "${workspaceFolder}/zig-out/bin/compiler", "args": [], "cwd": "${workspaceFolder}", "preLaunchTask": "zig build" - } + }, + // { + // "type": "cppvsdbg", + // "request": "launch", + // "name": "Debug", + // "program": "${workspaceFolder}/zig-out/bin/compiler.exe", + // "args": [], + // "cwd": "${workspaceFolder}", + // "preLaunchTask": "zig build" + // } ] } \ No newline at end of file diff --git a/src/Compilation.zig b/src/Compilation.zig index 8c4f3e9..f471fcc 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -21,6 +21,7 @@ const syntactic_analyzer = @import("frontend/syntactic_analyzer.zig"); const Node = syntactic_analyzer.Node; const semantic_analyzer = @import("frontend/semantic_analyzer.zig"); const 
intermediate_representation = @import("backend/intermediate_representation.zig"); +const emit = @import("backend/emit.zig"); test { _ = lexical_analyzer; @@ -59,7 +60,8 @@ pub fn init(allocator: Allocator) !*Compilation { pub const Struct = struct { scope: Scope.Index, - initialization: Value.Index, + fields: ArrayList(Field.Index) = .{}, + pub const List = BlockList(@This()); pub const Index = List.Index; }; @@ -69,6 +71,7 @@ pub const Type = union(enum) { noreturn, bool, integer: Integer, + @"struct": Struct.Index, pub const List = BlockList(@This()); pub const Index = List.Index; }; @@ -85,16 +88,29 @@ pub const Integer = struct { /// A scope contains a bunch of declarations pub const Scope = struct { parent: Scope.Index, - type: Type.Index, + type: Type.Index = Type.Index.invalid, declarations: AutoHashMap(u32, Declaration.Index) = .{}, pub const List = BlockList(@This()); pub const Index = List.Index; }; -pub const Declaration = union(enum) { - unresolved: Node.Index, - struct_type: Struct, +pub const ScopeType = enum(u1) { + local = 0, + global = 1, +}; + +pub const Mutability = enum(u1) { + @"const", + @"var", +}; + +pub const Declaration = struct { + scope_type: ScopeType, + mutability: Mutability, + init_value: Value.Index, + name: []const u8, + pub const List = BlockList(@This()); pub const Index = List.Index; }; @@ -111,12 +127,17 @@ pub const Function = struct { pub const Index = Prototype.List.Index; }; + pub fn getBodyBlock(function: Function, module: *Module) *Block { + return module.blocks.get(function.body); + } + pub const List = BlockList(@This()); pub const Index = List.Index; }; pub const Block = struct { - foo: u32 = 0, + statements: ArrayList(Value.Index) = .{}, + reaches_end: bool, pub const List = BlockList(@This()); pub const Index = List.Index; }; @@ -129,27 +150,61 @@ pub const Field = struct { }; pub const Loop = struct { - foo: u32 = 0, + condition: Value.Index, + body: Value.Index, + breaks: bool, pub const List = BlockList(@This()); pub const Index = List.Index; }; -pub const Value = struct { - type: union(enum) { - declaration: Declaration.Index, - bool_true, - bool_false, - loop: Loop.Index, - function: Function.Index, - }, - is_const: bool, - is_comptime: bool, +const Runtime = struct { + foo: u32 = 0, +}; + +const Unresolved = struct { + node_index: Node.Index, +}; + +pub const Assignment = struct { + store: Value.Index, + load: Value.Index, pub const List = BlockList(@This()); pub const Index = List.Index; }; +pub const Value = union(enum) { + unresolved: Unresolved, + declaration: Declaration.Index, + void, + bool: bool, + undefined, + loop: Loop.Index, + function: Function.Index, + block: Block.Index, + runtime: Runtime, + assign: Assignment.Index, + type: Type.Index, + + pub const List = BlockList(@This()); + pub const Index = List.Index; + + pub fn isComptime(value: Value) bool { + return switch (value) { + .bool, .void, .undefined, .function => true, + else => false, + }; + } + + pub fn getType(value: *Value) !void { + switch (value.*) { + else => |t| @panic(@tagName(t)), + } + unreachable; + } +}; + pub const Module = struct { main_package: *Package, import_table: StringArrayHashMap(*File) = .{}, @@ -165,6 +220,7 @@ pub const Module = struct { types: BlockList(Type) = .{}, blocks: BlockList(Block) = .{}, loops: BlockList(Loop) = .{}, + assignments: BlockList(Assignment) = .{}, pub const Descriptor = struct { main_package_path: []const u8, @@ -354,7 +410,12 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! 
const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0]); - try intermediate_representation.initialize(compilation, module, packages[0], main_declaration); + var ir = try intermediate_representation.initialize(compilation, module, packages[0], main_declaration); + + switch (@import("builtin").cpu.arch) { + .x86_64 => |arch| try emit.get(arch).initialize(compilation.base_allocator, &ir), + else => {}, + } } fn generateAST() !void {} diff --git a/src/backend/emit.zig b/src/backend/emit.zig index 17b708d..a8f652f 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -6,7 +6,13 @@ const assert = std.debug.assert; const expect = std.testing.expect; const expectEqual = std.testing.expectEqual; -const ir = @import("ir.zig"); +const ir = @import("intermediate_representation.zig"); + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const AutoHashMap = data_structures.AutoHashMap; + +const jit_callconv = .SysV; const Section = struct { content: []align(page_size) u8, @@ -39,8 +45,13 @@ const Result = struct { const windows = std.os.windows; break :blk @as([*]align(0x1000) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size]; }, - .linux => blk: { - const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0; + .linux, .macos => |os_tag| blk: { + const execute_flag: switch (os_tag) { + .linux => u32, + .macos => c_int, + else => unreachable, + } = if (flags.executable) std.os.PROT.EXEC else 0; + const protection_flags: u32 = @intCast(std.os.PROT.READ | std.os.PROT.WRITE | execute_flag); const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE; break :blk std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0); @@ -60,16 +71,163 @@ const Result = struct { image.sections.text.index += 1; } - fn getEntryPoint(image: *const Result, comptime Function: type) *const Function { + fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType { comptime { - assert(@typeInfo(Function) == .Fn); + assert(@typeInfo(FunctionType) == .Fn); } assert(image.sections.text.content.len > 0); - return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point])); + return @as(*const FunctionType, @ptrCast(&image.sections.text.content[image.entry_point])); } }; +const SimpleRelocation = struct { + source: u32, + write_offset: u8, + instruction_len: u8, + size: u8, +}; + +const RelocationManager = struct { + in_function_relocations: data_structures.AutoHashMap(ir.BasicBlock.Index, ArrayList(SimpleRelocation)) = .{}, +}; + +pub fn get(comptime arch: std.Target.Cpu.Arch) type { + const backend = switch (arch) { + .x86_64 => @import("x86_64.zig"), + else => @compileError("Architecture not supported"), + }; + _ = backend; + const Function = struct { + block_byte_counts: ArrayList(u16), + byte_count: u32 = 0, + relocations: ArrayList(Relocation) = .{}, + block_map: AutoHashMap(ir.BasicBlock.Index, u32) = .{}, + const Relocation = struct { + source: ir.BasicBlock.Index, + destination: ir.BasicBlock.Index, + offset_offset: u8, + preferred_instruction_len: u8, + }; + }; + + const InstructionSelector = struct { + functions: ArrayList(Function), + }; + _ = InstructionSelector; + + return struct { + pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void { + _ = intermediate; + _ = allocator; + // var function_iterator = 
intermediate.functions.iterator(); + // var instruction_selector = InstructionSelector{ + // .functions = try ArrayList(Function).initCapacity(allocator, intermediate.functions.len), + // }; + // while (function_iterator.next()) |ir_function| { + // const function = instruction_selector.functions.addOneAssumeCapacity(); + // function.* = .{ + // .block_byte_counts = try ArrayList(u16).initCapacity(allocator, ir_function.blocks.items.len), + // }; + // try function.block_map.ensureTotalCapacity(allocator, @intCast(ir_function.blocks.items.len)); + // for (ir_function.blocks.items, 0..) |block_index, index| { + // function.block_map.putAssumeCapacity(allocator, block_index, @intCast(index)); + // } + // + // for (ir_function.blocks.items) |block_index| { + // const block = intermediate.blocks.get(block_index); + // var block_byte_count: u16 = 0; + // for (block.instructions.items) |instruction_index| { + // const instruction = intermediate.instructions.get(instruction_index).*; + // switch (instruction) { + // .phi => unreachable, + // .ret => unreachable, + // .jump => { + // block_byte_count += 2; + // }, + // } + // } + // function.block_byte_counts.appendAssumeCapacity(block_byte_count); + // } + // } + // unreachable; + } + }; +} + +pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void { + _ = allocator; + var result = try Result.create(); + _ = result; + var relocation_manager = RelocationManager{}; + _ = relocation_manager; + + var function_iterator = intermediate.functions.iterator(); + _ = function_iterator; + // while (function_iterator.next()) |function| { + // defer relocation_manager.in_function_relocations.clearRetainingCapacity(); + // + // for (function.blocks.items) |block_index| { + // if (relocation_manager.in_function_relocations.getPtr(block_index)) |relocations| { + // const current_offset: i64 = @intCast(result.sections.text.index); + // _ = current_offset; + // for (relocations.items) |relocation| switch (relocation.size) { + // inline @sizeOf(u8), @sizeOf(u32) => |relocation_size| { + // const Elem = switch (relocation_size) { + // @sizeOf(u8) => u8, + // @sizeOf(u32) => u32, + // else => unreachable, + // }; + // const Ptr = *align(1) Elem; + // _ = Ptr; + // const relocation_slice = result.sections.text.content[relocation.source + relocation.write_offset ..][0..relocation_size]; + // _ = relocation_slice; + // // std.math.cast( + // // + // unreachable; + // }, + // else => unreachable, + // }; + // // const ptr: *align(1) u32 = @ptrCast(&result.sections.text[relocation_source..][0..@sizeOf(u32)]); + // // ptr.* = + // // try relocations.append(allocator, @intCast(result.sections.text[)); + // } + // + // const block = intermediate.blocks.get(block_index); + // + // for (block.instructions.items) |instruction_index| { + // const instruction = intermediate.instructions.get(instruction_index); + // switch (instruction.*) { + // .jump => |jump_index| { + // const jump = intermediate.jumps.get(jump_index); + // assert(@as(u32, @bitCast(jump.source)) == @as(u32, @bitCast(block_index))); + // const relocation_index = result.sections.text.index + 1; + // if (@as(u32, @bitCast(jump.destination)) <= @as(u32, @bitCast(jump.source))) { + // unreachable; + // } else { + // result.appendCode(&(.{jmp_rel_32} ++ .{0} ** @sizeOf(u32))); + // const lookup_result = try relocation_manager.in_function_relocations.getOrPut(allocator, jump.destination); + // if (!lookup_result.found_existing) { + // lookup_result.value_ptr.* = .{}; + // } + // + // try 
lookup_result.value_ptr.append(allocator, .{ + // .source = @intCast(relocation_index), + // .write_offset = @sizeOf(u8), + // .instruction_len = @sizeOf(u8) + @sizeOf(u32), + // .size = @sizeOf(u32), + // }); + // } + // }, + // else => |t| @panic(@tagName(t)), + // } + // } + // } + // unreachable; + // } + // unreachable; +} + const Rex = enum(u8) { b = upper_4_bits | (1 << 0), x = upper_4_bits | (1 << 1), @@ -115,6 +273,7 @@ const prefix_rep = 0xf3; const prefix_rex_w = [1]u8{@intFromEnum(Rex.w)}; const prefix_16_bit_operand = [1]u8{0x66}; +const jmp_rel_32 = 0xe9; const ret = 0xc3; const mov_a_imm = [1]u8{0xb8}; const mov_reg_imm8: u8 = 0xb0; @@ -142,7 +301,7 @@ test "ret void" { var image = try Result.create(); image.appendCodeByte(ret); - const function_pointer = image.getEntryPoint(fn () callconv(.C) void); + const function_pointer = image.getEntryPoint(fn () callconv(jit_callconv) void); function_pointer(); } @@ -167,7 +326,7 @@ test "ret integer" { movAImm(&image, expected_number); image.appendCodeByte(ret); - const function_pointer = image.getEntryPoint(fn () callconv(.C) Int); + const function_pointer = image.getEntryPoint(fn () callconv(jit_callconv) Int); const result = function_pointer(); try expect(result == expected_number); } @@ -216,7 +375,7 @@ test "ret integer argument" { movRmR(&image, Int, .a, .di); image.appendCodeByte(ret); - const functionPointer = image.getEntryPoint(fn (Int) callconv(.C) Int); + const functionPointer = image.getEntryPoint(fn (Int) callconv(jit_callconv) Int); const result = functionPointer(number); try expectEqual(number, result); } @@ -246,7 +405,7 @@ test "ret sub arguments" { subRmR(&image, Int, .a, .si); image.appendCodeByte(ret); - const functionPointer = image.getEntryPoint(fn (Int, Int) callconv(.C) Int); + const functionPointer = image.getEntryPoint(fn (Int, Int) callconv(jit_callconv) Int); const result = functionPointer(a, b); try expectEqual(a - b, result); } @@ -328,7 +487,7 @@ fn TestIntegerBinaryOperation(comptime T: type) type { dstRmSrcR(&image, T, test_case.opcode, .a, .si); image.appendCodeByte(ret); - const functionPointer = image.getEntryPoint(fn (T, T) callconv(.C) T); + const functionPointer = image.getEntryPoint(fn (T, T) callconv(jit_callconv) T); const expected = test_case.callback(a, b); const result = functionPointer(a, b); if (should_log) { @@ -350,7 +509,7 @@ test "call after" { @as(*align(1) u32, @ptrCast(&image.sections.text.content[jump_patch_offset])).* = @intCast(jump_target - jump_source); image.appendCodeByte(ret); - const functionPointer = image.getEntryPoint(fn () callconv(.C) void); + const functionPointer = image.getEntryPoint(fn () callconv(jit_callconv) void); functionPointer(); } @@ -369,7 +528,7 @@ test "call before" { image.appendCode(&second_call); image.appendCodeByte(ret); - const functionPointer = image.getEntryPoint(fn () callconv(.C) void); + const functionPointer = image.getEntryPoint(fn () callconv(jit_callconv) void); functionPointer(); } diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index 15ed936..501319f 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -1,9 +1,243 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const print = std.debug.print; + const Compilation = @import("../Compilation.zig"); const Module = Compilation.Module; const Package = Compilation.Package; -pub fn initialize(compilation: *Compilation, module: 
*Module, package: *Package, main_declaration: Compilation.Declaration.Index) !void { - _ = main_declaration; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const BlockList = data_structures.BlockList; + +pub const Result = struct { + functions: BlockList(Function) = .{}, + blocks: BlockList(BasicBlock) = .{}, + instructions: BlockList(Instruction) = .{}, + jumps: BlockList(Jump) = .{}, +}; + +pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_file: Compilation.Type.Index) !Result { + _ = main_file; _ = package; - _ = module; - _ = compilation; + print("\nFunction count: {}\n", .{module.functions.len}); + + var function_iterator = module.functions.iterator(); + var builder = Builder{ + .allocator = compilation.base_allocator, + .module = module, + }; + + while (function_iterator.next()) |sema_function| { + print("\nFunction: {}\n", .{sema_function}); + + try builder.function(sema_function); + } + + return builder.ir; } + +pub const BasicBlock = struct { + instructions: ArrayList(Instruction.Index) = .{}, + incomplete_phis: ArrayList(Instruction.Index) = .{}, + filled: bool = false, + sealed: bool = false, + + pub const List = BlockList(@This()); + pub const Index = List.Index; + + fn seal(basic_block: *BasicBlock) void { + for (basic_block.incomplete_phis.items) |incomplete_phi| { + _ = incomplete_phi; + unreachable; + } + + basic_block.sealed = true; + } +}; + +const Instruction = union(enum) { + jump: Jump.Index, + phi: Phi.Index, + ret: Ret, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +const Phi = struct { + foo: u32 = 0, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +const Ret = struct { + value: Instruction.Index, +}; + +pub const Jump = struct { + source: BasicBlock.Index, + destination: BasicBlock.Index, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +const Function = struct { + blocks: ArrayList(BasicBlock.Index) = .{}, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Builder = struct { + allocator: Allocator, + ir: Result = .{}, + module: *Module, + current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid, + current_function_index: Function.Index = Function.Index.invalid, + + fn function(builder: *Builder, sema_function: Compilation.Function) !void { + builder.current_function_index = try builder.ir.functions.append(builder.allocator, .{}); + // TODO: arguments + builder.current_basic_block = try builder.newBlock(); + + const return_type = builder.module.types.get(builder.module.function_prototypes.get(sema_function.prototype).return_type); + const is_noreturn = return_type.* == .noreturn; + if (!is_noreturn) { + const exit_block = try builder.newBlock(); + const phi = try builder.appendToBlock(exit_block, .{ + .phi = Phi.Index.invalid, + }); + const ret = try builder.appendToBlock(exit_block, .{ + .ret = .{ + .value = phi, + }, + }); + _ = ret; + } + const sema_block = sema_function.getBodyBlock(builder.module); + try builder.block(sema_block, .{ .emit_exit_block = !is_noreturn }); + + try builder.dumpFunction(std.io.getStdErr().writer(), builder.current_function_index); + } + + fn dumpFunction(builder: *Builder, writer: anytype, index: Function.Index) !void { + const f = builder.ir.functions.get(index); + try writer.writeAll("Hello world!\n"); + print("Function blocks: {}\n", .{f.blocks.items.len}); + var function_instruction_index: usize = 0; 
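The filled/sealed flags and the incomplete_phis list on BasicBlock follow the shape of single-pass SSA construction: a block is filled once all of its instructions are emitted, and sealed once all of its predecessors are known, at which point pending phis can be completed. A reduced sketch of that invariant, with assumed names and fields:

const std = @import("std");

// Sketch of the seal invariant: a block may only be sealed once all of its
// predecessors are known; sealing then completes any pending phis. The names
// and fields below are illustrative, not the patch's exact API.
const SketchBlock = struct {
    predecessors_known: bool = false,
    pending_phi_count: u32 = 0,
    sealed: bool = false,

    fn seal(block: *SketchBlock) void {
        std.debug.assert(block.predecessors_known);
        // A real implementation would add one operand per predecessor to
        // each pending phi here; this sketch only clears the counter.
        block.pending_phi_count = 0;
        block.sealed = true;
    }
};

test "sealing clears pending phis" {
    var block = SketchBlock{ .predecessors_known = true, .pending_phi_count = 2 };
    block.seal();
    try std.testing.expect(block.sealed and block.pending_phi_count == 0);
}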
+ for (f.blocks.items, 0..) |block_index, function_block_index| { + print("#{}:\n", .{function_block_index}); + const function_block = builder.ir.blocks.get(block_index); + for (function_block.instructions.items) |instruction_index| { + const instruction = builder.ir.instructions.get(instruction_index); + print("%{}: {}\n", .{ function_instruction_index, instruction }); + function_instruction_index += 1; + } + + print("\n", .{}); + } + } + + fn blockInsideBasicBlock(builder: *Builder, sema_block: *Compilation.Block, block_index: BasicBlock.Index) !BasicBlock.Index { + builder.current_basic_block = block_index; + try builder.block(sema_block, .{}); + return builder.current_basic_block; + } + + const BlockOptions = packed struct { + emit_exit_block: bool = true, + }; + + fn block(builder: *Builder, sema_block: *Compilation.Block, options: BlockOptions) error{OutOfMemory}!void { + for (sema_block.statements.items) |sema_statement_index| { + const sema_statement = builder.module.values.get(sema_statement_index); + switch (sema_statement.*) { + .loop => |loop_index| { + const sema_loop = builder.module.loops.get(loop_index); + const sema_loop_condition = builder.module.values.get(sema_loop.condition); + const sema_loop_body = builder.module.values.get(sema_loop.body); + const condition: Compilation.Value.Index = switch (sema_loop_condition.*) { + .bool => |bool_value| switch (bool_value) { + true => Compilation.Value.Index.invalid, + false => unreachable, + }, + else => |t| @panic(@tagName(t)), + }; + + const original_block = builder.current_basic_block; + const jump_to_loop = try builder.append(.{ + .jump = undefined, + }); + const loop_body_block = try builder.newBlock(); + const loop_prologue_block = if (options.emit_exit_block) try builder.newBlock() else BasicBlock.Index.invalid; + + const loop_head_block = switch (condition.valid) { + false => loop_body_block, + true => unreachable, + }; + + builder.ir.instructions.get(jump_to_loop).jump = try builder.jump(.{ + .source = original_block, + .destination = loop_head_block, + }); + + const sema_body_block = builder.module.blocks.get(sema_loop_body.block); + builder.current_basic_block = try builder.blockInsideBasicBlock(sema_body_block, loop_body_block); + if (loop_prologue_block.valid) { + builder.ir.blocks.get(loop_prologue_block).seal(); + } + + if (sema_body_block.reaches_end) { + _ = try builder.append(.{ + .jump = try builder.jump(.{ + .source = builder.current_basic_block, + .destination = loop_head_block, + }), + }); + } + + builder.ir.blocks.get(builder.current_basic_block).filled = true; + builder.ir.blocks.get(loop_body_block).seal(); + if (!loop_head_block.eq(loop_body_block)) { + unreachable; + } + + if (loop_prologue_block.valid) { + builder.current_basic_block = loop_prologue_block; + } + }, + else => |t| @panic(@tagName(t)), + } + } + } + + fn jump(builder: *Builder, jump_descriptor: Jump) !Jump.Index { + const destination_block = builder.ir.blocks.get(jump_descriptor.destination); + assert(!destination_block.sealed); + return try builder.ir.jumps.append(builder.allocator, jump_descriptor); + } + + fn append(builder: *Builder, instruction: Instruction) !Instruction.Index { + assert(builder.current_basic_block.valid); + return builder.appendToBlock(builder.current_basic_block, instruction); + } + + fn appendToBlock(builder: *Builder, block_index: BasicBlock.Index, instruction: Instruction) !Instruction.Index { + const instruction_index = try builder.ir.instructions.append(builder.allocator, instruction); + try 
builder.ir.blocks.get(block_index).instructions.append(builder.allocator, instruction_index); + + return instruction_index; + } + + fn newBlock(builder: *Builder) !BasicBlock.Index { + const new_block_index = try builder.ir.blocks.append(builder.allocator, .{}); + const current_function = builder.ir.functions.get(builder.current_function_index); + const function_block_index = current_function.blocks.items.len; + try current_function.blocks.append(builder.allocator, new_block_index); + + print("Adding block: {}\n", .{function_block_index}); + + return new_block_index; + } +}; diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig new file mode 100644 index 0000000..e69de29 diff --git a/src/data_structures.zig b/src/data_structures.zig index 6edf4d2..cc47ff3 100644 --- a/src/data_structures.zig +++ b/src/data_structures.zig @@ -36,17 +36,59 @@ pub fn BlockList(comptime T: type) type { const List = @This(); pub const Index = packed struct(u32) { - valid: bool = true, + block: u24, index: u6, - block: u25, + _reserved: bool = false, + valid: bool = true, pub const invalid = Index{ .valid = false, .index = 0, .block = 0, }; + + pub fn eq(index: Index, other: Index) bool { + return @as(u32, @bitCast(index)) == @as(u32, @bitCast(other)); + } }; + pub const Iterator = struct { + block_index: u26, + element_index: u7, + list: *const List, + + pub fn next(i: *Iterator) ?T { + return if (i.nextPointer()) |ptr| ptr.* else null; + } + + pub fn nextPointer(i: *Iterator) ?*T { + if (i.element_index >= item_count) { + i.block_index += 1; + i.element_index = 0; + } + + while (i.block_index < i.list.blocks.items.len) : (i.block_index += 1) { + while (i.element_index < item_count) : (i.element_index += 1) { + if (i.list.blocks.items[i.block_index].bitset.isSet(i.element_index)) { + const index = i.element_index; + i.element_index += 1; + return &i.list.blocks.items[i.block_index].items[index]; + } + } + } + + return null; + } + }; + + pub fn iterator(list: *const List) Iterator { + return .{ + .block_index = 0, + .element_index = 0, + .list = list, + }; + } + pub fn get(list: *List, index: Index) *T { assert(index.valid); return &list.blocks.items[index.block].items[index.index]; @@ -59,6 +101,7 @@ pub fn BlockList(comptime T: type) type { // Follow the guess if (list.blocks.items[list.first_block].allocateIndex()) |index| { list.blocks.items[list.first_block].items[index] = element; + list.len += 1; return .{ .index = index, .block = @intCast(list.first_block), @@ -69,8 +112,10 @@ pub fn BlockList(comptime T: type) type { } else { const block_index = list.blocks.items.len; const new_block = list.blocks.addOneAssumeCapacity(); + new_block.* = .{}; const index = new_block.allocateIndex() catch unreachable; new_block.items[index] = element; + list.len += 1; return .{ .index = index, .block = @intCast(block_index), diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index 761054b..65be4e4 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -7,10 +7,12 @@ const File = Compilation.File; const Module = Compilation.Module; const Package = Compilation.Package; +const Assignment = Compilation.Assignment; const Block = Compilation.Block; const Declaration = Compilation.Declaration; const Field = Compilation.Field; const Function = Compilation.Function; +const Loop = Compilation.Loop; const Scope = Compilation.Scope; const Struct = Compilation.Struct; const Type = Compilation.Type; @@ -45,90 +47,157 @@ const Analyzer = struct { fn 
comptimeBlock(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index { const comptime_node = analyzer.nodes[node_index.unwrap()]; - const comptime_block_node = analyzer.nodes[comptime_node.left.unwrap()]; - var statement_node_indices = ArrayList(Node.Index){}; - switch (comptime_block_node.id) { - .block_one => { - try statement_node_indices.append(analyzer.allocator, comptime_block_node.left); + const comptime_block = try analyzer.block(scope, .{ .none = {} }, comptime_node.left); + return try analyzer.module.values.append(analyzer.allocator, .{ + .block = comptime_block, + }); + } + + fn assign(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Assignment.Index { + print("Assign: #{}", .{node_index.value}); + const node = analyzer.nodes[node_index.unwrap()]; + assert(node.id == .assign); + const Result = struct { + left: Value.Index, + right: Value.Index, + }; + const result: Result = switch (node.left.valid) { + // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` + false => .{ + .left = Value.Index.invalid, + .right = try analyzer.expression(scope, ExpectType.none, node.right), }, + true => { + const left_node = analyzer.nodes[node.left.unwrap()]; + print("left node index: {}. Left node: {}", .{ node.left, left_node }); + // const id = analyzer.tokenIdentifier(.token); + // print("id: {s}\n", .{id}); + const left = try analyzer.expression(scope, ExpectType.none, node.left); + _ = left; + unreachable; + }, + }; + + print("Assignment: L: {}. R: {}\n", .{ result.left, result.right }); + + if (result.left.valid and analyzer.module.values.get(result.left).isComptime() and analyzer.module.values.get(result.right).isComptime()) { + unreachable; + } else { + const assignment_index = try analyzer.module.assignments.append(analyzer.allocator, .{ + .store = result.left, + .load = result.right, + }); + return assignment_index; + } + } + + fn block(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index { + var reaches_end = true; + const block_node = analyzer.nodes[node_index.unwrap()]; + var statement_nodes = ArrayList(Node.Index){}; + switch (block_node.id) { + .block_one, .comptime_block_one => { + try statement_nodes.append(analyzer.allocator, block_node.left); + }, + .block_zero, .comptime_block_zero => {}, else => |t| @panic(@tagName(t)), } - var statement_values = ArrayList(Value.Index){}; + const is_comptime = switch (block_node.id) { + .comptime_block_zero, .comptime_block_one => true, + .block_zero, .block_one => false, + else => |t| @panic(@tagName(t)), + }; + _ = is_comptime; + + var statements = ArrayList(Value.Index){}; + + for (statement_nodes.items) |statement_node_index| { + if (!reaches_end) { + unreachable; + } - for (statement_node_indices.items) |statement_node_index| { const statement_node = analyzer.nodes[statement_node_index.unwrap()]; - switch (statement_node.id) { - .assign => { - const assign_expression = try analyzer.assign(scope, statement_node_index); - try statement_values.append(analyzer.allocator, assign_expression); + const statement_value = switch (statement_node.id) { + inline .assign, .simple_while => |statement_id| blk: { + const specific_value_index = switch (statement_id) { + .assign => try analyzer.assign(scope, statement_node_index), + .simple_while => statement: { + const loop_index = try analyzer.module.loops.append(analyzer.allocator, .{ + .condition = Value.Index.invalid, + .body = Value.Index.invalid, + .breaks = false, + }); 
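The loops.append call above reserves the slot with placeholder fields before the condition and body are analyzed; the fields are patched afterwards through the returned handle, so child analysis can already refer to the loop. A reduced sketch of this reserve-then-patch pattern, using a plain ArrayList and assumed names:

const std = @import("std");

// Sketch: reserve a slot first so nested analysis can refer to its index,
// then patch the fields once the children are known. Names are assumptions.
const LoopSketch = struct {
    condition: u32,
    body: u32,
};

test "reserve a slot, then patch it" {
    var loops = std.ArrayList(LoopSketch).init(std.testing.allocator);
    defer loops.deinit();

    const index = loops.items.len;
    try loops.append(.{ .condition = 0, .body = 0 }); // placeholder
    // ... analyze condition and body, possibly referring back to `index` ...
    loops.items[index] = .{ .condition = 1, .body = 2 };
    try std.testing.expectEqual(@as(u32, 1), loops.items[index].condition);
}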
+ const loop_structure = analyzer.module.loops.get(loop_index); + const while_condition = try analyzer.expression(scope, ExpectType.boolean, statement_node.left); + const while_body = try analyzer.expression(scope, expect_type, statement_node.right); + loop_structure.condition = while_condition; + loop_structure.body = while_body; + + reaches_end = loop_structure.breaks or while_condition.valid; + + break :statement loop_index; + }, + else => unreachable, + }; + const value = @unionInit(Value, switch (statement_id) { + .assign => "assign", + .simple_while => "loop", + else => unreachable, + }, specific_value_index); + const value_index = try analyzer.module.values.append(analyzer.allocator, value); + break :blk value_index; }, else => |t| @panic(@tagName(t)), - } + }; + try statements.append(analyzer.allocator, statement_value); } - // TODO - - return Value.Index.invalid; + return try analyzer.module.blocks.append(analyzer.allocator, .{ + .statements = statements, + .reaches_end = reaches_end, + }); } - fn assign(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index { - const node = analyzer.nodes[node_index.unwrap()]; - - print("\nAssign. Left: {}. Right: {}\n", .{ node.left, node.right }); - // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` - if (node.left.valid) { - @panic("Not discard"); - } else { - return try analyzer.expression(scope, ExpectType{ .none = {} }, node.right); - } + fn whileExpression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node: Node) !Loop.Index { + _ = node; + _ = expect_type; + _ = scope; + _ = analyzer; } - fn block(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) !Block.Index { - const block_node = analyzer.nodes[node_index.unwrap()]; - var statements = ArrayList(Node.Index){}; - switch (block_node.id) { - .block_one => { - try statements.append(analyzer.allocator, block_node.left); - }, - .block_zero => {}, + fn resolve(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, value: *Value) !void { + const node_index = switch (value.*) { + .unresolved => |unresolved| unresolved.node_index, else => |t| @panic(@tagName(t)), - } - - for (statements.items) |statement_node_index| { - _ = try analyzer.expression(scope, expect_type, statement_node_index); - // const statement_node = analyzer.nodes[statement_node_index.unwrap()]; - // - // switch (statement_node.id) { - // try .simple_while => { - // const while_condition = try analyzer.expression(scope, ExpectType.boolean, statement_node.left); - // _ = while_condition; - // const while_block = try analyzer.block(scope, expect_type, statement_node.right); - // _ = while_block; - // unreachable; - // }, - // else => |t| @panic(@tagName(t)), - // } - } - - return try analyzer.module.blocks.append(analyzer.allocator, .{}); + }; + value.* = try analyzer.resolveNode(scope, expect_type, node_index); } - fn expression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) error{OutOfMemory}!Value.Index { + fn doIdentifier(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node: Node) !Value.Index { + assert(node.id == .identifier); + const identifier_hash = try analyzer.identifierFromToken(node.token); + // TODO: search in upper scopes too + const identifier_scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_hash); + if (identifier_scope_lookup.found_existing) { + const declaration_index = identifier_scope_lookup.value_ptr.*; + 
const declaration = analyzer.module.declarations.get(declaration_index); + const init_value = analyzer.module.values.get(declaration.init_value); + try analyzer.resolve(scope, expect_type, init_value); + if (init_value.* != .runtime and declaration.mutability == .@"const") { + return declaration.init_value; + } else { + unreachable; + } + } else { + @panic("TODO: not found"); + } + } + + fn resolveNode(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) anyerror!Value { const node = analyzer.nodes[node_index.unwrap()]; return switch (node.id) { - .identifier => blk: { - const identifier_hash = try analyzer.identifierFromToken(node.token); - // TODO: search in upper scopes too - const identifier_scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_hash); - if (identifier_scope_lookup.found_existing) { - const declaration_index = identifier_scope_lookup.value_ptr.*; - const declaration = analyzer.module.declarations.get(declaration_index); - break :blk try analyzer.analyzeDeclaration(scope, declaration); - } else { - @panic("TODO: not found"); - } - }, + .identifier => unreachable, .compiler_intrinsic_one => blk: { const intrinsic_name = analyzer.tokenIdentifier(node.token + 1); const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable; @@ -148,14 +217,9 @@ const Analyzer = struct { unreachable; } - const file_struct_declaration_index = try analyzeFile(analyzer.allocator, analyzer.module, imported_file.file); - break :blk try analyzer.module.values.append(analyzer.allocator, .{ - .type = .{ - .declaration = file_struct_declaration_index, - }, - .is_const = true, - .is_comptime = true, - }); + break :blk .{ + .type = try analyzeFile(analyzer.allocator, analyzer.module, imported_file.file), + }; }, else => unreachable, } @@ -174,15 +238,27 @@ const Analyzer = struct { .prototype = function_prototype_index, .body = function_body, }); - const value_index = try analyzer.module.values.append(analyzer.allocator, .{ - .type = .{ - .function = function_index, - }, - .is_const = true, - .is_comptime = true, - }); - break :blk value_index; + break :blk .{ + .function = function_index, + }; }, + .keyword_true => unreachable, + .simple_while => unreachable, + // .assign => try analyzer.assign(scope, node_index), + .block_zero, .block_one => blk: { + const block_index = try analyzer.block(scope, expect_type, node_index); + break :blk .{ + .block = block_index, + }; + }, + else => |t| @panic(@tagName(t)), + }; + } + + fn expression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) !Value.Index { + const node = analyzer.nodes[node_index.unwrap()]; + return switch (node.id) { + .identifier => analyzer.doIdentifier(scope, expect_type, node), .keyword_true => blk: { switch (expect_type) { .none => {}, @@ -195,22 +271,9 @@ const Analyzer = struct { break :blk bool_true; }, - .simple_while => blk: { - const while_condition = try analyzer.expression(scope, ExpectType.boolean, node.left); - _ = while_condition; - const while_body = try analyzer.block(scope, expect_type, node.right); - _ = while_body; - const loop_index = try analyzer.module.loops.append(analyzer.allocator, .{}); - const value_index = try analyzer.module.values.append(analyzer.allocator, .{ - .type = .{ - .loop = loop_index, - }, - // TODO: - .is_const = false, - .is_comptime = false, - }); - break :blk value_index; - }, + .block_zero => try analyzer.module.values.append(analyzer.allocator, .{ + .block = try 
analyzer.block(scope, expect_type, node_index), + }), else => |t| @panic(@tagName(t)), }; } @@ -249,44 +312,36 @@ const Analyzer = struct { } fn analyzeDeclaration(analyzer: *Analyzer, scope: *Scope, declaration: *Declaration) !Value.Index { - switch (declaration.*) { - .unresolved => |node_index| { - const declaration_node = analyzer.nodes[node_index.unwrap()]; - return switch (declaration_node.id) { - .simple_variable_declaration => blk: { - const expect_type = switch (declaration_node.left.valid) { - true => unreachable, - false => @unionInit(ExpectType, "none", {}), - }; - - const initialization_expression = try analyzer.expression(scope, expect_type, declaration_node.right); - const value = analyzer.module.values.get(initialization_expression); - if (value.is_comptime and value.is_const) { - break :blk initialization_expression; - } - - unreachable; - }, - else => |t| @panic(@tagName(t)), - }; - }, - .struct_type => unreachable, - } + _ = declaration; + _ = scope; + _ = analyzer; + // switch (declaration.*) { + // .unresolved => |node_index| { + // const declaration_node = analyzer.nodes[node_index.unwrap()]; + // return switch (declaration_node.id) { + // .simple_variable_declaration => blk: { + // const expect_type = switch (declaration_node.left.valid) { + // true => unreachable, + // false => @unionInit(ExpectType, "none", {}), + // }; + // + // const initialization_expression = try analyzer.expression(scope, expect_type, declaration_node.right); + // const value = analyzer.module.values.get(initialization_expression); + // if (value.is_comptime and value.is_const) { + // break :blk initialization_expression; + // } + // + // unreachable; + // }, + // else => |t| @panic(@tagName(t)), + // }; + // }, + // .struct_type => unreachable, + // } @panic("TODO: analyzeDeclaration"); } - fn containerMember(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !void { - const node = analyzer.nodes[node_index.unwrap()]; - switch (node.id) { - .simple_variable_declaration => {}, - .@"comptime" => { - _ = try analyzer.comptimeBlock(scope, node_index); - }, - else => std.debug.panic("Tag: {}", .{node.id}), - } - } - fn globalSymbolDeclaration(analyzer: *Analyzer, symbol_declaration: SymbolDeclaration) !void { if (symbol_declaration.type_node.get()) |type_node_index| { _ = type_node_index; @@ -339,21 +394,24 @@ const Analyzer = struct { }; } - fn structDeclaration(analyzer: *Analyzer, parent_scope: Scope.Index, container_declaration: syntactic_analyzer.ContainerDeclaration, index: Node.Index) !Declaration.Index { + fn structType(analyzer: *Analyzer, parent_scope: Scope.Index, container_declaration: syntactic_analyzer.ContainerDeclaration, index: Node.Index) !Type.Index { _ = index; - const new_scope = try analyzer.allocateScope(parent_scope, Type.Index.invalid); + const new_scope = try analyzer.allocateScope(.{ .parent = parent_scope }); const scope = new_scope.ptr; const is_file = !parent_scope.valid; assert(is_file); - // TODO: do it properly - const declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{ - .struct_type = .{ - .scope = new_scope.index, - .initialization = if (is_file) Value.Index.invalid else unreachable, - }, + + const struct_index = try analyzer.module.structs.append(analyzer.allocator, .{ + .scope = new_scope.index, }); - // TODO: + const struct_type = analyzer.module.structs.get(struct_index); + const type_index = try analyzer.module.types.append(analyzer.allocator, .{ + .@"struct" = struct_index, + }); + scope.type = type_index; + + _ = 
struct_type; assert(container_declaration.members.len > 0); const count = blk: { @@ -391,6 +449,11 @@ const Analyzer = struct { switch (declaration_node.id) { .@"comptime" => {}, .simple_variable_declaration => { + const mutability: Compilation.Mutability = switch (analyzer.tokens[declaration_node.token].id) { + .fixed_keyword_const => .@"const", + .fixed_keyword_var => .@"var", + else => |t| @panic(@tagName(t)), + }; const expected_identifier_token_index = declaration_node.token + 1; const expected_identifier_token = analyzer.tokens[expected_identifier_token_index]; if (expected_identifier_token.id != .identifier) { @@ -416,7 +479,14 @@ const Analyzer = struct { } const container_declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{ - .unresolved = declaration_node_index, + .name = declaration_name, + .scope_type = .global, + .mutability = mutability, + .init_value = try analyzer.module.values.append(analyzer.allocator, .{ + .unresolved = .{ + .node_index = declaration_node.right, + }, + }), }); scope_lookup.value_ptr.* = container_declaration_index; @@ -429,8 +499,9 @@ const Analyzer = struct { for (declaration_nodes.items) |declaration_node_index| { const declaration_node = analyzer.nodes[declaration_node_index.unwrap()]; switch (declaration_node.id) { - .@"comptime", .simple_variable_declaration => try analyzer.containerMember(scope, declaration_node_index), - else => unreachable, + .@"comptime" => _ = try analyzer.comptimeBlock(scope, declaration_node_index), + .simple_variable_declaration => {}, + else => |t| @panic(@tagName(t)), } } @@ -441,7 +512,7 @@ const Analyzer = struct { @panic("TODO: fields"); } - return declaration_index; + return type_index; } const MemberType = enum { @@ -494,11 +565,8 @@ const Analyzer = struct { index: Scope.Index, }; - fn allocateScope(analyzer: *Analyzer, parent_scope: Scope.Index, scope_type: Type.Index) !ScopeAllocation { - const scope_index = try analyzer.module.scopes.append(analyzer.allocator, .{ - .parent = parent_scope, - .type = scope_type, - }); + fn allocateScope(analyzer: *Analyzer, scope_value: Scope) !ScopeAllocation { + const scope_index = try analyzer.module.scopes.append(analyzer.allocator, scope_value); const scope = analyzer.module.scopes.get(scope_index); return .{ @@ -512,6 +580,9 @@ const ExpectType = union(enum) { none, type_index: Type.Index, + pub const none = ExpectType{ + .none = {}, + }; pub const boolean = ExpectType{ .type_index = type_boolean, }; @@ -562,7 +633,7 @@ const HardwareSignedIntegerType = enum { const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len; }; -pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) !Declaration.Index { +pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) !Type.Index { inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| { _ = try module.types.append(compilation.base_allocator, @unionInit(Type, enum_field.name, {})); } @@ -596,25 +667,17 @@ pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) } _ = try module.values.append(compilation.base_allocator, .{ - .type = .{ - .bool_false = {}, - }, - .is_const = true, - .is_comptime = true, + .bool = false, }); _ = try module.values.append(compilation.base_allocator, .{ - .type = .{ - .bool_true = {}, - }, - .is_const = true, - .is_comptime = true, + .bool = true, }); return analyzeExistingPackage(compilation, module, package); } -pub fn 
analyzeExistingPackage(compilation: *Compilation, module: *Module, package: *Package) !Declaration.Index { +pub fn analyzeExistingPackage(compilation: *Compilation, module: *Module, package: *Package) !Type.Index { const package_import = try module.importPackage(compilation.base_allocator, package); assert(!package_import.is_new); const package_file = package_import.file; @@ -622,7 +685,7 @@ pub fn analyzeExistingPackage(compilation: *Compilation, module: *Module, packag return try analyzeFile(compilation.base_allocator, module, package_file); } -pub fn analyzeFile(allocator: Allocator, module: *Module, file: *File) !Declaration.Index { +pub fn analyzeFile(allocator: Allocator, module: *Module, file: *File) !Type.Index { assert(file.status == .parsed); var analyzer = Analyzer{ @@ -634,7 +697,7 @@ pub fn analyzeFile(allocator: Allocator, module: *Module, file: *File) !Declarat .module = module, }; - const result = try analyzer.structDeclaration(Scope.Index.invalid, try mainNodeToContainerDeclaration(allocator, file), .{ .value = 0 }); + const result = try analyzer.structType(Scope.Index.invalid, try mainNodeToContainerDeclaration(allocator, file), .{ .value = 0 }); return result; } diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig index 8bfbe81..36f7fcd 100644 --- a/src/frontend/syntactic_analyzer.zig +++ b/src/frontend/syntactic_analyzer.zig @@ -17,6 +17,10 @@ pub const Result = struct { time: u64, }; +pub const Options = packed struct { + is_comptime: bool, +}; + // TODO: pack it to be more efficient pub const Node = packed struct(u128) { token: u32, @@ -68,6 +72,8 @@ pub const Node = packed struct(u128) { function_definition = 16, keyword_noreturn = 17, keyword_true = 18, + comptime_block_zero = 19, + comptime_block_one = 20, }; }; @@ -111,7 +117,7 @@ const Analyzer = struct { .fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) { .left_brace => blk: { analyzer.token_i += 1; - const comptime_block = try analyzer.block(); + const comptime_block = try analyzer.block(.{ .is_comptime = true }); break :blk .{ .id = .@"comptime", @@ -181,7 +187,9 @@ const Analyzer = struct { fn function(analyzer: *Analyzer) !Node.Index { const token = analyzer.token_i; const function_prototype = try analyzer.functionPrototype(); - const function_body = try analyzer.block(); + const is_comptime = false; + _ = is_comptime; + const function_body = try analyzer.block(.{ .is_comptime = false }); return analyzer.addNode(.{ .id = .function_definition, .token = token, @@ -223,27 +231,50 @@ const Analyzer = struct { } } - fn block(analyzer: *Analyzer) !Node.Index { + fn block(analyzer: *Analyzer, options: Options) !Node.Index { const left_brace = try analyzer.expectToken(.left_brace); const node_heap_top = analyzer.temporal_node_heap.items.len; defer analyzer.temporal_node_heap.shrinkRetainingCapacity(node_heap_top); while (analyzer.tokens[analyzer.token_i].id != .right_brace) { - const statement_index = try analyzer.statement(); + const first_statement_token = analyzer.tokens[analyzer.token_i]; + const statement_index = switch (first_statement_token.id) { + .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .colon => { + unreachable; + }, + else => blk: { + const identifier = analyzer.getIdentifier(first_statement_token); + std.debug.print("Starting statement with identifier: {s}\n", .{identifier}); + const result = try analyzer.assignExpression(); + _ = try analyzer.expectToken(.semicolon); + break :blk result; + }, + }, + 
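The statement dispatch moved into block() above decides on one token of lookahead (two for identifiers, to rule out labels) instead of backtracking. A reduced sketch of that dispatch over an assumed token set:

const std = @import("std");

// Sketch of one-token lookahead dispatch: peek at the current token (and the
// next one, for identifiers) to pick a statement parser without backtracking.
// The token ids below are a reduced, assumed set.
const TokenId = enum { identifier, colon, fixed_keyword_while, semicolon };

fn statementKind(tokens: []const TokenId, i: usize) enum { labeled, assign_like, while_loop } {
    return switch (tokens[i]) {
        .identifier => switch (tokens[i + 1]) {
            .colon => .labeled,
            else => .assign_like,
        },
        .fixed_keyword_while => .while_loop,
        else => unreachable,
    };
}

test "dispatch by lookahead" {
    const tokens = [_]TokenId{ .fixed_keyword_while, .identifier, .semicolon };
    try std.testing.expect(statementKind(&tokens, 0) == .while_loop);
    try std.testing.expect(statementKind(&tokens, 1) == .assign_like);
}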
.fixed_keyword_while => try analyzer.whileStatement(options), + else => unreachable, + }; try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index); } + _ = try analyzer.expectToken(.right_brace); const statement_array = analyzer.temporal_node_heap.items[node_heap_top..]; const node: Node = switch (statement_array.len) { 0 => .{ - .id = .block_zero, + .id = switch (options.is_comptime) { + true => .comptime_block_zero, + false => .block_zero, + }, .token = left_brace, .left = Node.Index.invalid, .right = Node.Index.invalid, }, 1 => .{ - .id = .block_one, + .id = switch (options.is_comptime) { + true => .comptime_block_one, + false => .block_one, + }, .token = left_brace, .left = statement_array[0], .right = Node.Index.invalid, @@ -253,28 +284,7 @@ const Analyzer = struct { return analyzer.addNode(node); } - fn statement(analyzer: *Analyzer) !Node.Index { - // TODO: more stuff before - const first_statement_token = analyzer.tokens[analyzer.token_i]; - return switch (first_statement_token.id) { - .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { - .colon => { - unreachable; - }, - else => blk: { - const identifier = analyzer.getIdentifier(first_statement_token); - std.debug.print("Starting statement with identifier: {s}\n", .{identifier}); - const result = try analyzer.assignExpression(); - _ = try analyzer.expectToken(.semicolon); - break :blk result; - }, - }, - .fixed_keyword_while => try analyzer.whileStatement(), - else => unreachable, - }; - } - - fn whileStatement(analyzer: *Analyzer) error{ OutOfMemory, unexpected_token, not_implemented }!Node.Index { + fn whileStatement(analyzer: *Analyzer, options: Options) error{ OutOfMemory, unexpected_token, not_implemented }!Node.Index { const while_identifier_index = try analyzer.expectToken(.fixed_keyword_while); _ = try analyzer.expectToken(.left_parenthesis); @@ -282,7 +292,7 @@ const Analyzer = struct { const while_condition = try analyzer.expression(); _ = try analyzer.expectToken(.right_parenthesis); - const while_block = try analyzer.block(); + const while_block = try analyzer.block(options); return analyzer.addNode(.{ .id = .simple_while, @@ -300,7 +310,7 @@ const Analyzer = struct { else => unreachable, }; - return analyzer.addNode(.{ + const node = Node{ .id = expression_id, .token = blk: { const token_i = analyzer.token_i; @@ -309,7 +319,9 @@ const Analyzer = struct { }, .left = expr, .right = try analyzer.expression(), - }); + }; + std.debug.print("assign:\nleft: {}.\nright: {}\n", .{ node.left, node.right }); + return analyzer.addNode(node); } fn compilerIntrinsic(analyzer: *Analyzer) !Node.Index { @@ -405,7 +417,8 @@ const Analyzer = struct { else => try analyzer.curlySuffixExpression(), }, .string_literal, .fixed_keyword_true, .fixed_keyword_false => try analyzer.curlySuffixExpression(), - .left_brace => try analyzer.block(), + // todo:? 
+            // .left_brace => try analyzer.block(),
             else => |id| {
                 log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)});
                 unreachable;
@@ -519,6 +532,7 @@ const Analyzer = struct {
             .colon => unreachable,
             else => blk: {
                 const identifier = analyzer.getIdentifier(token);
+                std.debug.print("identifier: {s}\n", .{identifier});
                 analyzer.token_i += 1;
                 if (equal(u8, identifier, "_")) {
                     break :blk Node.Index.invalid;
@@ -549,10 +563,9 @@ const Analyzer = struct {
     }

     fn addNode(analyzer: *Analyzer, node: Node) !Node.Index {
-        std.debug.print("Adding node {s}\n", .{@tagName(node.id)});
         const index = analyzer.nodes.items.len;
         try analyzer.nodes.append(analyzer.allocator, node);
-
+        std.debug.print("Adding node #{} {s}\n", .{ index, @tagName(node.id) });
         return Node.Index{
             .value = @intCast(index),
         };

From a2535ac5120accd6761f960fbfc2af517d74ca60 Mon Sep 17 00:00:00 2001
From: David Gonzalez Martin
Date: Fri, 22 Sep 2023 09:16:39 -0600
Subject: [PATCH 6/6] x86_64 backend

---
 src/backend/emit.zig | 307 +++++++++++++++++++++++++------------------
 1 file changed, 177 insertions(+), 130 deletions(-)

diff --git a/src/backend/emit.zig b/src/backend/emit.zig
index a8f652f..768b33d 100644
--- a/src/backend/emit.zig
+++ b/src/backend/emit.zig
@@ -71,6 +71,14 @@ const Result = struct {
         image.sections.text.index += 1;
     }

+    fn appendOnlyOpcodeSkipInstructionBytes(image: *Result, instruction: Instruction) void {
+        const instruction_descriptor = instruction_descriptors.get(instruction);
+        assert(instruction_descriptor.opcode_byte_count == instruction_descriptor.operand_offset);
+        image.appendCode(instruction_descriptor.getOpcode());
+
+        image.sections.text.index += instruction_descriptor.size - instruction_descriptor.opcode_byte_count;
+    }
+
     fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType {
         comptime {
             assert(@typeInfo(FunctionType) == .Fn);
@@ -81,15 +89,86 @@ const Result = struct {
     }
 };

-const SimpleRelocation = struct {
-    source: u32,
-    write_offset: u8,
-    instruction_len: u8,
-    size: u8,
+const Instruction = enum {
+    jmp_rel_8,
+
+    const Descriptor = struct {
+        operands: [4]Operand,
+        operand_count: u3,
+        operand_offset: u5,
+        size: u8,
+        opcode: [2]u8,
+        opcode_byte_count: u8,
+
+        fn getOperands(descriptor: Descriptor) []const Operand {
+            return descriptor.operands[0..descriptor.operand_count];
+        }
+
+        fn getOpcode(descriptor: Descriptor) []const u8 {
+            return descriptor.opcode[0..descriptor.opcode_byte_count];
+        }
+
+        fn new(opcode_bytes: []const u8, operands: []const Operand) Descriptor {
+            // TODO: prefixes
+            var result = Descriptor{
+                .operands = undefined,
+                .operand_count = @intCast(operands.len),
+                .operand_offset = opcode_bytes.len,
+                .size = opcode_bytes.len,
+                .opcode = undefined,
+                .opcode_byte_count = opcode_bytes.len,
+            };
+
+            for (opcode_bytes, result.opcode[0..opcode_bytes.len]) |opcode_byte, *out_opcode| {
+                out_opcode.* = opcode_byte;
+            }
+
+            for (operands, result.operands[0..operands.len]) |operand, *out_operand| {
+                out_operand.* = operand;
+                result.size += operand.size;
+            }
+
+            return result;
+        }
+    };
+
+    const Operand = struct {
+        type: Type,
+        size: u8,
+
+        const Type = enum {
+            rel,
+        };
+    };
 };

-const RelocationManager = struct {
-    in_function_relocations: data_structures.AutoHashMap(ir.BasicBlock.Index, ArrayList(SimpleRelocation)) = .{},
+const rel8 = Instruction.Operand{
+    .type = .rel,
+    .size = @sizeOf(u8),
+};
+
+const instruction_descriptors = blk: {
+    var result = std.EnumArray(Instruction, Instruction.Descriptor).initUndefined();
+    result.getPtr(.jmp_rel_8).* = Instruction.Descriptor.new(&.{0xeb}, &[_]Instruction.Operand{rel8});
+    break :blk result;
+};
+
+const InstructionSelector = struct {
+    functions: ArrayList(Function),
+
+    const Function = struct {
+        instructions: ArrayList(Instruction) = .{},
+        block_byte_counts: ArrayList(u16),
+        block_offsets: ArrayList(u32),
+        byte_count: u32 = 0,
+        relocations: ArrayList(Relocation) = .{},
+        block_map: AutoHashMap(ir.BasicBlock.Index, u32) = .{},
+
+        const Relocation = struct {
+            instruction: Instruction,
+            source: u16,
+            destination: u16,
+            block_offset: u16,
+        };
+    };
 };

 pub fn get(comptime arch: std.Target.Cpu.Arch) type {
@@ -98,136 +177,104 @@ pub fn get(comptime arch: std.Target.Cpu.Arch) type {
         else => @compileError("Architecture not supported"),
     };
     _ = backend;
-    const Function = struct {
-        block_byte_counts: ArrayList(u16),
-        byte_count: u32 = 0,
-        relocations: ArrayList(Relocation) = .{},
-        block_map: AutoHashMap(ir.BasicBlock.Index, u32) = .{},
-
-        const Relocation = struct {
-            source: ir.BasicBlock.Index,
-            destination: ir.BasicBlock.Index,
-            offset_offset: u8,
-            preferred_instruction_len: u8,
-        };
-    };
-
-    const InstructionSelector = struct {
-        functions: ArrayList(Function),
-    };
-    _ = InstructionSelector;

     return struct {
         pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void {
-            _ = intermediate;
-            _ = allocator;
-            // var function_iterator = intermediate.functions.iterator();
-            // var instruction_selector = InstructionSelector{
-            //     .functions = try ArrayList(Function).initCapacity(allocator, intermediate.functions.len),
-            // };
-            // while (function_iterator.next()) |ir_function| {
-            //     const function = instruction_selector.functions.addOneAssumeCapacity();
-            //     function.* = .{
-            //         .block_byte_counts = try ArrayList(u16).initCapacity(allocator, ir_function.blocks.items.len),
-            //     };
-            //     try function.block_map.ensureTotalCapacity(allocator, @intCast(ir_function.blocks.items.len));
-            //     for (ir_function.blocks.items, 0..) |block_index, index| {
-            //         function.block_map.putAssumeCapacity(allocator, block_index, @intCast(index));
-            //     }
-            //
-            //     for (ir_function.blocks.items) |block_index| {
-            //         const block = intermediate.blocks.get(block_index);
-            //         var block_byte_count: u16 = 0;
-            //         for (block.instructions.items) |instruction_index| {
-            //             const instruction = intermediate.instructions.get(instruction_index).*;
-            //             switch (instruction) {
-            //                 .phi => unreachable,
-            //                 .ret => unreachable,
-            //                 .jump => {
-            //                     block_byte_count += 2;
-            //                 },
-            //             }
-            //         }
-            //         function.block_byte_counts.appendAssumeCapacity(block_byte_count);
-            //     }
-            // }
-            // unreachable;
+            var result = try Result.create();
+            var function_iterator = intermediate.functions.iterator();
+            var instruction_selector = InstructionSelector{
+                .functions = try ArrayList(InstructionSelector.Function).initCapacity(allocator, intermediate.functions.len),
+            };
+
+            while (function_iterator.next()) |ir_function| {
+                const function = instruction_selector.functions.addOneAssumeCapacity();
+                function.* = .{
+                    .block_byte_counts = try ArrayList(u16).initCapacity(allocator, ir_function.blocks.items.len),
+                    .block_offsets = try ArrayList(u32).initCapacity(allocator, ir_function.blocks.items.len),
+                };
+                try function.block_map.ensureTotalCapacity(allocator, @intCast(ir_function.blocks.items.len));
+                for (ir_function.blocks.items, 0..) |block_index, index| {
+                    function.block_map.putAssumeCapacity(block_index, @intCast(index));
+                }
+
+                for (ir_function.blocks.items) |block_index| {
+                    const block = intermediate.blocks.get(block_index);
+                    function.block_offsets.appendAssumeCapacity(function.byte_count);
+                    var block_byte_count: u16 = 0;
+                    for (block.instructions.items) |instruction_index| {
+                        const instruction = intermediate.instructions.get(instruction_index).*;
+                        switch (instruction) {
+                            .phi => unreachable,
+                            .ret => unreachable,
+                            .jump => |jump_index| {
+                                const jump = intermediate.jumps.get(jump_index);
+                                const relocation = InstructionSelector.Function.Relocation{
+                                    .instruction = .jmp_rel_8,
+                                    .source = @intCast(function.block_map.get(jump.source) orelse unreachable),
+                                    .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable),
+                                    .block_offset = block_byte_count,
+                                };
+                                try function.relocations.append(allocator, relocation);
+                                block_byte_count += instruction_descriptors.get(.jmp_rel_8).size;
+                                try function.instructions.append(allocator, .jmp_rel_8);
+                            },
+                        }
+                    }
+                    function.block_byte_counts.appendAssumeCapacity(block_byte_count);
+                    function.byte_count += block_byte_count;
+                }
+            }
+
+            for (instruction_selector.functions.items) |function| {
+                for (function.instructions.items) |instruction| switch (instruction) {
+                    .jmp_rel_8 => result.appendOnlyOpcodeSkipInstructionBytes(instruction),
+
+                    // else => unreachable,
+                };
+            }
+
+            for (instruction_selector.functions.items) |function| {
+                var fix_size: bool = false;
+                _ = fix_size;
+                for (function.relocations.items) |relocation| {
+                    std.debug.print("RELOC: {}\n", .{relocation});
+                    const source_block = relocation.source;
+                    const destination_block = relocation.destination;
+                    const source_offset = function.block_offsets.items[source_block];
+                    const destination_offset = function.block_offsets.items[destination_block];
+                    std.debug.print("Source offset: {}. Destination: {}\n", .{ source_offset, destination_offset });
+                    const instruction_descriptor = instruction_descriptors.get(relocation.instruction);
+                    const instruction_offset = source_offset + relocation.block_offset;
+                    const really_source_offset = instruction_offset + instruction_descriptor.size;
+                    const displacement = @as(i64, destination_offset) - @as(i64, really_source_offset);
+
+                    const operands = instruction_descriptor.getOperands();
+                    switch (operands.len) {
+                        1 => switch (operands[0].size) {
+                            @sizeOf(u8) => {
+                                if (displacement >= std.math.minInt(i8) and displacement <= std.math.maxInt(i8)) {
+                                    const writer_index = instruction_offset + instruction_descriptor.operand_offset;
+                                    std.debug.print("Instruction offset: {}. Operand offset: {}. Writer index: {}. displacement: {}\n", .{ instruction_offset, instruction_descriptor.operand_offset, writer_index, displacement });
+                                    result.sections.text.content[writer_index] = @bitCast(@as(i8, @intCast(displacement)));
+                                } else {
+                                    unreachable;
+                                }
+                            },
+                            else => unreachable,
+                        },
+                        else => unreachable,
+                    }
+                }
+            }
+
+            const text_section = result.sections.text.content[0..result.sections.text.index];
+            for (text_section) |byte| {
+                std.debug.print("0x{x}\n", .{byte});
+            }
         }
     };
 }

-pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void {
-    _ = allocator;
-    var result = try Result.create();
-    _ = result;
-    var relocation_manager = RelocationManager{};
-    _ = relocation_manager;
-
-    var function_iterator = intermediate.functions.iterator();
-    _ = function_iterator;
-    // while (function_iterator.next()) |function| {
-    //     defer relocation_manager.in_function_relocations.clearRetainingCapacity();
-    //
-    //     for (function.blocks.items) |block_index| {
-    //         if (relocation_manager.in_function_relocations.getPtr(block_index)) |relocations| {
-    //             const current_offset: i64 = @intCast(result.sections.text.index);
-    //             _ = current_offset;
-    //             for (relocations.items) |relocation| switch (relocation.size) {
-    //                 inline @sizeOf(u8), @sizeOf(u32) => |relocation_size| {
-    //                     const Elem = switch (relocation_size) {
-    //                         @sizeOf(u8) => u8,
-    //                         @sizeOf(u32) => u32,
-    //                         else => unreachable,
-    //                     };
-    //                     const Ptr = *align(1) Elem;
-    //                     _ = Ptr;
-    //                     const relocation_slice = result.sections.text.content[relocation.source + relocation.write_offset ..][0..relocation_size];
-    //                     _ = relocation_slice;
-    //                     // std.math.cast(
-    //                     //
-    //                     unreachable;
-    //                 },
-    //                 else => unreachable,
-    //             };
-    //             // const ptr: *align(1) u32 = @ptrCast(&result.sections.text[relocation_source..][0..@sizeOf(u32)]);
-    //             // ptr.* =
-    //             // try relocations.append(allocator, @intCast(result.sections.text[));
-    //         }
-    //
-    //         const block = intermediate.blocks.get(block_index);
-    //
-    //         for (block.instructions.items) |instruction_index| {
-    //             const instruction = intermediate.instructions.get(instruction_index);
-    //             switch (instruction.*) {
-    //                 .jump => |jump_index| {
-    //                     const jump = intermediate.jumps.get(jump_index);
-    //                     assert(@as(u32, @bitCast(jump.source)) == @as(u32, @bitCast(block_index)));
-    //                     const relocation_index = result.sections.text.index + 1;
-    //                     if (@as(u32, @bitCast(jump.destination)) <= @as(u32, @bitCast(jump.source))) {
-    //                         unreachable;
-    //                     } else {
-    //                         result.appendCode(&(.{jmp_rel_32} ++ .{0} ** @sizeOf(u32)));
-    //                         const lookup_result = try relocation_manager.in_function_relocations.getOrPut(allocator, jump.destination);
-    //                         if (!lookup_result.found_existing) {
-    //                             lookup_result.value_ptr.* = .{};
-    //                         }
-    //
-    //                         try lookup_result.value_ptr.append(allocator, .{
-    //                             .source = @intCast(relocation_index),
-    //                             .write_offset = @sizeOf(u8),
-    //                             .instruction_len = @sizeOf(u8) + @sizeOf(u32),
-    //                             .size = @sizeOf(u32),
-    //                         });
-    //                     }
-    //                 },
-    //                 else => |t| @panic(@tagName(t)),
-    //             }
-    //         }
-    //     }
-    //     unreachable;
-    // }
-    // unreachable;
-}
-
 const Rex = enum(u8) {
     b = upper_4_bits | (1 << 0),
     x = upper_4_bits | (1 << 1),
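
A note on the relocation pass in this last patch: for jmp_rel_8, the descriptor built by Instruction.Descriptor.new yields size 2 (one 0xeb opcode byte plus one rel8 operand byte) and operand_offset 1, and the displacement is measured from the end of the jump instruction, exactly as the pass computes via really_source_offset. Below is a minimal standalone sketch of that rel8 back-patching arithmetic; the five-byte buffer and all offset values are invented purely for illustration and are not part of the patch.

    const std = @import("std");

    pub fn main() void {
        // A jmp rel8 with an unpatched displacement byte, followed by three nops.
        var text = [_]u8{ 0xeb, 0x00, 0x90, 0x90, 0x90 };

        const instruction_offset: u32 = 0; // where the jmp begins in the text section
        const operand_offset: u32 = 1; // the rel8 byte sits right after the 0xeb opcode
        const instruction_size: u32 = 2; // opcode byte + rel8 byte, as the descriptor computes
        const destination_offset: u32 = 5; // jump target: the first byte after the nops

        // x86 relative jumps count from the end of the jump instruction.
        const displacement = @as(i64, destination_offset) - @as(i64, instruction_offset + instruction_size);
        std.debug.assert(displacement >= std.math.minInt(i8) and displacement <= std.math.maxInt(i8));
        text[instruction_offset + operand_offset] = @bitCast(@as(i8, @intCast(displacement)));

        // Prints 0x3: a jump over the three nops.
        std.debug.print("patched rel8 = 0x{x}\n", .{text[1]});
    }

The write into text mirrors the patch's store to result.sections.text.content[writer_index]; the out-of-range branch the patch leaves as unreachable is where a wider encoding such as jmp rel32 would have to be selected instead.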