diff --git a/build.zig b/build.zig index c98b398..cd090e8 100644 --- a/build.zig +++ b/build.zig @@ -1,32 +1,16 @@ const std = @import("std"); -// Although this function looks imperative, note that its job is to -// declaratively construct a build graph that will be executed by an external -// runner. pub fn build(b: *std.Build) void { - // Standard target options allows the person running `zig build` to choose - // what target to build for. Here we do not override the defaults, which - // means any target is allowed, and the default is native. Other options - // for restricting supported target set are available. const target = b.standardTargetOptions(.{}); - - // Standard optimization options allow the person running `zig build` to select - // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not - // set a preferred release mode, allowing the user to decide how to optimize. const optimize = b.standardOptimizeOption(.{}); const exe = b.addExecutable(.{ .name = "compiler", - // In this case the main source file is merely a path, however, in more - // complicated build scripts, this could be a generated file. .root_source_file = .{ .path = "src/main.zig" }, .target = target, .optimize = optimize, }); - // This declares intent for the executable to be installed into the - // standard location when the user invokes the "install" step (the default - // step when running `zig build`). b.installArtifact(exe); b.installDirectory(.{ .source_dir = std.Build.LazyPath.relative("lib"), @@ -34,31 +18,21 @@ pub fn build(b: *std.Build) void { .install_subdir = "lib", }); - // This *creates* a Run step in the build graph, to be executed when another - // step is evaluated that depends on it. The next line below will establish - // such a dependency. const run_cmd = b.addRunArtifact(exe); - // By making the run step depend on the install step, it will be run from the - // installation directory rather than directly from within the cache directory. - // This is not necessary, however, if the application depends on other installed - // files, this ensures they will be present and in the expected location. run_cmd.step.dependOn(b.getInstallStep()); - // This allows the user to pass arguments to the application in the build - // command itself, like this: `zig build run -- arg1 arg2 etc` if (b.args) |args| { run_cmd.addArgs(args); } - // This creates a build step. It will be visible in the `zig build --help` menu, - // and can be selected like this: `zig build run` - // This will evaluate the `run` step rather than the default, which is "install". const run_step = b.step("run", "Run the app"); run_step.dependOn(&run_cmd.step); - // Creates a step for unit testing. This only builds the test executable - // but does not run it. 
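+    // Expose a `debug` step (`zig build debug`) that opens the freshly built
+    // executable in a native debugger; see addDebugCommand below.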
+    const debug_command = addDebugCommand(b, exe);
+    const debug_step = b.step("debug", "Debug the app");
+    debug_step.dependOn(&debug_command.step);
+
     const unit_tests = b.addTest(.{
         .root_source_file = .{ .path = "src/main.zig" },
         .target = target,
@@ -66,36 +40,39 @@ pub fn build(b: *std.Build) void {
     });
 
     const run_unit_tests = b.addRunArtifact(unit_tests);
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&run_unit_tests.step);
 
-    const debug_unit_tests_cmd = switch (@import("builtin").os.tag) {
+    const debug_unit_tests_cmd = addDebugCommand(b, unit_tests);
+    const debug_test_step = b.step("debug_test", "Run the tests through the debugger");
+    debug_test_step.dependOn(&debug_unit_tests_cmd.step);
+}
+
+/// Creates a Run step that launches `artifact` under a platform-specific debugger.
+fn addDebugCommand(b: *std.Build, artifact: *std.Build.Step.Compile) *std.Build.Step.Run {
+    return switch (@import("builtin").os.tag) {
         .linux => blk: {
             const result = b.addSystemCommand(&.{"gf2"});
-            result.addArtifactArg(unit_tests);
-            result.addArgs(&.{ "-ex", "r" });
+            result.addArtifactArg(artifact);
+
+            // gf2 forwards gdb arguments; `-ex r` makes test binaries start running immediately.
+            if (artifact.kind == .@"test") {
+                result.addArgs(&.{ "-ex", "r" });
+            }
+
             break :blk result;
         },
         .windows => blk: {
             const result = b.addSystemCommand(&.{"remedybg"});
             result.addArg("-g");
-            result.addArtifactArg(unit_tests);
+            result.addArtifactArg(artifact);
             break :blk result;
         },
         .macos => blk: {
-            // Broken, but it compiles
+            // Untested: assumes a `gdb` binary is available on PATH.
             const result = b.addSystemCommand(&.{"gdb"});
-            result.addArtifactArg(unit_tests);
+            result.addArtifactArg(artifact);
            break :blk result;
        },
        else => @compileError("Operating system not supported"),
    };
-
-    const debug_test_step = b.step("debug_test", "Run the tests through the debugger");
-    debug_test_step.dependOn(&debug_unit_tests_cmd.step);
-
-    // Similar to creating the run step earlier, this exposes a `test` step to
-    // the `zig build --help` menu, providing a way for the user to request
-    // running the unit tests.
- const test_step = b.step("test", "Run unit tests"); - test_step.dependOn(&run_unit_tests.step); } diff --git a/lib/std/start.nat b/lib/std/start.nat index 8661ac4..199fd13 100644 --- a/lib/std/start.nat +++ b/lib/std/start.nat @@ -1 +1,7 @@ -const builtin = #import("builtin"); +comptime { + _ = _start; +} + +const _start = () noreturn { + while (true) {} +}; diff --git a/lib/std/std.nat b/lib/std/std.nat index 3205fe7..3ce1556 100644 --- a/lib/std/std.nat +++ b/lib/std/std.nat @@ -1,4 +1,5 @@ -const start = #import("start.nat"); comptime { _ = start; } + +const start = #import("start.nat"); diff --git a/src/Compilation.zig b/src/Compilation.zig index ed8421a..8c4f3e9 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -2,18 +2,32 @@ const Compilation = @This(); const std = @import("std"); const assert = std.debug.assert; +const equal = std.mem.eql; const print = std.debug.print; const Allocator = std.mem.Allocator; const data_structures = @import("data_structures.zig"); const ArrayList = data_structures.ArrayList; +const AutoHashMap = data_structures.AutoHashMap; +const BlockList = data_structures.BlockList; +const HashMap = data_structures.HashMap; +const SegmentedList = data_structures.SegmentedList; const StringHashMap = data_structures.StringHashMap; const StringArrayHashMap = data_structures.StringArrayHashMap; const lexical_analyzer = @import("frontend/lexical_analyzer.zig"); const syntactic_analyzer = @import("frontend/syntactic_analyzer.zig"); +const Node = syntactic_analyzer.Node; const semantic_analyzer = @import("frontend/semantic_analyzer.zig"); +const intermediate_representation = @import("backend/intermediate_representation.zig"); + +test { + _ = lexical_analyzer; + _ = syntactic_analyzer; + _ = semantic_analyzer; + _ = data_structures; +} base_allocator: Allocator, cwd_absolute_path: []const u8, @@ -43,71 +57,210 @@ pub fn init(allocator: Allocator) !*Compilation { return compilation; } -pub fn deinit(compilation: *Compilation) void { - const allocator = compilation.base_allocator; - allocator.free(compilation.cwd_absolute_path); - allocator.free(compilation.executable_absolute_path); - allocator.destroy(compilation); -} +pub const Struct = struct { + scope: Scope.Index, + initialization: Value.Index, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Type = union(enum) { + void, + noreturn, + bool, + integer: Integer, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Integer = struct { + bit_count: u16, + signedness: Signedness, + pub const Signedness = enum(u1) { + unsigned = 0, + signed = 1, + }; +}; + +/// A scope contains a bunch of declarations +pub const Scope = struct { + parent: Scope.Index, + type: Type.Index, + declarations: AutoHashMap(u32, Declaration.Index) = .{}, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Declaration = union(enum) { + unresolved: Node.Index, + struct_type: Struct, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Function = struct { + body: Block.Index, + prototype: Prototype.Index, + + pub const Prototype = struct { + arguments: ?[]const Field.Index, + return_type: Type.Index, + + pub const List = BlockList(@This()); + pub const Index = Prototype.List.Index; + }; + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Block = struct { + foo: u32 = 0, + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const 
Field = struct { + foo: u32 = 0, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Loop = struct { + foo: u32 = 0, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; + +pub const Value = struct { + type: union(enum) { + declaration: Declaration.Index, + bool_true, + bool_false, + loop: Loop.Index, + function: Function.Index, + }, + is_const: bool, + is_comptime: bool, + + pub const List = BlockList(@This()); + pub const Index = List.Index; +}; pub const Module = struct { main_package: *Package, import_table: StringArrayHashMap(*File) = .{}, + string_table: AutoHashMap(u32, []const u8) = .{}, + declarations: BlockList(Declaration) = .{}, + structs: BlockList(Struct) = .{}, + scopes: BlockList(Scope) = .{}, + files: BlockList(File) = .{}, + values: BlockList(Value) = .{}, + functions: BlockList(Function) = .{}, + fields: BlockList(Field) = .{}, + function_prototypes: BlockList(Function.Prototype) = .{}, + types: BlockList(Type) = .{}, + blocks: BlockList(Block) = .{}, + loops: BlockList(Loop) = .{}, pub const Descriptor = struct { main_package_path: []const u8, }; - fn deinit(module: *Module, allocator: Allocator) void { - defer allocator.destroy(module); + const ImportFileResult = struct { + file: *File, + is_new: bool, + }; - for (module.import_table.values()) |file| { - file.deinit(allocator); + const ImportPackageResult = struct { + file: *File, + is_new: bool, + is_package: bool, + }; + + pub fn importFile(module: *Module, allocator: Allocator, current_file: *File, import_name: []const u8) !ImportPackageResult { + if (equal(u8, import_name, "std")) { + return module.importPackage(allocator, module.main_package.dependencies.get("std").?); } - var iterator = module.main_package.dependencies.valueIterator(); - while (iterator.next()) |it| { - const package = it.*; - package.deinit(allocator); + if (equal(u8, import_name, "builtin")) { + return module.importPackage(allocator, module.main_package.dependencies.get("builtin").?); } - module.main_package.deinit(allocator); + if (equal(u8, import_name, "main")) { + return module.importPackage(allocator, module.main_package); + } - module.import_table.clearAndFree(allocator); - } + if (current_file.package.dependencies.get(import_name)) |package| { + return module.importPackage(allocator, package); + } - fn importPackage(module: *Module, compilation: *Compilation, package: *Package) !ImportPackageResult { - const lookup_result = try module.import_table.getOrPut(compilation.base_allocator, package.directory.path); - errdefer _ = module.import_table.pop(); - if (lookup_result.found_existing) { - const file: *File = lookup_result.value_ptr.*; - try file.addPackageReference(compilation.base_allocator, package); + if (!std.mem.endsWith(u8, import_name, ".nat")) { unreachable; } - const file = try compilation.base_allocator.create(File); - lookup_result.value_ptr.* = file; - file.* = File{ - .relative_path = package.source_path, - .package = package, + + const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_name }); + const file_relative_path = std.fs.path.basename(full_path); + const package = current_file.package; + const import = try module.getFile(allocator, full_path, file_relative_path, package); + + try import.file.addFileReference(allocator, current_file); + + const result = ImportPackageResult{ + .file = import.file, + .is_new = import.is_new, + .is_package = false, + }; + + return result; + } + + fn getFile(module: *Module, allocator: 
Allocator, full_path: []const u8, relative_path: []const u8, package: *Package) !ImportFileResult { + const path_lookup = try module.import_table.getOrPut(allocator, full_path); + const file: *File = switch (path_lookup.found_existing) { + true => path_lookup.value_ptr.*, + false => blk: { + const new_file_index = try module.files.append(allocator, File{ + .relative_path = relative_path, + .package = package, + }); + const file = module.files.get(new_file_index); + path_lookup.value_ptr.* = file; + break :blk file; + }, }; - try file.addPackageReference(compilation.base_allocator, package); return .{ .file = file, - .is_new = true, + .is_new = !path_lookup.found_existing, }; } - fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { + pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult { + const full_path = try std.fs.path.resolve(allocator, &.{ package.directory.path, package.source_path }); + const import = try module.getFile(allocator, full_path, package.source_path, package); + try import.file.addPackageReference(allocator, package); + + return .{ + .file = import.file, + .is_new = import.is_new, + .is_package = true, + }; + } + + pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { _ = module; const source_file = try file.package.directory.handle.openFile(file.relative_path, .{}); - defer source_file.close(); const file_size = try source_file.getEndPos(); var file_buffer = try allocator.alloc(u8, file_size); const read_byte_count = try source_file.readAll(file_buffer); assert(read_byte_count == file_size); + source_file.close(); //TODO: adjust file maximum size file.source_code = file_buffer[0..read_byte_count]; @@ -140,7 +293,6 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! builtin_file.close(); const module: *Module = try compilation.base_allocator.create(Module); - defer module.deinit(compilation.base_allocator); module.* = Module{ .main_package = blk: { const result = try compilation.base_allocator.create(Package); @@ -157,16 +309,14 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! }; const std_package_dir = "lib/std"; + const package_descriptors = [2]struct { name: []const u8, directory_path: []const u8, }{ .{ .name = "std", - .directory_path = try switch (@import("builtin").is_test) { - true => compilation.pathFromCwd(std_package_dir), - false => compilation.pathFromCompiler(std_package_dir), - }, + .directory_path = try compilation.pathFromCwd(std_package_dir), }, .{ .name = "builtin", @@ -178,7 +328,8 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! }, }; - for (package_descriptors) |package_descriptor| { + var packages: [package_descriptors.len]*Package = undefined; + for (package_descriptors, &packages) |package_descriptor, *package_ptr| { const package = try compilation.base_allocator.create(Package); package.* = .{ .directory = .{ @@ -189,21 +340,22 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) ! 
}; try module.main_package.addDependency(compilation.base_allocator, package_descriptor.name, package); + + package_ptr.* = package; } assert(module.main_package.dependencies.size == 2); - _ = try module.importPackage(compilation, module.main_package.dependencies.get("std").?); + _ = try module.importPackage(compilation.base_allocator, module.main_package.dependencies.get("std").?); for (module.import_table.values()) |import| { try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import); } -} -const ImportPackageResult = struct { - file: *File, - is_new: bool, -}; + const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0]); + + try intermediate_representation.initialize(compilation, module, packages[0], main_declaration); +} fn generateAST() !void {} @@ -222,17 +374,6 @@ pub const Package = struct { try package.dependencies.ensureUnusedCapacity(allocator, 1); package.dependencies.putAssumeCapacityNoClobber(package_name, new_dependency); } - - fn deinit(package: *Package, allocator: Allocator) void { - if (package.dependencies.size > 0) { - assert(package.dependencies.size == 2); - } - package.dependencies.clearAndFree(allocator); - allocator.free(package.source_path); - allocator.free(package.directory.path); - package.directory.handle.close(); - allocator.destroy(package); - } }; pub const File = struct { @@ -241,6 +382,7 @@ pub const File = struct { lexical_analyzer_result: lexical_analyzer.Result = undefined, syntactic_analyzer_result: syntactic_analyzer.Result = undefined, package_references: ArrayList(*Package) = .{}, + file_references: ArrayList(*File) = .{}, relative_path: []const u8, package: *Package, @@ -259,6 +401,10 @@ pub const File = struct { try file.package_references.insert(allocator, 0, package); } + fn addFileReference(file: *File, allocator: Allocator, affected: *File) !void { + try file.file_references.append(allocator, affected); + } + pub fn fromRelativePath(allocator: Allocator, file_relative_path: []const u8) *File { const file_content = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize)); _ = file_content; @@ -271,30 +417,18 @@ pub const File = struct { fn lex(file: *File, allocator: Allocator) !void { assert(file.status == .loaded_into_memory); file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code); - if (!@import("builtin").is_test) { - print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time}); - } + // if (!@import("builtin").is_test) { + // print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time}); + // } file.status = .lexed; } fn parse(file: *File, allocator: Allocator) !void { assert(file.status == .lexed); file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code); - if (!@import("builtin").is_test) { - print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time}); - } + // if (!@import("builtin").is_test) { + // print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time}); + // } file.status = .parsed; } - - fn deinit(file: *File, allocator: Allocator) void { - defer allocator.destroy(file); - if (file.status == .parsed) { - file.syntactic_analyzer_result.free(allocator); - file.lexical_analyzer_result.free(allocator); - file.package_references.clearAndFree(allocator); - allocator.free(file.source_code); - } else { - unreachable; - } - } }; diff --git a/src/backend/emit.zig 
b/src/backend/emit.zig index b5d64e3..17b708d 100644 --- a/src/backend/emit.zig +++ b/src/backend/emit.zig @@ -31,17 +31,6 @@ const Result = struct { }; } - fn destroy(image: *Result) void { - inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| { - const section_bytes = @field(image.sections, field_name).content; - switch (@import("builtin").os.tag) { - .linux => std.os.munmap(section_bytes), - .windows => std.os.windows.VirtualFree(section_bytes.ptr, 0, std.os.windows.MEM_RELEASE), - else => @compileError("OS not supported"), - } - } - } - fn mmap(size: usize, flags: packed struct { executable: bool, }) ![]align(page_size) u8 { @@ -79,16 +68,6 @@ const Result = struct { assert(image.sections.text.content.len > 0); return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point])); } - - pub fn free(result: *Result, allocator: Allocator) void { - _ = allocator; - inline for (comptime std.meta.fieldNames(@TypeOf(result.sections))) |field_name| { - switch (@import("builtin").os.tag) { - .windows => unreachable, - else => std.os.munmap(@field(result.sections, field_name).content), - } - } - } }; const Rex = enum(u8) { @@ -160,9 +139,7 @@ fn movAImm(image: *Result, integer: anytype) void { } test "ret void" { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); image.appendCodeByte(ret); const function_pointer = image.getEntryPoint(fn () callconv(.C) void); @@ -185,7 +162,6 @@ fn getMaxInteger(comptime T: type) T { test "ret integer" { inline for (integer_types_to_test) |Int| { var image = try Result.create(); - defer image.free(std.testing.allocator); const expected_number = getMaxInteger(Int); movAImm(&image, expected_number); @@ -234,9 +210,7 @@ fn dstRmSrcR(image: *Result, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRe test "ret integer argument" { inline for (integer_types_to_test) |Int| { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); const number = getMaxInteger(Int); movRmR(&image, Int, .a, .di); @@ -264,9 +238,7 @@ fn subRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRe test "ret sub arguments" { inline for (integer_types_to_test) |Int| { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2); const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a); @@ -348,10 +320,8 @@ fn TestIntegerBinaryOperation(comptime T: type) type { opcode: OpcodeRmR, pub fn runTest(test_case: @This()) !void { - const allocator = std.testing.allocator; for (0..10) |_| { var image = try Result.create(); - defer image.free(allocator); const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2); const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a); movRmR(&image, T, .a, .di); @@ -371,9 +341,7 @@ fn TestIntegerBinaryOperation(comptime T: type) type { } test "call after" { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); const jump_patch_offset = image.sections.text.index + 1; image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 }); const jump_source = image.sections.text.index; @@ -387,9 +355,7 @@ test "call after" { } test "call before" { - const allocator = std.testing.allocator; var image = try Result.create(); - defer image.free(allocator); const first_jump_patch_offset = 
image.sections.text.index + 1; const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 }; image.appendCode(&first_call); diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig new file mode 100644 index 0000000..15ed936 --- /dev/null +++ b/src/backend/intermediate_representation.zig @@ -0,0 +1,9 @@ +const Compilation = @import("../Compilation.zig"); +const Module = Compilation.Module; +const Package = Compilation.Package; +pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_declaration: Compilation.Declaration.Index) !void { + _ = main_declaration; + _ = package; + _ = module; + _ = compilation; +} diff --git a/src/backend/ir.zig b/src/backend/ir.zig deleted file mode 100644 index 20b0eba..0000000 --- a/src/backend/ir.zig +++ /dev/null @@ -1,143 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const equal = std.mem.eql; - -const data_structures = @import("data_structures.zig"); -const ArrayList = data_structures.ArrayList; -const parser = @import("parser.zig"); - -const void_type = Type{ - .id = .void, -}; - -const Type = struct { - id: Id, - - fn isPrimitive(T: Type) bool { - return switch (T.id) { - .void => true, - }; - } - const Id = enum { - void, - }; -}; - -const Error = error{ - type_mismatch, - internal, - arguments_not_used, -}; - -const TopLevelDeclaration = struct { - type: Id, - index: u31, - - const Id = enum { - function, - expression, - }; -}; - -const Instruction = struct { - id: Id, - index: u16, - - const Id = enum { - ret_void, - }; -}; - -const ret_void = Instruction{ - .id = .ret_void, - .index = 0, -}; - -const ret = struct { - is_type: bool, -}; - -const Function = struct { - instructions: ArrayList(Instruction), - return_type: Type, -}; - -pub const Result = struct { - top_level_declarations: ArrayList(TopLevelDeclaration), - functions: ArrayList(Function), - instructions: struct {} = .{}, - - pub fn free(result: *Result, allocator: Allocator) void { - for (result.functions.items) |*function| { - function.instructions.clearAndFree(allocator); - } - result.functions.clearAndFree(allocator); - result.top_level_declarations.clearAndFree(allocator); - } -}; - -const Analyzer = struct { - parser: *const parser.Result, - top_level_declarations: ArrayList(TopLevelDeclaration), - functions: ArrayList(Function), - allocator: Allocator, - - fn analyze(allocator: Allocator, parser_result: *const parser.Result) Error!Result { - var analyzer = Analyzer{ - .parser = parser_result, - .top_level_declarations = ArrayList(TopLevelDeclaration){}, - .allocator = allocator, - .functions = ArrayList(Function){}, - }; - - for (parser_result.functions.items) |ast_function| { - if (ast_function.statements.items.len != 0) { - for (ast_function.statements.items) |statement| { - _ = statement; - @panic("TODO: statement"); - } - } else { - if (ast_function.arguments.items.len != 0) { - return Error.arguments_not_used; - } - - try analyzer.expectPrimitiveType(void_type, ast_function.return_type); - - const function_index = analyzer.functions.items.len; - - var function = Function{ - .instructions = ArrayList(Instruction){}, - .return_type = void_type, - }; - - function.instructions.append(allocator, ret_void) catch return Error.internal; - - analyzer.top_level_declarations.append(allocator, TopLevelDeclaration{ - .type = .function, - .index = @intCast(function_index), - }) catch return Error.internal; - - analyzer.functions.append(allocator, function) 
catch return Error.internal;
-            }
-        }
-
-        return .{
-            .top_level_declarations = analyzer.top_level_declarations,
-            .functions = analyzer.functions,
-        };
-    }
-
-    fn expectPrimitiveType(analyzer: *Analyzer, comptime type_value: Type, type_identifier_id: u32) Error!void {
-        assert(type_value.isPrimitive());
-        const type_identifier = analyzer.parser.strings.get(type_identifier_id) orelse return Error.internal;
-
-        if (!equal(u8, @tagName(type_value.id), type_identifier)) {
-            return Error.type_mismatch;
-        }
-    }
-};
-
-pub fn runTest(allocator: Allocator, parser_result: *const parser.Result) !Result {
-    return Analyzer.analyze(allocator, parser_result);
-}
diff --git a/src/data_structures.zig b/src/data_structures.zig
index f6a4bb1..6edf4d2 100644
--- a/src/data_structures.zig
+++ b/src/data_structures.zig
@@ -1,7 +1,110 @@
 const std = @import("std");
+const assert = std.debug.assert;
 
 pub const Allocator = std.mem.Allocator;
 pub const ArrayList = std.ArrayListUnmanaged;
-pub const HashMap = std.AutoHashMapUnmanaged;
+pub const AutoHashMap = std.AutoHashMapUnmanaged;
+pub const HashMap = std.HashMapUnmanaged;
+pub const SegmentedList = std.SegmentedList;
 pub const StringHashMap = std.StringHashMapUnmanaged;
 pub const StringArrayHashMap = std.StringArrayHashMapUnmanaged;
+
+/// Block-based pool: items live in fixed-size blocks and are addressed by a
+/// stable packed (block, index) handle instead of a raw pointer.
+pub fn BlockList(comptime T: type) type {
+    const item_count = 64;
+    const Block = struct {
+        items: [item_count]T = undefined,
+        bitset: Bitset = Bitset.initEmpty(),
+
+        const Bitset = std.StaticBitSet(item_count);
+
+        fn allocateIndex(block: *@This()) !u6 {
+            if (block.bitset.mask != std.math.maxInt(@TypeOf(block.bitset.mask))) {
+                // Take the lowest unset bit, i.e. the first free slot.
+                const index = @ctz(~block.bitset.mask);
+                block.bitset.set(index);
+                return @intCast(index);
+            } else {
+                return error.OutOfMemory;
+            }
+        }
+    };
+
+    return struct {
+        blocks: ArrayList(Block) = .{},
+        len: usize = 0,
+        first_block: u32 = 0,
+
+        const List = @This();
+
+        pub const Index = packed struct(u32) {
+            valid: bool = true,
+            index: u6,
+            block: u25,
+
+            pub const invalid = Index{
+                .valid = false,
+                .index = 0,
+                .block = 0,
+            };
+        };
+
+        pub fn get(list: *List, index: Index) *T {
+            assert(index.valid);
+            return &list.blocks.items[index.block].items[index.index];
+        }
+
+        pub fn append(list: *List, allocator: Allocator, element: T) !Index {
+            try list.ensureCapacity(allocator, list.len + 1);
+            const max_allocation = list.blocks.items.len * item_count;
+            if (list.len < max_allocation) {
+                // Follow the guess
+                if (list.blocks.items[list.first_block].allocateIndex()) |index| {
+                    list.blocks.items[list.first_block].items[index] = element;
+                    // Track the logical length so the capacity check above sees every stored element.
+                    list.len += 1;
+                    return .{
+                        .index = index,
+                        .block = @intCast(list.first_block),
+                    };
+                } else |_| {
+                    @panic("TODO");
+                }
+            } else {
+                const block_index = list.blocks.items.len;
+                const new_block = list.blocks.addOneAssumeCapacity();
+                // addOneAssumeCapacity returns uninitialized memory: reset the
+                // block so its occupancy bitset starts empty.
+                new_block.* = .{};
+                const index = new_block.allocateIndex() catch unreachable;
+                new_block.items[index] = element;
+                list.len += 1;
+                // Aim the guess at the new block, which is guaranteed to have room.
+                list.first_block = @intCast(block_index);
+                return .{
+                    .index = index,
+                    .block = @intCast(block_index),
+                };
+            }
+        }
+
+        pub fn ensureCapacity(list: *List, allocator: Allocator, new_capacity: usize) !void {
+            const max_allocation = list.blocks.items.len * item_count;
+            if (max_allocation < new_capacity) {
+                const block_count = new_capacity / item_count + @intFromBool(new_capacity % item_count != 0);
+                try list.blocks.ensureTotalCapacity(allocator, block_count);
+            }
+        }
+
+        test "Bitset index allocation" {
+            const expect = std.testing.expect;
+            var block = Block{};
+            for (0..item_count) |expected_index| {
+                const new_index = try block.allocateIndex();
+                try expect(new_index == expected_index);
+            }
+
+            _ = block.allocateIndex() catch return;
+
+            return error.TestUnexpectedResult;
+        }
+    };
+}
+
+pub fn enumFromString(comptime E: type, string: []const u8) ?E {
+    return inline for (@typeInfo(E).Enum.fields) |enum_field| {
+        if (std.mem.eql(u8, string, enum_field.name)) {
+            break @field(E, enum_field.name);
+        }
+    } else null;
+}
diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig
index e485a5c..a5f26ce 100644
--- a/src/frontend/lexical_analyzer.zig
+++ b/src/frontend/lexical_analyzer.zig
@@ -7,6 +7,7 @@ const equal = std.mem.eql;
 
 const data_structures = @import("../data_structures.zig");
 const ArrayList = data_structures.ArrayList;
+const enumFromString = data_structures.enumFromString;
 
 const Compilation = @import("../Compilation.zig");
 const fs = @import("../fs.zig");
@@ -17,29 +18,71 @@ pub const Token = packed struct(u64) {
     id: Id,
 
     pub const Id = enum(u8) {
-        identifier = 0,
-        number = 1,
-        string_literal = 2,
-        left_parenthesis = '(',
-        right_parenthesis = ')',
-        left_brace = '{',
-        right_brace = '}',
-        equal = '=',
-        colon = ':',
-        semicolon = ';',
-        hash = '#',
-        comma = ',',
-        bang = '!',
+        eof = 0x00,
+        identifier = 0x01,
+        number = 0x02,
+        string_literal = 0x03,
+        fixed_keyword_function = 0x04,
+        fixed_keyword_const = 0x05,
+        fixed_keyword_var = 0x06,
+        fixed_keyword_void = 0x07,
+        fixed_keyword_noreturn = 0x08,
+        fixed_keyword_comptime = 0x09,
+        fixed_keyword_while = 0x0a,
+        fixed_keyword_bool = 0x0b,
+        fixed_keyword_true = 0x0c,
+        fixed_keyword_false = 0x0d,
+        bang = '!', // 0x21
+        hash = '#', // 0x23
+        dollar_sign = '$', // 0x24
+        modulus = '%', // 0x25
+        ampersand = '&', // 0x26
+        left_parenthesis = '(', // 0x28
+        right_parenthesis = ')', // 0x29
+        asterisk = '*', // 0x2a
+        plus = '+', // 0x2b
+        comma = ',', // 0x2c
+        minus = '-', // 0x2d
+        period = '.', // 0x2e
+        slash = '/', // 0x2f
+        colon = ':', // 0x3a
+        semicolon = ';', // 0x3b
+        less = '<', // 0x3c
+        equal = '=', // 0x3d
+        greater = '>', // 0x3e
+        question_mark = '?', // 0x3f
+        at = '@', // 0x40
+        left_bracket = '[', // 0x5b
+        backslash = '\\', // 0x5c
+        right_bracket = ']', // 0x5d
+        caret = '^', // 0x5e
+        underscore = '_', // 0x5f
+        grave = '`', // 0x60
+        left_brace = '{', // 0x7b
+        vertical_bar = '|', // 0x7c
+        right_brace = '}', // 0x7d
+        tilde = '~', // 0x7e
     };
+
+    pub const Index = u32;
+};
+
+pub const FixedKeyword = enum {
+    @"comptime",
+    @"const",
+    @"var",
+    void,
+    noreturn,
+    function,
+    @"while",
+    bool,
+    true,
+    false,
+};
 
 pub const Result = struct {
     tokens: ArrayList(Token),
     time: u64,
-
-    pub fn free(result: *Result, allocator: Allocator) void {
-        result.tokens.clearAndFree(allocator);
-    }
 };
 
 pub fn analyze(allocator: Allocator, text: []const u8) !Result {
@@ -62,7 +105,23 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result {
                     break;
                 }
 
-                break :blk .identifier;
+                const identifier = text[start_index..][0 .. index - start_index];
+                std.debug.print("Identifier: {s}\n", .{identifier});
+
+                if (start_character == 'u' or start_character == 's') {
+                    var index_integer = start_index + 1;
+                    while (text[index_integer] >= '0' and text[index_integer] <= '9') {
+                        index_integer += 1;
+                    }
+
+                    // TODO: integer type names (e.g. u32/s32) are not tokenized yet.
+                    if (index_integer == index) {
+                        unreachable;
+                    }
+                }
+
+                break :blk if (enumFromString(FixedKeyword, text[start_index..][0 ..
index - start_index])) |fixed_keyword| switch (fixed_keyword) { + inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)), + } else .identifier; }, '(', ')', '{', '}', '-', '=', ';', '#' => |operator| blk: { index += 1; @@ -75,9 +134,17 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { break :blk .number; }, + '\'' => { + unreachable; + }, '"' => blk: { index += 1; - while (text[index] != '"') { + + while (true) { + if (text[index] == '"' and text[index - 1] != '"') { + break; + } + index += 1; } @@ -103,7 +170,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result { }); } - const should_log = false; + const should_log = true; if (should_log) { for (tokens.items, 0..) |token, i| { std.debug.print("#{} {s}\n", .{ i, @tagName(token.id) }); diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index e69de29..761054b 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -0,0 +1,668 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const equal = std.mem.eql; +const Compilation = @import("../Compilation.zig"); +const File = Compilation.File; +const Module = Compilation.Module; +const Package = Compilation.Package; + +const Block = Compilation.Block; +const Declaration = Compilation.Declaration; +const Field = Compilation.Field; +const Function = Compilation.Function; +const Scope = Compilation.Scope; +const Struct = Compilation.Struct; +const Type = Compilation.Type; +const Value = Compilation.Value; + +const lexical_analyzer = @import("lexical_analyzer.zig"); +const Token = lexical_analyzer.Token; + +const syntactic_analyzer = @import("syntactic_analyzer.zig"); +const ContainerDeclaration = syntactic_analyzer.ContainerDeclaration; +const Node = syntactic_analyzer.Node; +const SymbolDeclaration = syntactic_analyzer.SymbolDeclaration; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const HashMap = data_structures.AutoHashMap; + +const print = std.debug.print; + +const Analyzer = struct { + source_code: []const u8, + nodes: []const Node, + tokens: []const Token, + file: *File, + allocator: Allocator, + module: *Module, + + fn lazyGlobalDeclaration(analyzer: *Analyzer, node_index: Node.Index) void { + print("Global: {}", .{analyzer.nodes[node_index.unwrap()]}); + } + + fn comptimeBlock(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index { + const comptime_node = analyzer.nodes[node_index.unwrap()]; + + const comptime_block_node = analyzer.nodes[comptime_node.left.unwrap()]; + var statement_node_indices = ArrayList(Node.Index){}; + switch (comptime_block_node.id) { + .block_one => { + try statement_node_indices.append(analyzer.allocator, comptime_block_node.left); + }, + else => |t| @panic(@tagName(t)), + } + + var statement_values = ArrayList(Value.Index){}; + + for (statement_node_indices.items) |statement_node_index| { + const statement_node = analyzer.nodes[statement_node_index.unwrap()]; + switch (statement_node.id) { + .assign => { + const assign_expression = try analyzer.assign(scope, statement_node_index); + try statement_values.append(analyzer.allocator, assign_expression); + }, + else => |t| @panic(@tagName(t)), + } + } + + // TODO + + return Value.Index.invalid; + } + + fn assign(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index { + const node = 
analyzer.nodes[node_index.unwrap()]; + + print("\nAssign. Left: {}. Right: {}\n", .{ node.left, node.right }); + // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result``` + if (node.left.valid) { + @panic("Not discard"); + } else { + return try analyzer.expression(scope, ExpectType{ .none = {} }, node.right); + } + } + + fn block(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) !Block.Index { + const block_node = analyzer.nodes[node_index.unwrap()]; + var statements = ArrayList(Node.Index){}; + switch (block_node.id) { + .block_one => { + try statements.append(analyzer.allocator, block_node.left); + }, + .block_zero => {}, + else => |t| @panic(@tagName(t)), + } + + for (statements.items) |statement_node_index| { + _ = try analyzer.expression(scope, expect_type, statement_node_index); + // const statement_node = analyzer.nodes[statement_node_index.unwrap()]; + // + // switch (statement_node.id) { + // try .simple_while => { + // const while_condition = try analyzer.expression(scope, ExpectType.boolean, statement_node.left); + // _ = while_condition; + // const while_block = try analyzer.block(scope, expect_type, statement_node.right); + // _ = while_block; + // unreachable; + // }, + // else => |t| @panic(@tagName(t)), + // } + } + + return try analyzer.module.blocks.append(analyzer.allocator, .{}); + } + + fn expression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) error{OutOfMemory}!Value.Index { + const node = analyzer.nodes[node_index.unwrap()]; + return switch (node.id) { + .identifier => blk: { + const identifier_hash = try analyzer.identifierFromToken(node.token); + // TODO: search in upper scopes too + const identifier_scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_hash); + if (identifier_scope_lookup.found_existing) { + const declaration_index = identifier_scope_lookup.value_ptr.*; + const declaration = analyzer.module.declarations.get(declaration_index); + break :blk try analyzer.analyzeDeclaration(scope, declaration); + } else { + @panic("TODO: not found"); + } + }, + .compiler_intrinsic_one => blk: { + const intrinsic_name = analyzer.tokenIdentifier(node.token + 1); + const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable; + print("Intrinsic: {s}", .{@tagName(intrinsic)}); + switch (intrinsic) { + .import => { + const import_argument = analyzer.nodes[node.left.unwrap()]; + switch (import_argument.id) { + .string_literal => { + const import_name = analyzer.tokenStringLiteral(import_argument.token); + const imported_file = try analyzer.module.importFile(analyzer.allocator, analyzer.file, import_name); + + if (imported_file.is_new) { + // TODO: fix error + analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, imported_file.file) catch return error.OutOfMemory; + } else { + unreachable; + } + + const file_struct_declaration_index = try analyzeFile(analyzer.allocator, analyzer.module, imported_file.file); + break :blk try analyzer.module.values.append(analyzer.allocator, .{ + .type = .{ + .declaration = file_struct_declaration_index, + }, + .is_const = true, + .is_comptime = true, + }); + }, + else => unreachable, + } + }, + } + unreachable; + }, + .function_definition => blk: { + const function_prototype_index = try analyzer.functionPrototype(node.left); + + const function_body = try analyzer.block(scope, .{ + .type_index = 
analyzer.functionPrototypeReturnType(function_prototype_index), + }, node.right); + + const function_index = try analyzer.module.functions.append(analyzer.allocator, .{ + .prototype = function_prototype_index, + .body = function_body, + }); + const value_index = try analyzer.module.values.append(analyzer.allocator, .{ + .type = .{ + .function = function_index, + }, + .is_const = true, + .is_comptime = true, + }); + break :blk value_index; + }, + .keyword_true => blk: { + switch (expect_type) { + .none => {}, + .type_index => |expected_type| { + if (@as(u32, @bitCast(type_boolean)) != @as(u32, @bitCast(expected_type))) { + @panic("TODO: compile error"); + } + }, + } + + break :blk bool_true; + }, + .simple_while => blk: { + const while_condition = try analyzer.expression(scope, ExpectType.boolean, node.left); + _ = while_condition; + const while_body = try analyzer.block(scope, expect_type, node.right); + _ = while_body; + const loop_index = try analyzer.module.loops.append(analyzer.allocator, .{}); + const value_index = try analyzer.module.values.append(analyzer.allocator, .{ + .type = .{ + .loop = loop_index, + }, + // TODO: + .is_const = false, + .is_comptime = false, + }); + break :blk value_index; + }, + else => |t| @panic(@tagName(t)), + }; + } + + fn functionPrototypeReturnType(analyzer: *Analyzer, function_prototype_index: Function.Prototype.Index) Type.Index { + const function_prototype = analyzer.module.function_prototypes.get(function_prototype_index); + return function_prototype.return_type; + } + + fn functionPrototype(analyzer: *Analyzer, node_index: Node.Index) !Function.Prototype.Index { + const node = analyzer.nodes[node_index.unwrap()]; + switch (node.id) { + .simple_function_prototype => { + const arguments: ?[]const Field.Index = blk: { + const argument_node = analyzer.nodes[node.left.get() orelse break :blk null]; + switch (argument_node.id) { + else => |t| @panic(@tagName(t)), + } + }; + const return_type_node = analyzer.nodes[node.right.unwrap()]; + const return_type: Type.Index = switch (return_type_node.id) { + .identifier => { + unreachable; + }, + .keyword_noreturn => .{ .block = 0, .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.noreturn) }, + else => |t| @panic(@tagName(t)), + }; + + return try analyzer.module.function_prototypes.append(analyzer.allocator, .{ + .arguments = arguments, + .return_type = return_type, + }); + }, + else => |t| @panic(@tagName(t)), + } + } + + fn analyzeDeclaration(analyzer: *Analyzer, scope: *Scope, declaration: *Declaration) !Value.Index { + switch (declaration.*) { + .unresolved => |node_index| { + const declaration_node = analyzer.nodes[node_index.unwrap()]; + return switch (declaration_node.id) { + .simple_variable_declaration => blk: { + const expect_type = switch (declaration_node.left.valid) { + true => unreachable, + false => @unionInit(ExpectType, "none", {}), + }; + + const initialization_expression = try analyzer.expression(scope, expect_type, declaration_node.right); + const value = analyzer.module.values.get(initialization_expression); + if (value.is_comptime and value.is_const) { + break :blk initialization_expression; + } + + unreachable; + }, + else => |t| @panic(@tagName(t)), + }; + }, + .struct_type => unreachable, + } + + @panic("TODO: analyzeDeclaration"); + } + + fn containerMember(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !void { + const node = analyzer.nodes[node_index.unwrap()]; + switch (node.id) { + .simple_variable_declaration => {}, + .@"comptime" => { + _ = try 
analyzer.comptimeBlock(scope, node_index);
+            },
+            else => std.debug.panic("Tag: {}", .{node.id}),
+        }
+    }
+
+    fn globalSymbolDeclaration(analyzer: *Analyzer, symbol_declaration: SymbolDeclaration) !void {
+        if (symbol_declaration.type_node.get()) |type_node_index| {
+            _ = type_node_index;
+            @panic("TODO: type node");
+        }
+        const initialization_node = analyzer.nodes[symbol_declaration.initialization_node.unwrap()];
+        switch (initialization_node.id) {
+            .compiler_intrinsic_one => {
+                const intrinsic_name = analyzer.tokenIdentifier(initialization_node.token + 1);
+                const intrinsic = inline for (@typeInfo(Intrinsic).Enum.fields) |intrinsic_enum_field| {
+                    if (equal(u8, intrinsic_name, intrinsic_enum_field.name)) {
+                        break @field(Intrinsic, intrinsic_enum_field.name);
+                    }
+                } else unreachable;
+                print("Intrinsic: {s}", .{@tagName(intrinsic)});
+                switch (intrinsic) {
+                    .import => {
+                        // left.get() yields an optional; unwrap asserts the import has an argument node.
+                        const import_argument = analyzer.nodes[initialization_node.left.unwrap()];
+                        switch (import_argument.id) {
+                            .string_literal => unreachable,
+                            else => unreachable,
+                        }
+                    },
+                }
+                // const intrinsic_node_index = initialization_node.left.unwrap();
+                // const intrinsic_node = analyzer.nodes[intrinsic_node_index];
+                //
+                // switch (intrinsic_node.id) {
+                //     .string_literal =>
+                // }
+                // print("intrinsic: {}", .{intrinsic_node.id});
+            },
+            else => unreachable,
+        }
+        print("Init node: {}\n", .{initialization_node});
+        @panic("TODO");
+    }
+
+    fn symbolDeclaration(analyzer: *Analyzer, node_index: Node.Index) SymbolDeclaration {
+        const node = analyzer.nodes[node_index.unwrap()];
+        return switch (node.id) {
+            .simple_variable_declaration => .{
+                .type_node = node.left,
+                .initialization_node = node.right,
+                .mutability_token = node.token,
+            },
+            else => unreachable,
+        };
+    }
+
+    fn structDeclaration(analyzer: *Analyzer, parent_scope: Scope.Index, container_declaration: syntactic_analyzer.ContainerDeclaration, index: Node.Index) !Declaration.Index {
+        _ = index;
+        const new_scope = try analyzer.allocateScope(parent_scope, Type.Index.invalid);
+        const scope = new_scope.ptr;
+
+        const is_file = !parent_scope.valid;
+        assert(is_file);
+        // TODO: do it properly
+        const declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{
+            .struct_type = .{
+                .scope = new_scope.index,
+                .initialization = if (is_file) Value.Index.invalid else unreachable,
+            },
+        });
+        // TODO:
+        assert(container_declaration.members.len > 0);
+
+        const count = blk: {
+            var result: struct {
+                fields: u32 = 0,
+                declarations: u32 = 0,
+            } = .{};
+            for (container_declaration.members) |member_index| {
+                const member = analyzer.nodes[member_index.unwrap()];
+                const member_type = getContainerMemberType(member.id);
+
+                switch (member_type) {
+                    .declaration => result.declarations += 1,
+                    .field => result.fields += 1,
+                }
+            }
+            break :blk result;
+        };
+
+        var declaration_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.declarations);
+        var field_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.fields);
+
+        for (container_declaration.members) |member_index| {
+            const member = analyzer.nodes[member_index.unwrap()];
+            const member_type = getContainerMemberType(member.id);
+            const array_list = switch (member_type) {
+                .declaration => &declaration_nodes,
+                .field => &field_nodes,
+            };
+            array_list.appendAssumeCapacity(member_index);
+        }
+
+        for (declaration_nodes.items) |declaration_node_index| {
+            const declaration_node = analyzer.nodes[declaration_node_index.unwrap()];
+            switch (declaration_node.id) {
+                .@"comptime" => {},
+                .simple_variable_declaration => {
+                    const expected_identifier_token_index = declaration_node.token + 1;
+                    const expected_identifier_token = analyzer.tokens[expected_identifier_token_index];
+                    if (expected_identifier_token.id != .identifier) {
+                        print("Error: found: {}", .{expected_identifier_token.id});
+                        @panic("Expected identifier");
+                    }
+                    // TODO: Check if it is a keyword
+
+                    const identifier_index = try analyzer.identifierFromToken(expected_identifier_token_index);
+
+                    const declaration_name = analyzer.tokenIdentifier(expected_identifier_token_index);
+                    // Check if the symbol name is already occupied in the same scope
+                    const scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_index);
+                    if (scope_lookup.found_existing) {
+                        std.debug.panic("Existing name in lookup: {s}", .{declaration_name});
+                    }
+
+                    // Check if the symbol name is already occupied in parent scopes
+                    var upper_scope_index = scope.parent;
+
+                    while (upper_scope_index.valid) {
+                        @panic("TODO: upper scope");
+                    }
+
+                    const container_declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{
+                        .unresolved = declaration_node_index,
+                    });
+
+                    scope_lookup.value_ptr.* = container_declaration_index;
+                },
+                else => unreachable,
+            }
+        }
+
+        // TODO: consider iterating over scope declarations instead?
+        for (declaration_nodes.items) |declaration_node_index| {
+            const declaration_node = analyzer.nodes[declaration_node_index.unwrap()];
+            switch (declaration_node.id) {
+                .@"comptime", .simple_variable_declaration => try analyzer.containerMember(scope, declaration_node_index),
+                else => unreachable,
+            }
+        }
+
+        for (field_nodes.items) |field_index| {
+            const field_node = analyzer.nodes[field_index.unwrap()];
+            _ = field_node;
+
+            @panic("TODO: fields");
+        }
+
+        return declaration_index;
+    }
+
+    const MemberType = enum {
+        declaration,
+        field,
+    };
+
+    fn getContainerMemberType(member_id: Node.Id) MemberType {
+        return switch (member_id) {
+            .@"comptime" => .declaration,
+            .simple_variable_declaration => .declaration,
+            else => unreachable,
+        };
+    }
+
+    fn identifierFromToken(analyzer: *Analyzer, token_index: Token.Index) !u32 {
+        const identifier = analyzer.tokenIdentifier(token_index);
+        const key: u32 = @truncate(std.hash.Wyhash.hash(0, identifier));
+
+        const lookup_result = try analyzer.module.string_table.getOrPut(analyzer.allocator, key);
+
+        if (lookup_result.found_existing) {
+            return lookup_result.key_ptr.*;
+        } else {
+            // Record the identifier so the entry created by getOrPut is not left undefined.
+            lookup_result.value_ptr.* = identifier;
+            return key;
+        }
+    }
+
+    fn tokenIdentifier(analyzer: *Analyzer, token_index: Token.Index) []const u8 {
+        const token = analyzer.tokens[token_index];
+        assert(token.id == .identifier);
+        const identifier = analyzer.source_code[token.start..][0..token.len];
+
+        return identifier;
+    }
+
+    fn tokenStringLiteral(analyzer: *Analyzer, token_index: Token.Index) []const u8 {
+        const token = analyzer.tokens[token_index];
+        assert(token.id == .string_literal);
+        // Eat double quotes
+        const start = token.start + 1;
+        const len = token.len - 2;
+        const string_literal = analyzer.source_code[start..][0..len];
+
+        return string_literal;
+    }
+
+    const ScopeAllocation = struct {
+        ptr: *Scope,
+        index: Scope.Index,
+    };
+
+    fn allocateScope(analyzer: *Analyzer, parent_scope: Scope.Index, scope_type: Type.Index) !ScopeAllocation {
+        const scope_index = try analyzer.module.scopes.append(analyzer.allocator, .{
+            .parent = parent_scope,
+            .type = scope_type,
+        });
+        const scope = analyzer.module.scopes.get(scope_index);
+
+        return .{
+            .ptr = scope,
+            .index = scope_index,
+        };
+    }
+};
+
+const ExpectType = union(enum) {
+    none,
+    type_index: Type.Index,
+
+    pub const boolean = ExpectType{
+        .type_index = type_boolean,
+    };
+};
+
+const type_boolean = Type.Index{
+    .block = 0,
+    .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.bool),
+};
+
+// Values are appended in initialize() below: bool_false first (index 0), then bool_true (index 1).
+const bool_false = Value.Index{
+    .block = 0,
+    .index = 0,
+};
+
+const bool_true = Value.Index{
+    .block = 0,
+    .index = 1,
+};
+
+const Intrinsic = enum {
+    import,
+};
+
+const FixedTypeKeyword = enum {
+    void,
+    noreturn,
+    bool,
+
+    const offset = 0;
+};
+
+const HardwareUnsignedIntegerType = enum {
+    u8,
+    u16,
+    u32,
+    u64,
+
+    const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len;
+};
+
+const HardwareSignedIntegerType = enum {
+    s8,
+    s16,
+    s32,
+    s64,
+
+    const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len;
+};
+
+pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) !Declaration.Index {
+    inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| {
+        _ = try module.types.append(compilation.base_allocator, @unionInit(Type, enum_field.name, {}));
+    }
+
+    inline for (@typeInfo(HardwareUnsignedIntegerType).Enum.fields) |enum_field| {
+        _ = try module.types.append(compilation.base_allocator, .{
+            .integer = .{
+                .signedness = .unsigned,
+                .bit_count = switch (@field(HardwareUnsignedIntegerType, enum_field.name)) {
+                    .u8 => 8,
+                    .u16 => 16,
+                    .u32 => 32,
+                    .u64 => 64,
+                },
+            },
+        });
+    }
+
+    inline for (@typeInfo(HardwareSignedIntegerType).Enum.fields) |enum_field| {
+        _ = try module.types.append(compilation.base_allocator, .{
+            .integer = .{
+                .signedness = .signed,
+                .bit_count = switch (@field(HardwareSignedIntegerType, enum_field.name)) {
+                    .s8 => 8,
+                    .s16 => 16,
+                    .s32 => 32,
+                    .s64 => 64,
+                },
+            },
+        });
+    }
+
+    _ = try module.values.append(compilation.base_allocator, .{
+        .type = .{
+            .bool_false = {},
+        },
+        .is_const = true,
+        .is_comptime = true,
+    });
+
+    _ = try module.values.append(compilation.base_allocator, .{
+        .type = .{
+            .bool_true = {},
+        },
+        .is_const = true,
+        .is_comptime = true,
+    });
+
+    return analyzeExistingPackage(compilation, module, package);
+}
+
+pub fn analyzeExistingPackage(compilation: *Compilation, module: *Module, package: *Package) !Declaration.Index {
+    const package_import = try module.importPackage(compilation.base_allocator, package);
+    assert(!package_import.is_new);
+    const package_file = package_import.file;
+
+    return try analyzeFile(compilation.base_allocator, module, package_file);
+}
+
+pub fn analyzeFile(allocator: Allocator, module: *Module, file: *File) !Declaration.Index {
+    assert(file.status == .parsed);
+
+    var analyzer = Analyzer{
+        .source_code = file.source_code,
+        .nodes = file.syntactic_analyzer_result.nodes.items,
+        .tokens = file.lexical_analyzer_result.tokens.items,
+        .file = file,
+        .allocator = allocator,
+        .module = module,
+    };
+
+    const result = try analyzer.structDeclaration(Scope.Index.invalid, try mainNodeToContainerDeclaration(allocator, file), .{ .value = 0 });
+    return result;
+}
+
+fn mainNodeToContainerDeclaration(allocator: Allocator, file: *File) !ContainerDeclaration {
+    const main_node = getNode(file, 0);
+    var list_buffer: [2]Node.Index = undefined;
+    const left_node = getNode(file, main_node.left.value);
+    const node_list: []const Node.Index = blk: {
+        if (left_node.id != .node_list) {
+            const len = @as(u2, @intFromBool(main_node.left.valid)) + @as(u2,
@intFromBool(main_node.right.valid)) - @as(u2, @intFromBool(main_node.left.valid and main_node.right.valid and main_node.left.value == main_node.right.value));
+            assert(len > 0);
+            list_buffer[0] = main_node.left;
+            list_buffer[1] = main_node.right;
+            break :blk list_buffer[0..len];
+        } else {
+            @panic("TODO: get list");
+        }
+    };
+
+    const owned_node_list = try allocator.alloc(Node.Index, node_list.len);
+    @memcpy(owned_node_list, node_list);
+
+    // TODO: deal properly with this allocation
+    return .{
+        .members = owned_node_list,
+    };
+}
+
+fn getNode(file: *const File, index: u32) *Node {
+    return &file.syntactic_analyzer_result.nodes.items[index];
+}
diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig
index 54ae1ac..8bfbe81 100644
--- a/src/frontend/syntactic_analyzer.zig
+++ b/src/frontend/syntactic_analyzer.zig
@@ -6,6 +6,7 @@ const log = std.log;
 
 const data_structures = @import("../data_structures.zig");
 const ArrayList = data_structures.ArrayList;
+const enumFromString = data_structures.enumFromString;
 const HashMap = data_structures.HashMap;
 
 const lexical_analyzer = @import("lexical_analyzer.zig");
@@ -14,26 +15,40 @@ const Token = lexical_analyzer.Token;
 pub const Result = struct {
     nodes: ArrayList(Node),
     time: u64,
-
-    pub fn free(result: *Result, allocator: Allocator) void {
-        result.nodes.clearAndFree(allocator);
-    }
 };
 
-pub const Node = packed struct(u96) {
+// TODO: pack it to be more efficient
+pub const Node = packed struct(u128) {
     token: u32,
     id: Id,
     left: Node.Index,
     right: Node.Index,
 
-    pub const Index = u27;
+    pub const Index = packed struct(u32) {
+        value: u31,
+        valid: bool = true,
+
+        pub const invalid = Index{
+            .value = 0,
+            .valid = false,
+        };
+
+        pub fn get(index: Index) ?u32 {
+            return if (index.valid) index.value else null;
+        }
+
+        pub fn unwrap(index: Index) u32 {
+            assert(index.valid);
+            return index.value;
+        }
+    };
 
     pub const Range = struct {
         start: u32,
         end: u32,
     };
 
-    pub const Id = enum(u10) {
+    pub const Id = enum(u32) {
         main = 0,
         identifier = 1,
         number = 2,
@@ -46,6 +61,13 @@ pub const Node = packed struct(u96) {
         simple_variable_declaration = 9,
         assign = 10,
         @"comptime" = 11,
+        node_list = 12,
+        block_zero = 13,
+        simple_while = 14,
+        simple_function_prototype = 15,
+        function_definition = 16,
+        keyword_noreturn = 17,
+        keyword_true = 18,
     };
 };
 
@@ -63,10 +85,6 @@ const Analyzer = struct {
     allocator: Allocator,
     temporal_node_heap: ArrayList(Node.Index) = .{},
 
-    fn free(analyzer: *Analyzer) void {
-        _ = analyzer;
-    }
-
     fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 {
         if (analyzer.tokens[analyzer.token_i].id == token_id) {
             const result = analyzer.token_i;
@@ -90,56 +108,50 @@ const Analyzer = struct {
         while (analyzer.token_i < analyzer.tokens.len) {
             const first = analyzer.token_i;
             const member_node: Node = switch (analyzer.tokens[first].id) {
-                .identifier => blk: {
-                    const first_identifier_token = analyzer.tokens[first];
-                    analyzer.token_i += 1;
+                .fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) {
+                    .left_brace => blk: {
+                        analyzer.token_i += 1;
+                        const comptime_block = try analyzer.block();
 
-                    const identifier = analyzer.getIdentifier(first_identifier_token);
-
-                    if (equal(u8, identifier, "comptime")) {
-                        switch (analyzer.tokens[analyzer.token_i].id) {
-                            .left_brace => {
-                                const comptime_block = try analyzer.block();
-
-                                break :blk .{
-                                    .id = .@"comptime",
-                                    .token = first,
-                                    .left = comptime_block,
-                                    .right = 0,
-                                };
-                            },
-                            else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}),
-                        }
-                    } else {
-                        const is_const = equal(u8, identifier, "const");
-                        const is_var = equal(u8, identifier, "var");
-                        assert(is_const or is_var);
-
-                        _ = try analyzer.expectToken(.identifier);
-
-                        // TODO: type
-                        _ = try analyzer.expectToken(.equal);
-
-                        // TODO: do this in a function
-                        const init_node = switch (analyzer.tokens[analyzer.token_i].id) {
-                            .identifier => unreachable,
-                            .hash => try analyzer.compilerIntrinsic(),
-                            else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}),
-                        };
-
-                        _ = try analyzer.expectToken(.semicolon);
-
-                        // TODO:
-                        const type_node = 0;
-                        const top_level_decl = .{
-                            .id = .simple_variable_declaration,
+                        break :blk .{
+                            .id = .@"comptime",
                             .token = first,
-                            .left = type_node,
-                            .right = init_node,
+                            .left = comptime_block,
+                            .right = Node.Index.invalid,
                         };
+                    },
+                    else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}),
+                },
+                .fixed_keyword_const, .fixed_keyword_var => blk: {
+                    analyzer.token_i += 1;
+                    _ = try analyzer.expectToken(.identifier);
 
-                        break :blk top_level_decl;
-                    }
+                    // TODO: type
+                    _ = try analyzer.expectToken(.equal);
+
+                    // TODO: do this in a function
+                    const init_node = switch (analyzer.tokens[analyzer.token_i].id) {
+                        .identifier => unreachable,
+                        .hash => try analyzer.compilerIntrinsic(),
+                        .left_parenthesis => try analyzer.function(),
+                        else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}),
+                    };
+
+                    _ = try analyzer.expectToken(.semicolon);
+
+                    // TODO:
+                    const type_node = Node.Index.invalid;
+                    const top_level_decl = .{
+                        .id = .simple_variable_declaration,
+                        .token = first,
+                        .left = type_node,
+                        .right = init_node,
+                    };
+
+                    break :blk top_level_decl;
+                },
+                .identifier => {
+                    unreachable;
+                },
                 else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}),
             };
@@ -150,6 +162,11 @@ const Analyzer = struct {
         const members_array = analyzer.temporal_node_heap.items[node_heap_top..];
 
         const members: Members = switch (members_array.len) {
+            1 => .{
+                .len = 1,
+                .left = members_array[0],
+                .right = Node.Index.invalid,
+            },
             2 => .{
                 .len = 2,
                 .left = members_array[0],
@@ -161,6 +178,51 @@ const Analyzer = struct {
         return members;
     }
 
+    fn function(analyzer: *Analyzer) !Node.Index {
+        const token = analyzer.token_i;
+        const function_prototype = try analyzer.functionPrototype();
+        const function_body = try analyzer.block();
+        return analyzer.addNode(.{
+            .id = .function_definition,
+            .token = token,
+            .left = function_prototype,
+            .right = function_body,
+        });
+    }
+
+    fn functionPrototype(analyzer: *Analyzer) !Node.Index {
+        const token = analyzer.token_i;
+        const arguments = try analyzer.argumentList(.left_parenthesis, .right_parenthesis);
+        const return_type = try analyzer.typeExpression();
+
+        return analyzer.addNode(.{
+            .id = .simple_function_prototype,
+            .token = token,
+            .left = arguments,
+            .right = return_type,
+        });
+    }
+
+    fn argumentList(analyzer: *Analyzer, maybe_start_token: ?Token.Id, end_token: Token.Id) !Node.Index {
+        if (maybe_start_token) |start_token| {
+            _ = try analyzer.expectToken(start_token);
+        }
+
+        var list = ArrayList(Node.Index){};
+
+        while (analyzer.tokens[analyzer.token_i].id != end_token) {
+            @panic("TODO: argument list");
+        }
+
+        _ = try analyzer.expectToken(end_token);
+
+        if (list.items.len != 0) {
+            @panic("TODO: arguments");
+        } else {
+            return Node.Index.invalid;
+        }
+    }
+
     fn block(analyzer: *Analyzer) !Node.Index {
         const left_brace = try analyzer.expectToken(.left_brace);
         const node_heap_top = analyzer.temporal_node_heap.items.len;
@@ -174,11 +236,17 @@ const Analyzer = struct {
         const statement_array = analyzer.temporal_node_heap.items[node_heap_top..];
 
         const node: Node = switch (statement_array.len) {
+            0 => .{
+                .id = .block_zero,
+                .token = left_brace,
+                .left = Node.Index.invalid,
+                .right = Node.Index.invalid,
+            },
             1 => .{
                 .id = .block_one,
                 .token = left_brace,
                 .left = statement_array[0],
-                .right = 0,
+                .right = Node.Index.invalid,
             },
             else => |len| std.debug.panic("len: {}", .{len}),
         };
@@ -187,10 +255,41 @@ const Analyzer = struct {
 
     fn statement(analyzer: *Analyzer) !Node.Index {
         // TODO: more stuff before
-        const result = try analyzer.assignExpression();
-        _ = try analyzer.expectToken(.semicolon);
+        const first_statement_token = analyzer.tokens[analyzer.token_i];
+        return switch (first_statement_token.id) {
+            .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) {
+                .colon => {
+                    unreachable;
+                },
+                else => blk: {
+                    const identifier = analyzer.getIdentifier(first_statement_token);
+                    std.debug.print("Starting statement with identifier: {s}\n", .{identifier});
+                    const result = try analyzer.assignExpression();
+                    _ = try analyzer.expectToken(.semicolon);
+                    break :blk result;
+                },
+            },
+            .fixed_keyword_while => try analyzer.whileStatement(),
+            else => unreachable,
+        };
+    }
 
-        return result;
+    fn whileStatement(analyzer: *Analyzer) error{ OutOfMemory, unexpected_token, not_implemented }!Node.Index {
+        const while_identifier_index = try analyzer.expectToken(.fixed_keyword_while);
+
+        _ = try analyzer.expectToken(.left_parenthesis);
+        // TODO:
+        const while_condition = try analyzer.expression();
+        _ = try analyzer.expectToken(.right_parenthesis);
+
+        const while_block = try analyzer.block();
+
+        return analyzer.addNode(.{
+            .id = .simple_while,
+            .token = while_identifier_index,
+            .left = while_condition,
+            .right = while_block,
+        });
     }
 
     fn assignExpression(analyzer: *Analyzer) !Node.Index {
@@ -242,13 +341,13 @@ const Analyzer = struct {
                     .id = .compiler_intrinsic_one,
                     .token = hash,
                     .left = parameters[0],
-                    .right = 0,
+                    .right = Node.Index.invalid,
                 }),
             else => unreachable,
         };
     }
 
-    fn expression(analyzer: *Analyzer) !Node.Index {
+    fn expression(analyzer: *Analyzer) error{ OutOfMemory, not_implemented, unexpected_token }!Node.Index {
         return analyzer.expressionPrecedence(0);
     }
 
@@ -259,7 +358,7 @@ const Analyzer = struct {
 
         while (analyzer.token_i < analyzer.tokens.len) {
             const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) {
-                .equal, .semicolon, .right_parenthesis => -1,
+                .equal, .semicolon, .right_parenthesis, .right_brace => -1,
                 else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }),
             };
 
@@ -305,7 +404,8 @@ const Analyzer = struct {
                 .colon => unreachable,
                 else => try analyzer.curlySuffixExpression(),
             },
-            .string_literal => try analyzer.curlySuffixExpression(),
+            .string_literal, .fixed_keyword_true, .fixed_keyword_false => try analyzer.curlySuffixExpression(),
+            .left_brace => try analyzer.block(),
             else => |id| {
                 log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)});
                 unreachable;
@@ -324,9 +424,33 @@ const Analyzer = struct {
         };
     }
 
+    fn noReturn(analyzer: *Analyzer) !Node.Index {
+        const token_i = analyzer.token_i;
+        assert(analyzer.tokens[token_i].id == .fixed_keyword_noreturn);
+        analyzer.token_i += 1;
+        return analyzer.addNode(.{
+            .id = .keyword_noreturn,
+            .token = token_i,
+            .left = Node.Index.invalid,
+            .right = Node.Index.invalid,
+        });
+    }
+
+    fn boolTrue(analyzer: *Analyzer) !Node.Index {
+        const token_i = analyzer.token_i;
+        assert(analyzer.tokens[token_i].id == .fixed_keyword_true);
+        analyzer.token_i += 1;
+        return analyzer.addNode(.{
+            .id = .keyword_true,
+            .token = token_i,
+            .left = Node.Index.invalid,
+            .right = Node.Index.invalid,
+        });
+    }
+
     fn typeExpression(analyzer: *Analyzer) !Node.Index {
         return switch (analyzer.tokens[analyzer.token_i].id) {
-            .string_literal, .identifier => try analyzer.errorUnionExpression(),
+            .identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false => try analyzer.errorUnionExpression(),
             else => |id| blk: {
                 log.warn("By default, calling errorUnionExpression with {s}", .{@tagName(id)});
@@ -354,7 +478,21 @@ const Analyzer = struct {
             unreachable;
         } else {
             if (analyzer.tokens[analyzer.token_i].id == .left_parenthesis) {
-                unreachable;
+                analyzer.token_i += 1;
+
+                var expression_list = ArrayList(Node.Index){};
+                while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) {
+                    const parameter = try analyzer.expression();
+                    try expression_list.append(analyzer.allocator, parameter);
+                    analyzer.token_i += @intFromBool(switch (analyzer.tokens[analyzer.token_i].id) {
+                        .comma, .right_parenthesis => true,
+                        .colon, .right_brace, .right_bracket => unreachable,
+                        else => unreachable,
+                    });
+                }
+
+                _ = try analyzer.expectToken(.right_parenthesis);
+                @panic("TODO");
             } else {
                 return result;
             }
@@ -366,28 +504,34 @@ const Analyzer = struct {
 
     fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index {
         const token_i = analyzer.token_i;
-        return switch (analyzer.tokens[token_i].id) {
+        const token = analyzer.tokens[token_i];
+        return switch (token.id) {
             .string_literal => blk: {
                 analyzer.token_i += 1;
                 break :blk analyzer.addNode(.{
                     .id = .string_literal,
                     .token = token_i,
-                    .left = 0,
-                    .right = 0,
+                    .left = Node.Index.invalid,
+                    .right = Node.Index.invalid,
                 });
             },
             .identifier => switch (analyzer.tokens[token_i + 1].id) {
                 .colon => unreachable,
-                else => analyzer.addNode(.{
-                    .id = .identifier,
-                    .token = blk: {
-                        analyzer.token_i += 1;
-                        break :blk token_i;
-                    },
-                    .left = 0,
-                    .right = 0,
-                }),
+                else => blk: {
+                    const identifier = analyzer.getIdentifier(token);
+                    analyzer.token_i += 1;
+                    if (equal(u8, identifier, "_")) {
+                        break :blk Node.Index.invalid;
+                    } else break :blk analyzer.addNode(.{
+                        .id = .identifier,
+                        .token = token_i,
+                        .left = Node.Index.invalid,
+                        .right = Node.Index.invalid,
+                    });
+                },
             },
+            .fixed_keyword_noreturn => try analyzer.noReturn(),
+            .fixed_keyword_true => try analyzer.boolTrue(),
             else => |foo| {
                 switch (foo) {
                     .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.getIdentifier(analyzer.tokens[token_i]) }),
@@ -405,9 +549,13 @@ const Analyzer = struct {
     }
 
     fn addNode(analyzer: *Analyzer, node: Node) !Node.Index {
+        std.debug.print("Adding node {s}\n", .{@tagName(node.id)});
        const index = analyzer.nodes.items.len;
         try analyzer.nodes.append(analyzer.allocator, node);
-        return @intCast(index);
+
+        return Node.Index{
+            .value = @intCast(index),
+        };
     }
 };
 
@@ -420,12 +568,12 @@ const Members = struct {
         return switch (members.len) {
             0 => unreachable,
             1 => .{
-                .start = members.left,
-                .end = members.left,
+                .start = members.left.value,
+                .end = members.left.value,
             },
             2 => .{
-                .start = members.left,
-                .end = members.right,
+                .start = members.left.value,
+                .end = members.right.value,
             },
             else => unreachable,
         };
@@ -439,19 +587,19 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R
         .file = file,
         .allocator = allocator,
     };
-    errdefer analyzer.free();
 
     const node_index = try analyzer.addNode(.{
         .id = .main,
         .token = 0,
-        .left = 0,
-        .right = 0,
+        .left = Node.Index.invalid,
+        .right = Node.Index.invalid,
     });
-    assert(node_index == 0);
+    assert(node_index.value == 0);
+    assert(node_index.valid);
 
     const members = try analyzer.containerMembers();
     const member_range = members.toRange();
-    analyzer.nodes.items[0].left = @intCast(member_range.start);
-    analyzer.nodes.items[0].right = @intCast(member_range.end);
+    analyzer.nodes.items[0].left = .{ .value = @intCast(member_range.start) };
+    analyzer.nodes.items[0].right = .{ .value = @intCast(member_range.end) };
 
     const end = std.time.Instant.now() catch unreachable;
 
@@ -471,4 +619,18 @@ const ExpressionMutabilityQualifier = enum {
 const Keyword = enum {
     @"return",
     @"fn",
+    @"while",
+    void,
+    noreturn,
+};
+
+// These types are meant to be used by the semantic analyzer
+pub const ContainerDeclaration = struct {
+    members: []const Node.Index,
+};
+
+pub const SymbolDeclaration = struct {
+    type_node: Node.Index,
+    initialization_node: Node.Index,
+    mutability_token: Token.Index,
 };
diff --git a/src/main.zig b/src/main.zig
index ce0e6df..93052c3 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -3,7 +3,6 @@ const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 
 const Compilation = @import("Compilation.zig");
-const fs = @import("fs.zig");
 
 pub const seed = std.math.maxInt(u64);
 const default_src_file = "src/test/main.b";
@@ -13,17 +12,18 @@ pub fn main() !void {
 }
 
 fn singleCompilation(main_file_path: []const u8) !void {
-    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
-    defer _ = gpa.deinit();
-
-    const compilation = try Compilation.init(gpa.allocator());
-    defer compilation.deinit();
+    const allocator = std.heap.page_allocator;
+    const compilation = try Compilation.init(allocator);
 
     try compilation.compileModule(.{
         .main_package_path = main_file_path,
     });
 }
 
+test {
+    _ = Compilation;
+}
+
 test "basic" {
     try singleCompilation(default_src_file);
 }
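
Note on the `Node.Index` change at the heart of this patch: packing a validity bit next to a 31-bit payload keeps the index word-sized while removing the ambiguity of the old `u27` scheme, where 0 served both as a real index and as "absent". A minimal standalone sketch of the pattern (the type mirrors the diff; the test block is illustrative only):

```zig
const std = @import("std");

// Mirrors the packed index from the diff: 31 bits of payload plus a
// validity bit, so "no node" no longer has to share index 0 with the
// root node.
const Index = packed struct(u32) {
    value: u31,
    valid: bool = true,

    const invalid = Index{ .value = 0, .valid = false };

    fn get(index: Index) ?u32 {
        return if (index.valid) index.value else null;
    }
};

test "a packed index is still 4 bytes and behaves like an optional" {
    try std.testing.expectEqual(@as(usize, 4), @sizeOf(Index));
    try std.testing.expectEqual(@as(?u32, null), Index.invalid.get());
    // Unlike a 0-as-sentinel encoding, index 0 remains a usable index.
    try std.testing.expectEqual(@as(?u32, 0), (Index{ .value = 0 }).get());
}
```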
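The truncated expression at the top of this section (Compilation.zig) counts how many distinct valid children the root `main_node` has before copying them into `list_buffer`. A hypothetical reconstruction of that arithmetic as a standalone helper, assuming the visible fragment belongs to a `len` computation; the name `childCount` is invented here for illustration:

```zig
const std = @import("std");

// Each valid child contributes 1; if both slots are valid but refer to
// the same node, the duplicate is subtracted so it is counted once.
fn childCount(left_valid: bool, right_valid: bool, same_node: bool) u2 {
    return @as(u2, @intFromBool(left_valid)) +
        @intFromBool(right_valid) -
        @intFromBool(left_valid and right_valid and same_node);
}

test "distinct valid children" {
    try std.testing.expectEqual(@as(u2, 0), childCount(false, false, false));
    try std.testing.expectEqual(@as(u2, 1), childCount(true, false, false));
    try std.testing.expectEqual(@as(u2, 2), childCount(true, true, false));
    try std.testing.expectEqual(@as(u2, 1), childCount(true, true, true));
}
```

The subtraction cannot underflow: the third term is only 1 when the first two terms both are.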
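The `test { _ = Compilation; }` block added to src/main.zig is the usual Zig idiom for test discovery: the test runner only collects `test` blocks from declarations that are semantically analyzed, and referencing the import forces that analysis. A sketch of the idiom, together with the broader standard-library variant:

```zig
const std = @import("std");
const Compilation = @import("Compilation.zig");

test {
    // Referencing the import pulls Compilation.zig into analysis, so its
    // `test` blocks run under `zig build test` as well.
    _ = Compilation;

    // Equivalent, more sweeping form: reference every public declaration
    // of this file (and, transitively, their tests).
    std.testing.refAllDecls(@This());
}
```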