diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 76c5590..9cef9f8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,6 +24,8 @@ jobs:
       matrix:
         os: [
           ubuntu-latest,
+          windows-latest,
+          macos-latest
         ]
     runs-on: ${{ matrix.os }}
     timeout-minutes: 15
diff --git a/.gitignore b/.gitignore
index 4c82b07..212371e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 zig-cache
 zig-out
+nat
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 27965bb..46a561c 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -5,13 +5,22 @@
     "version": "0.2.0",
     "configurations": [
         {
-            "type": "cppvsdbg",
+            "type": "lldb",
             "request": "launch",
-            "name": "Debug",
-            "program": "${workspaceFolder}/zig-out/bin/compiler.exe",
+            "name": "Launch",
+            "program": "${workspaceFolder}/zig-out/bin/compiler",
             "args": [],
             "cwd": "${workspaceFolder}",
             "preLaunchTask": "zig build"
-        }
+        },
+        // {
+        //     "type": "cppvsdbg",
+        //     "request": "launch",
+        //     "name": "Debug",
+        //     "program": "${workspaceFolder}/zig-out/bin/compiler.exe",
+        //     "args": [],
+        //     "cwd": "${workspaceFolder}",
+        //     "preLaunchTask": "zig build"
+        // }
     ]
 }
\ No newline at end of file
diff --git a/build.zig b/build.zig
index dcb261d..cd090e8 100644
--- a/build.zig
+++ b/build.zig
@@ -1,59 +1,38 @@
 const std = @import("std");
 
-// Although this function looks imperative, note that its job is to
-// declaratively construct a build graph that will be executed by an external
-// runner.
 pub fn build(b: *std.Build) void {
-    // Standard target options allows the person running `zig build` to choose
-    // what target to build for. Here we do not override the defaults, which
-    // means any target is allowed, and the default is native. Other options
-    // for restricting supported target set are available.
     const target = b.standardTargetOptions(.{});
-
-    // Standard optimization options allow the person running `zig build` to select
-    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
-    // set a preferred release mode, allowing the user to decide how to optimize.
     const optimize = b.standardOptimizeOption(.{});
 
     const exe = b.addExecutable(.{
         .name = "compiler",
-        // In this case the main source file is merely a path, however, in more
-        // complicated build scripts, this could be a generated file.
         .root_source_file = .{ .path = "src/main.zig" },
         .target = target,
         .optimize = optimize,
     });
 
-    // This declares intent for the executable to be installed into the
-    // standard location when the user invokes the "install" step (the default
-    // step when running `zig build`).
     b.installArtifact(exe);
+    b.installDirectory(.{
+        .source_dir = std.Build.LazyPath.relative("lib"),
+        .install_dir = .bin,
+        .install_subdir = "lib",
+    });
 
-    // This *creates* a Run step in the build graph, to be executed when another
-    // step is evaluated that depends on it. The next line below will establish
-    // such a dependency.
     const run_cmd = b.addRunArtifact(exe);
 
-    // By making the run step depend on the install step, it will be run from the
-    // installation directory rather than directly from within the cache directory.
-    // This is not necessary, however, if the application depends on other installed
-    // files, this ensures they will be present and in the expected location.
     run_cmd.step.dependOn(b.getInstallStep());
 
-    // This allows the user to pass arguments to the application in the build
-    // command itself, like this: `zig build run -- arg1 arg2 etc`
     if (b.args) |args| {
         run_cmd.addArgs(args);
     }
 
-    // This creates a build step. It will be visible in the `zig build --help` menu,
-    // and can be selected like this: `zig build run`
-    // This will evaluate the `run` step rather than the default, which is "install".
     const run_step = b.step("run", "Run the app");
     run_step.dependOn(&run_cmd.step);
 
-    // Creates a step for unit testing. This only builds the test executable
-    // but does not run it.
+    const debug_command = addDebugCommand(b, exe);
+    const debug_step = b.step("debug", "Debug the app");
+    debug_step.dependOn(&debug_command.step);
+
     const unit_tests = b.addTest(.{
         .root_source_file = .{ .path = "src/main.zig" },
         .target = target,
@@ -61,10 +40,39 @@ pub fn build(b: *std.Build) void {
     });
 
     const run_unit_tests = b.addRunArtifact(unit_tests);
-
-    // Similar to creating the run step earlier, this exposes a `test` step to
-    // the `zig build --help` menu, providing a way for the user to request
-    // running the unit tests.
     const test_step = b.step("test", "Run unit tests");
     test_step.dependOn(&run_unit_tests.step);
+
+    const debug_unit_tests_cmd = addDebugCommand(b, unit_tests);
+    const debug_test_step = b.step("debug_test", "Run the tests through the debugger");
+    debug_test_step.dependOn(&debug_unit_tests_cmd.step);
+}
+
+fn addDebugCommand(b: *std.Build, artifact: *std.Build.Step.Compile) *std.Build.Step.Run {
+    return switch (@import("builtin").os.tag) {
+        .linux => blk: {
+            const result = b.addSystemCommand(&.{"gf2"});
+            result.addArtifactArg(artifact);
+
+            if (artifact.kind == .@"test") {
+                result.addArgs(&.{ "-ex", "r" });
+            }
+
+            break :blk result;
+        },
+        .windows => blk: {
+            const result = b.addSystemCommand(&.{"remedybg"});
+            result.addArg("-g");
+            result.addArtifactArg(artifact);
+
+            break :blk result;
+        },
+        .macos => blk: {
+            // not tested
+            const result = b.addSystemCommand(&.{"gdb"});
+            result.addArtifactArg(artifact);
+            break :blk result;
+        },
+        else => @compileError("Operating system not supported"),
+    };
 }
diff --git a/lib/std/start.nat b/lib/std/start.nat
new file mode 100644
index 0000000..199fd13
--- /dev/null
+++ b/lib/std/start.nat
@@ -0,0 +1,7 @@
+comptime {
+    _ = _start;
+}
+
+const _start = () noreturn {
+    while (true) {}
+};
diff --git a/lib/std/std.nat b/lib/std/std.nat
new file mode 100644
index 0000000..3ce1556
--- /dev/null
+++ b/lib/std/std.nat
@@ -0,0 +1,5 @@
+comptime {
+    _ = start;
+}
+
+const start = #import("start.nat");
diff --git a/src/Compilation.zig b/src/Compilation.zig
new file mode 100644
index 0000000..f471fcc
--- /dev/null
+++ b/src/Compilation.zig
@@ -0,0 +1,495 @@
+const Compilation = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const equal = std.mem.eql;
+const print = std.debug.print;
+
+const Allocator = std.mem.Allocator;
+
+const data_structures = @import("data_structures.zig");
+const ArrayList = data_structures.ArrayList;
+const AutoHashMap = data_structures.AutoHashMap;
+const BlockList = data_structures.BlockList;
+const HashMap = data_structures.HashMap;
+const SegmentedList = data_structures.SegmentedList;
+const StringHashMap = data_structures.StringHashMap;
+const StringArrayHashMap = data_structures.StringArrayHashMap;
+
+const lexical_analyzer = @import("frontend/lexical_analyzer.zig");
+const syntactic_analyzer = @import("frontend/syntactic_analyzer.zig");
+const Node = syntactic_analyzer.Node;
+const semantic_analyzer = @import("frontend/semantic_analyzer.zig");
+const intermediate_representation = @import("backend/intermediate_representation.zig");
+const emit = @import("backend/emit.zig");
+
+test {
+    _ = lexical_analyzer;
+    _ = syntactic_analyzer;
+    _ = semantic_analyzer;
+    _ = data_structures;
+}
+
+base_allocator: Allocator,
+cwd_absolute_path: []const u8,
+directory_absolute_path: []const u8,
+executable_absolute_path: []const u8,
+build_directory: std.fs.Dir,
+
+const cache_dir_name = "cache";
+const installation_dir_name = "installation";
+
+pub fn init(allocator: Allocator) !*Compilation {
+    const compilation: *Compilation = try allocator.create(Compilation);
+
+    const self_exe_path = try std.fs.selfExePathAlloc(allocator);
+    const self_exe_dir_path = std.fs.path.dirname(self_exe_path).?;
+    compilation.* = .{
+        .base_allocator = allocator,
+        .cwd_absolute_path = try realpathAlloc(allocator, "."),
+        .executable_absolute_path = self_exe_path,
+        .directory_absolute_path = self_exe_dir_path,
+        .build_directory = try std.fs.cwd().makeOpenPath("nat", .{}),
+    };
+
+    try compilation.build_directory.makePath(cache_dir_name);
+    try compilation.build_directory.makePath(installation_dir_name);
+
+    return compilation;
+}
+
+pub const Struct = struct {
+    scope: Scope.Index,
+    fields: ArrayList(Field.Index) = .{},
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+pub const Type = union(enum) {
+    void,
+    noreturn,
+    bool,
+    integer: Integer,
+    @"struct": Struct.Index,
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+pub const Integer = struct {
+    bit_count: u16,
+    signedness: Signedness,
+    pub const Signedness = enum(u1) {
+        unsigned = 0,
+        signed = 1,
+    };
+};
+
+/// A scope contains a bunch of declarations
+pub const Scope = struct {
+    parent: Scope.Index,
+    type: Type.Index = Type.Index.invalid,
+    declarations: AutoHashMap(u32, Declaration.Index) = .{},
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+pub const ScopeType = enum(u1) {
+    local = 0,
+    global = 1,
+};
+
+pub const Mutability = enum(u1) {
+    @"const",
+    @"var",
+};
+
+pub const Declaration = struct {
+    scope_type: ScopeType,
+    mutability: Mutability,
+    init_value: Value.Index,
+    name: []const u8,
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+pub const Function = struct {
+    body: Block.Index,
+    prototype: Prototype.Index,
+
+    pub const Prototype = struct {
+        arguments: ?[]const Field.Index,
+        return_type: Type.Index,
+
+        pub const List = BlockList(@This());
+        pub const Index = Prototype.List.Index;
+    };
+
+    pub fn getBodyBlock(function: Function, module: *Module) *Block {
+        return module.blocks.get(function.body);
+    }
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+pub const Block = struct {
+    statements: ArrayList(Value.Index) = .{},
+    reaches_end: bool,
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+pub const Field = struct {
+    foo: u32 = 0,
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+pub const Loop = struct {
+    condition: Value.Index,
+    body: Value.Index,
+    breaks: bool,
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+const Runtime = struct {
+    foo: u32 = 0,
+};
+
+const Unresolved = struct {
+    node_index: Node.Index,
+};
+
+pub const Assignment = struct {
+    store: Value.Index,
+    load: Value.Index,
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+pub const Value = union(enum) {
+    unresolved: Unresolved,
+    declaration: Declaration.Index,
+    void,
+    bool: bool,
+    undefined,
+    loop: Loop.Index,
+    function: Function.Index,
+    block: Block.Index,
+    runtime: Runtime,
+    assign: Assignment.Index,
+    type: Type.Index,
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+
+    pub fn isComptime(value: Value) bool {
+        return switch (value) {
+            .bool, .void, .undefined, .function => true,
+            else => false,
+        };
+    }
+
+    pub fn getType(value: *Value) !void {
+        switch (value.*) {
+            else => |t| @panic(@tagName(t)),
+        }
+        unreachable;
+    }
+};
+
+pub const Module = struct {
+    main_package: *Package,
+    import_table: StringArrayHashMap(*File) = .{},
+    string_table: AutoHashMap(u32, []const u8) = .{},
+    declarations: BlockList(Declaration) = .{},
+    structs: BlockList(Struct) = .{},
+    scopes: BlockList(Scope) = .{},
+    files: BlockList(File) = .{},
+    values: BlockList(Value) = .{},
+    functions: BlockList(Function) = .{},
+    fields: BlockList(Field) = .{},
+    function_prototypes: BlockList(Function.Prototype) = .{},
+    types: BlockList(Type) = .{},
+    blocks: BlockList(Block) = .{},
+    loops: BlockList(Loop) = .{},
+    assignments: BlockList(Assignment) = .{},
+
+    pub const Descriptor = struct {
+        main_package_path: []const u8,
+    };
+
+    const ImportFileResult = struct {
+        file: *File,
+        is_new: bool,
+    };
+
+    const ImportPackageResult = struct {
+        file: *File,
+        is_new: bool,
+        is_package: bool,
+    };
+
+    pub fn importFile(module: *Module, allocator: Allocator, current_file: *File, import_name: []const u8) !ImportPackageResult {
+        if (equal(u8, import_name, "std")) {
+            return module.importPackage(allocator, module.main_package.dependencies.get("std").?);
+        }
+
+        if (equal(u8, import_name, "builtin")) {
+            return module.importPackage(allocator, module.main_package.dependencies.get("builtin").?);
+        }
+
+        if (equal(u8, import_name, "main")) {
+            return module.importPackage(allocator, module.main_package);
+        }
+
+        if (current_file.package.dependencies.get(import_name)) |package| {
+            return module.importPackage(allocator, package);
+        }
+
+        if (!std.mem.endsWith(u8, import_name, ".nat")) {
+            unreachable;
+        }
+
+        const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_name });
+        const file_relative_path = std.fs.path.basename(full_path);
+        const package = current_file.package;
+        const import = try module.getFile(allocator, full_path, file_relative_path, package);
+
+        try import.file.addFileReference(allocator, current_file);
+
+        const result = ImportPackageResult{
+            .file = import.file,
+            .is_new = import.is_new,
+            .is_package = false,
+        };
+
+        return result;
+    }
+
+    fn getFile(module: *Module, allocator: Allocator, full_path: []const u8, relative_path: []const u8, package: *Package) !ImportFileResult {
+        const path_lookup = try module.import_table.getOrPut(allocator, full_path);
+        const file: *File = switch (path_lookup.found_existing) {
+            true => path_lookup.value_ptr.*,
+            false => blk: {
+                const new_file_index = try module.files.append(allocator, File{
+                    .relative_path = relative_path,
+                    .package = package,
+                });
+                const file = module.files.get(new_file_index);
+                path_lookup.value_ptr.* = file;
+                break :blk file;
+            },
+        };
+
+        return .{
+            .file = file,
+            .is_new = !path_lookup.found_existing,
+        };
+    }
+
+    pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult {
+        const full_path = try std.fs.path.resolve(allocator, &.{ package.directory.path, package.source_path });
+        const import = try module.getFile(allocator, full_path, package.source_path, package);
+        try import.file.addPackageReference(allocator, package);
+
+        return .{
+            .file = import.file,
+            .is_new = import.is_new,
+            .is_package = true,
+        };
+    }
+
+    pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void {
+        _ = module;
+        const source_file = try file.package.directory.handle.openFile(file.relative_path, .{});
+
+        const file_size = try source_file.getEndPos();
+        var file_buffer = try allocator.alloc(u8, file_size);
+
+        const read_byte_count = try source_file.readAll(file_buffer);
+        assert(read_byte_count == file_size);
+        source_file.close();
+
+        //TODO: adjust file maximum size
+        file.source_code = file_buffer[0..read_byte_count];
+        file.status = .loaded_into_memory;
+
+        try file.lex(allocator);
+        try file.parse(allocator);
+    }
+};
+
+fn pathFromCwd(compilation: *const Compilation, relative_path: []const u8) ![]const u8 {
+    return std.fs.path.join(compilation.base_allocator, &.{ compilation.cwd_absolute_path, relative_path });
+}
+
+fn pathFromCompiler(compilation: *const Compilation, relative_path: []const u8) ![]const u8 {
+    return std.fs.path.join(compilation.base_allocator, &.{ compilation.directory_absolute_path, relative_path });
+}
+
+fn realpathAlloc(allocator: Allocator, pathname: []const u8) ![]const u8 {
+    var path_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
+    const realpathInStack = try std.os.realpath(pathname, &path_buffer);
+    return allocator.dupe(u8, realpathInStack);
+}
+
+pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) !void {
+    // TODO: generate an actual file
+    const builtin_file_name = "builtin.nat";
+    var cache_dir = try compilation.build_directory.openDir("cache", .{});
+    const builtin_file = try cache_dir.createFile(builtin_file_name, .{ .truncate = false });
+    builtin_file.close();
+
+    const module: *Module = try compilation.base_allocator.create(Module);
+    module.* = Module{
+        .main_package = blk: {
+            const result = try compilation.base_allocator.create(Package);
+            const main_package_absolute_directory_path = try compilation.pathFromCwd(std.fs.path.dirname(descriptor.main_package_path).?);
+            result.* = .{
+                .directory = .{
+                    .handle = try std.fs.openDirAbsolute(main_package_absolute_directory_path, .{}),
+                    .path = main_package_absolute_directory_path,
+                },
+                .source_path = try compilation.base_allocator.dupe(u8, std.fs.path.basename(descriptor.main_package_path)),
+            };
+            break :blk result;
+        },
+    };
+
+    const std_package_dir = "lib/std";
+
+    const package_descriptors = [2]struct {
+        name: []const u8,
+        directory_path: []const u8,
+    }{
+        .{
+            .name = "std",
+            .directory_path = try compilation.pathFromCwd(std_package_dir),
+        },
+        .{
+            .name = "builtin",
+            .directory_path = blk: {
+                const result = try cache_dir.realpathAlloc(compilation.base_allocator, ".");
+                cache_dir.close();
+                break :blk result;
+            },
+        },
+    };
+
+    var packages: [package_descriptors.len]*Package = undefined;
+    for (package_descriptors, &packages) |package_descriptor, *package_ptr| {
+        const package = try compilation.base_allocator.create(Package);
+        package.* = .{
+            .directory = .{
+                .path = package_descriptor.directory_path,
+                .handle = try std.fs.openDirAbsolute(package_descriptor.directory_path, .{}),
+            },
+            .source_path = try std.mem.concat(compilation.base_allocator, u8, &.{ package_descriptor.name, ".nat" }),
+        };
+
+        try module.main_package.addDependency(compilation.base_allocator, package_descriptor.name, package);
+
+        package_ptr.* = package;
+    }
+
+    assert(module.main_package.dependencies.size == 2);
+
+    _ = try module.importPackage(compilation.base_allocator, module.main_package.dependencies.get("std").?);
+
+    for (module.import_table.values()) |import| {
+        try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import);
+    }
+
+    const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0]);
+
+    var ir = try intermediate_representation.initialize(compilation, module, packages[0], main_declaration);
+
+    switch (@import("builtin").cpu.arch) {
+        .x86_64 => |arch| try emit.get(arch).initialize(compilation.base_allocator, &ir),
+        else => {},
+    }
+}
+
+fn generateAST() !void {}
+
+pub const Directory = struct {
+    handle: std.fs.Dir,
+    path: []const u8,
+};
+
+pub const Package = struct {
+    directory: Directory,
+    /// Relative to the package main directory
+    source_path: []const u8,
+    dependencies: StringHashMap(*Package) = .{},
+
+    fn addDependency(package: *Package, allocator: Allocator, package_name: []const u8, new_dependency: *Package) !void {
+        try package.dependencies.ensureUnusedCapacity(allocator, 1);
+        package.dependencies.putAssumeCapacityNoClobber(package_name, new_dependency);
+    }
+};
+
+pub const File = struct {
+    status: Status = .not_loaded,
+    source_code: []const u8 = &.{},
+    lexical_analyzer_result: lexical_analyzer.Result = undefined,
+    syntactic_analyzer_result: syntactic_analyzer.Result = undefined,
+    package_references: ArrayList(*Package) = .{},
+    file_references: ArrayList(*File) = .{},
+    relative_path: []const u8,
+    package: *Package,
+
+    const Status = enum {
+        not_loaded,
+        loaded_into_memory,
+        lexed,
+        parsed,
+    };
+
+    fn addPackageReference(file: *File, allocator: Allocator, package: *Package) !void {
+        for (file.package_references.items) |other| {
+            if (other == package) return;
+        }
+
+        try file.package_references.insert(allocator, 0, package);
+    }
+
+    fn addFileReference(file: *File, allocator: Allocator, affected: *File) !void {
+        try file.file_references.append(allocator, affected);
+    }
+
+    pub fn fromRelativePath(allocator: Allocator, file_relative_path: []const u8) *File {
+        const file_content = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize));
+        _ = file_content;
+        const file = try allocator.create(File);
+        file.* = File{};
+
+        return file;
+    }
+
+    fn lex(file: *File, allocator: Allocator) !void {
+        assert(file.status == .loaded_into_memory);
+        file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code);
+        // if (!@import("builtin").is_test) {
+        //     print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time});
+        // }
+        file.status = .lexed;
+    }
+
+    fn parse(file: *File, allocator: Allocator) !void {
+        assert(file.status == .lexed);
+        file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code);
+        // if (!@import("builtin").is_test) {
+        //     print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time});
+        // }
+        file.status = .parsed;
+    }
+};
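Taken together, `Compilation.init` and `compileModule` are the whole driver API this patch introduces. A minimal sketch of how `src/main.zig` might drive it (the entry-file path is hypothetical and error handling is elided):

```zig
const std = @import("std");
const Compilation = @import("Compilation.zig");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const allocator = gpa.allocator();

    // init() records the cwd/executable paths and creates the `nat`
    // build directory with its `cache` and `installation` subdirectories.
    const compilation = try Compilation.init(allocator);

    // compileModule() sets up the `std` and `builtin` packages, lexes and
    // parses every imported file, then runs semantic analysis, IR
    // construction, and machine-code emission.
    try compilation.compileModule(.{ .main_package_path = "test/main.nat" });
}
```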
diff --git a/src/emit.zig b/src/backend/emit.zig
similarity index 53%
rename from src/emit.zig
rename to src/backend/emit.zig
index b5d64e3..768b33d 100644
--- a/src/emit.zig
+++ b/src/backend/emit.zig
@@ -6,7 +6,13 @@ const assert = std.debug.assert;
 const expect = std.testing.expect;
 const expectEqual = std.testing.expectEqual;
 
-const ir = @import("ir.zig");
+const ir = @import("intermediate_representation.zig");
+
+const data_structures = @import("../data_structures.zig");
+const ArrayList = data_structures.ArrayList;
+const AutoHashMap = data_structures.AutoHashMap;
+
+const jit_callconv = .SysV;
 
 const Section = struct {
     content: []align(page_size) u8,
@@ -31,17 +37,6 @@ const Result = struct {
         };
     }
 
-    fn destroy(image: *Result) void {
-        inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| {
-            const section_bytes = @field(image.sections, field_name).content;
-            switch (@import("builtin").os.tag) {
-                .linux => std.os.munmap(section_bytes),
-                .windows => std.os.windows.VirtualFree(section_bytes.ptr, 0, std.os.windows.MEM_RELEASE),
-                else => @compileError("OS not supported"),
-            }
-        }
-    }
-
     fn mmap(size: usize, flags: packed struct {
         executable: bool,
     }) ![]align(page_size) u8 {
@@ -50,8 +45,13 @@ const Result = struct {
                 const windows = std.os.windows;
                 break :blk @as([*]align(0x1000) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size];
             },
-            .linux => blk: {
-                const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0;
+            .linux, .macos => |os_tag| blk: {
+                const execute_flag: switch (os_tag) {
+                    .linux => u32,
+                    .macos => c_int,
+                    else => unreachable,
+                } = if (flags.executable) std.os.PROT.EXEC else 0;
+                const protection_flags: u32 = @intCast(std.os.PROT.READ | std.os.PROT.WRITE | execute_flag);
                 const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE;
 
                 break :blk std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
@@ -71,26 +71,210 @@ const Result = struct {
         image.sections.text.index += 1;
     }
 
-    fn getEntryPoint(image: *const Result, comptime Function: type) *const Function {
+    fn appendOnlyOpcodeSkipInstructionBytes(image: *Result, instruction: Instruction) void {
+        const instruction_descriptor = instruction_descriptors.get(instruction);
+        assert(instruction_descriptor.opcode_byte_count == instruction_descriptor.operand_offset);
+        image.appendCode(instruction_descriptor.getOpcode());
+
+        image.sections.text.index += instruction_descriptor.size - instruction_descriptor.opcode_byte_count;
+    }
+
+    fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType {
         comptime {
-            assert(@typeInfo(Function) == .Fn);
+            assert(@typeInfo(FunctionType) == .Fn);
         }
 
         assert(image.sections.text.content.len > 0);
-        return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point]));
-    }
-
-    pub fn free(result: *Result, allocator: Allocator) void {
-        _ = allocator;
-        inline for (comptime std.meta.fieldNames(@TypeOf(result.sections))) |field_name| {
-            switch (@import("builtin").os.tag) {
-                .windows => unreachable,
-                else => std.os.munmap(@field(result.sections, field_name).content),
-            }
-        }
+        return @as(*const FunctionType, @ptrCast(&image.sections.text.content[image.entry_point]));
     }
 };
 
+const Instruction = enum {
+    jmp_rel_8,
+
+    const Descriptor = struct {
+        operands: [4]Operand,
+        operand_count: u3,
+        operand_offset: u5,
+        size: u8,
+        opcode: [2]u8,
+        opcode_byte_count: u8,
+
+        fn getOperands(descriptor: Descriptor) []const Operand {
+            return descriptor.operands[0..descriptor.operand_count];
+        }
+
+        fn getOpcode(descriptor: Descriptor) []const u8 {
+            return descriptor.opcode[0..descriptor.opcode_byte_count];
+        }
+
+        fn new(opcode_bytes: []const u8, operands: []const Operand) Descriptor {
+            // TODO: prefixes
+            var result = Descriptor{
+                .operands = undefined,
+                .operand_count = @intCast(operands.len),
+                .operand_offset = opcode_bytes.len,
+                .size = opcode_bytes.len,
+                .opcode = undefined,
+                .opcode_byte_count = opcode_bytes.len,
+            };
+
+            for (opcode_bytes, result.opcode[0..opcode_bytes.len]) |opcode_byte, *out_opcode| {
+                out_opcode.* = opcode_byte;
+            }
+
+            for (operands, result.operands[0..operands.len]) |operand, *out_operand| {
+                out_operand.* = operand;
+                result.size += operand.size;
+            }
+
+            return result;
+        }
+    };
+
+    const Operand = struct {
+        type: Type,
+        size: u8,
+
+        const Type = enum {
+            rel,
+        };
+    };
+};
+
+const rel8 = Instruction.Operand{
+    .type = .rel,
+    .size = @sizeOf(u8),
+};
+
+const instruction_descriptors = blk: {
+    var result = std.EnumArray(Instruction, Instruction.Descriptor).initUndefined();
+    result.getPtr(.jmp_rel_8).* = Instruction.Descriptor.new(&.{0xeb}, &[_]Instruction.Operand{rel8});
+    break :blk result;
+};
+
+const InstructionSelector = struct {
+    functions: ArrayList(Function),
+    const Function = struct {
+        instructions: ArrayList(Instruction) = .{},
+        block_byte_counts: ArrayList(u16),
+        block_offsets: ArrayList(u32),
+        byte_count: u32 = 0,
+        relocations: ArrayList(Relocation) = .{},
+        block_map: AutoHashMap(ir.BasicBlock.Index, u32) = .{},
+        const Relocation = struct {
+            instruction: Instruction,
+            source: u16,
+            destination: u16,
+            block_offset: u16,
+        };
+    };
+};
+
+pub fn get(comptime arch: std.Target.Cpu.Arch) type {
+    const backend = switch (arch) {
+        .x86_64 => @import("x86_64.zig"),
+        else => @compileError("Architecture not supported"),
+    };
+    _ = backend;
+
+    return struct {
+        pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void {
+            var result = try Result.create();
+            var function_iterator = intermediate.functions.iterator();
+            var instruction_selector = InstructionSelector{
+                .functions = try ArrayList(InstructionSelector.Function).initCapacity(allocator, intermediate.functions.len),
+            };
+
+            while (function_iterator.next()) |ir_function| {
+                const function = instruction_selector.functions.addOneAssumeCapacity();
+                function.* = .{
+                    .block_byte_counts = try ArrayList(u16).initCapacity(allocator, ir_function.blocks.items.len),
+                    .block_offsets = try ArrayList(u32).initCapacity(allocator, ir_function.blocks.items.len),
+                };
+                try function.block_map.ensureTotalCapacity(allocator, @intCast(ir_function.blocks.items.len));
+                for (ir_function.blocks.items, 0..) |block_index, index| {
+                    function.block_map.putAssumeCapacity(block_index, @intCast(index));
+                }
+
+                for (ir_function.blocks.items) |block_index| {
+                    const block = intermediate.blocks.get(block_index);
+                    function.block_offsets.appendAssumeCapacity(function.byte_count);
+                    var block_byte_count: u16 = 0;
+                    for (block.instructions.items) |instruction_index| {
+                        const instruction = intermediate.instructions.get(instruction_index).*;
+                        switch (instruction) {
+                            .phi => unreachable,
+                            .ret => unreachable,
+                            .jump => |jump_index| {
+                                const jump = intermediate.jumps.get(jump_index);
+                                const relocation = InstructionSelector.Function.Relocation{
+                                    .instruction = .jmp_rel_8,
+                                    .source = @intCast(function.block_map.get(jump.source) orelse unreachable),
+                                    .destination = @intCast(function.block_map.get(jump.destination) orelse unreachable),
+                                    .block_offset = block_byte_count,
+                                };
+                                try function.relocations.append(allocator, relocation);
+                                block_byte_count += instruction_descriptors.get(.jmp_rel_8).size;
+                                try function.instructions.append(allocator, .jmp_rel_8);
+                            },
+                        }
+                    }
+                    function.block_byte_counts.appendAssumeCapacity(block_byte_count);
+                    function.byte_count += block_byte_count;
+                }
+            }
+
+            for (instruction_selector.functions.items) |function| {
+                for (function.instructions.items) |instruction| switch (instruction) {
+                    .jmp_rel_8 => result.appendOnlyOpcodeSkipInstructionBytes(instruction),
+
+                    // else => unreachable,
+                };
+            }
+
+            for (instruction_selector.functions.items) |function| {
+                var fix_size: bool = false;
+                _ = fix_size;
+                for (function.relocations.items) |relocation| {
+                    std.debug.print("RELOC: {}\n", .{relocation});
+                    const source_block = relocation.source;
+                    const destination_block = relocation.destination;
+                    const source_offset = function.block_offsets.items[source_block];
+                    const destination_offset = function.block_offsets.items[destination_block];
+                    std.debug.print("Source offset: {}. Destination: {}\n", .{ source_offset, destination_offset });
+                    const instruction_descriptor = instruction_descriptors.get(relocation.instruction);
+                    const instruction_offset = source_offset + relocation.block_offset;
+                    const really_source_offset = instruction_offset + instruction_descriptor.size;
+                    const displacement = @as(i64, destination_offset) - @as(i64, really_source_offset);
+
+                    const operands = instruction_descriptor.getOperands();
+                    switch (operands.len) {
+                        1 => switch (operands[0].size) {
+                            @sizeOf(u8) => {
+                                if (displacement >= std.math.minInt(i8) and displacement <= std.math.maxInt(i8)) {
+                                    const writer_index = instruction_offset + instruction_descriptor.operand_offset;
+                                    std.debug.print("Instruction offset: {}. Operand offset: {}. Writer index: {}. displacement: {}\n", .{ instruction_offset, instruction_descriptor.operand_offset, writer_index, displacement });
+                                    result.sections.text.content[writer_index] = @bitCast(@as(i8, @intCast(displacement)));
+                                } else {
+                                    unreachable;
+                                }
+                            },
+                            else => unreachable,
+                        },
+                        else => unreachable,
+                    }
+                }
+            }
+
+            const text_section = result.sections.text.content[0..result.sections.text.index];
+            for (text_section) |byte| {
+                std.debug.print("0x{x}\n", .{byte});
+            }
+        }
+    };
+}
+
 const Rex = enum(u8) {
     b = upper_4_bits | (1 << 0),
     x = upper_4_bits | (1 << 1),
@@ -136,6 +320,7 @@ const prefix_rep = 0xf3;
 const prefix_rex_w = [1]u8{@intFromEnum(Rex.w)};
 const prefix_16_bit_operand = [1]u8{0x66};
 
+const jmp_rel_32 = 0xe9;
 const ret = 0xc3;
 const mov_a_imm = [1]u8{0xb8};
 const mov_reg_imm8: u8 = 0xb0;
@@ -160,12 +345,10 @@ fn movAImm(image: *Result, integer: anytype) void {
 }
 
 test "ret void" {
-    const allocator = std.testing.allocator;
     var image = try Result.create();
-    defer image.free(allocator);
     image.appendCodeByte(ret);
 
-    const function_pointer = image.getEntryPoint(fn () callconv(.C) void);
+    const function_pointer = image.getEntryPoint(fn () callconv(jit_callconv) void);
     function_pointer();
 }
 
@@ -185,13 +368,12 @@ fn getMaxInteger(comptime T: type) T {
 test "ret integer" {
     inline for (integer_types_to_test) |Int| {
         var image = try Result.create();
-        defer image.free(std.testing.allocator);
 
         const expected_number = getMaxInteger(Int);
 
         movAImm(&image, expected_number);
         image.appendCodeByte(ret);
 
-        const function_pointer = image.getEntryPoint(fn () callconv(.C) Int);
+        const function_pointer = image.getEntryPoint(fn () callconv(jit_callconv) Int);
         const result = function_pointer();
         try expect(result == expected_number);
     }
@@ -234,15 +416,13 @@ fn dstRmSrcR(image: *Result, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRe
 
 test "ret integer argument" {
     inline for (integer_types_to_test) |Int| {
-        const allocator = std.testing.allocator;
         var image = try Result.create();
-        defer image.free(allocator);
 
         const number = getMaxInteger(Int);
 
         movRmR(&image, Int, .a, .di);
         image.appendCodeByte(ret);
 
-        const functionPointer = image.getEntryPoint(fn (Int) callconv(.C) Int);
+        const functionPointer = image.getEntryPoint(fn (Int) callconv(jit_callconv) Int);
         const result = functionPointer(number);
         try expectEqual(number, result);
     }
@@ -264,9 +444,7 @@ fn subRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRe
 
 test "ret sub arguments" {
     inline for (integer_types_to_test) |Int| {
-        const allocator = std.testing.allocator;
         var image = try Result.create();
-        defer image.free(allocator);
 
         const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2);
         const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a);
@@ -274,7 +452,7 @@ test "ret sub arguments" {
         subRmR(&image, Int, .a, .si);
         image.appendCodeByte(ret);
 
-        const functionPointer = image.getEntryPoint(fn (Int, Int) callconv(.C) Int);
+        const functionPointer = image.getEntryPoint(fn (Int, Int) callconv(jit_callconv) Int);
         const result = functionPointer(a, b);
         try expectEqual(a - b, result);
     }
@@ -348,17 +526,15 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
         opcode: OpcodeRmR,
 
         pub fn runTest(test_case: @This()) !void {
-            const allocator = std.testing.allocator;
             for (0..10) |_| {
                 var image = try Result.create();
-                defer image.free(allocator);
                 const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2);
                 const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a);
                 movRmR(&image, T, .a, .di);
                 dstRmSrcR(&image, T, test_case.opcode, .a, .si);
                 image.appendCodeByte(ret);
 
-                const functionPointer = image.getEntryPoint(fn (T, T) callconv(.C) T);
+                const functionPointer = image.getEntryPoint(fn (T, T) callconv(jit_callconv) T);
                 const expected = test_case.callback(a, b);
                 const result = functionPointer(a, b);
                 if (should_log) {
@@ -371,9 +547,7 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
 }
 
 test "call after" {
-    const allocator = std.testing.allocator;
     var image = try Result.create();
-    defer image.free(allocator);
     const jump_patch_offset = image.sections.text.index + 1;
     image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 });
     const jump_source = image.sections.text.index;
@@ -382,14 +556,12 @@ test "call after" {
     @as(*align(1) u32, @ptrCast(&image.sections.text.content[jump_patch_offset])).* = @intCast(jump_target - jump_source);
     image.appendCodeByte(ret);
 
-    const functionPointer = image.getEntryPoint(fn () callconv(.C) void);
+    const functionPointer = image.getEntryPoint(fn () callconv(jit_callconv) void);
     functionPointer();
 }
 
 test "call before" {
-    const allocator = std.testing.allocator;
     var image = try Result.create();
-    defer image.free(allocator);
     const first_jump_patch_offset = image.sections.text.index + 1;
     const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 };
     image.appendCode(&first_call);
@@ -403,7 +575,7 @@ test "call before" {
     image.appendCode(&second_call);
     image.appendCodeByte(ret);
 
-    const functionPointer = image.getEntryPoint(fn () callconv(.C) void);
+    const functionPointer = image.getEntryPoint(fn () callconv(jit_callconv) void);
     functionPointer();
 }
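The displacement arithmetic in the relocation loop above is easy to get wrong, because x86 encodes relative jumps against the address of the *next* instruction. A self-contained sketch of the same computation (a toy function, not the backend's actual code):

```zig
const std = @import("std");

/// rel8 targets are measured from the end of the jump instruction itself,
/// which is why the patching code above adds the instruction size before
/// subtracting (`really_source_offset`).
fn rel8Displacement(instruction_offset: u32, instruction_size: u8, destination_offset: u32) !i8 {
    const next_instruction_offset = instruction_offset + instruction_size;
    const displacement = @as(i64, destination_offset) - @as(i64, next_instruction_offset);
    if (displacement < std.math.minInt(i8) or displacement > std.math.maxInt(i8)) {
        return error.DisplacementDoesNotFit;
    }
    return @as(i8, @intCast(displacement));
}

test "a 2-byte jmp_rel_8 at offset 0 jumping to its own start encodes -2" {
    // 0xEB 0xFE is the classic self-jump: -2 relative to the next instruction.
    try std.testing.expectEqual(@as(i8, -2), try rel8Displacement(0, 2, 0));
}
```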
diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig
new file mode 100644
index 0000000..501319f
--- /dev/null
+++ b/src/backend/intermediate_representation.zig
@@ -0,0 +1,243 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+const print = std.debug.print;
+
+const Compilation = @import("../Compilation.zig");
+const Module = Compilation.Module;
+const Package = Compilation.Package;
+
+const data_structures = @import("../data_structures.zig");
+const ArrayList = data_structures.ArrayList;
+const BlockList = data_structures.BlockList;
+
+pub const Result = struct {
+    functions: BlockList(Function) = .{},
+    blocks: BlockList(BasicBlock) = .{},
+    instructions: BlockList(Instruction) = .{},
+    jumps: BlockList(Jump) = .{},
+};
+
+pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_file: Compilation.Type.Index) !Result {
+    _ = main_file;
+    _ = package;
+    print("\nFunction count: {}\n", .{module.functions.len});
+
+    var function_iterator = module.functions.iterator();
+    var builder = Builder{
+        .allocator = compilation.base_allocator,
+        .module = module,
+    };
+
+    while (function_iterator.next()) |sema_function| {
+        print("\nFunction: {}\n", .{sema_function});
+
+        try builder.function(sema_function);
+    }
+
+    return builder.ir;
+}
+
+pub const BasicBlock = struct {
+    instructions: ArrayList(Instruction.Index) = .{},
+    incomplete_phis: ArrayList(Instruction.Index) = .{},
+    filled: bool = false,
+    sealed: bool = false,
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+
+    fn seal(basic_block: *BasicBlock) void {
+        for (basic_block.incomplete_phis.items) |incomplete_phi| {
+            _ = incomplete_phi;
+            unreachable;
+        }
+
+        basic_block.sealed = true;
+    }
+};
+
+const Instruction = union(enum) {
+    jump: Jump.Index,
+    phi: Phi.Index,
+    ret: Ret,
+
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+const Phi = struct {
+    foo: u32 = 0,
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+const Ret = struct {
+    value: Instruction.Index,
+};
+
+pub const Jump = struct {
+    source: BasicBlock.Index,
+    destination: BasicBlock.Index,
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+const Function = struct {
+    blocks: ArrayList(BasicBlock.Index) = .{},
+    pub const List = BlockList(@This());
+    pub const Index = List.Index;
+};
+
+pub const Builder = struct {
+    allocator: Allocator,
+    ir: Result = .{},
+    module: *Module,
+    current_basic_block: BasicBlock.Index = BasicBlock.Index.invalid,
+    current_function_index: Function.Index = Function.Index.invalid,
+
+    fn function(builder: *Builder, sema_function: Compilation.Function) !void {
+        builder.current_function_index = try builder.ir.functions.append(builder.allocator, .{});
+        // TODO: arguments
+        builder.current_basic_block = try builder.newBlock();
+
+        const return_type = builder.module.types.get(builder.module.function_prototypes.get(sema_function.prototype).return_type);
+        const is_noreturn = return_type.* == .noreturn;
+        if (!is_noreturn) {
+            const exit_block = try builder.newBlock();
+            const phi = try builder.appendToBlock(exit_block, .{
+                .phi = Phi.Index.invalid,
+            });
+            const ret = try builder.appendToBlock(exit_block, .{
+                .ret = .{
+                    .value = phi,
+                },
+            });
+            _ = ret;
+        }
+        const sema_block = sema_function.getBodyBlock(builder.module);
+        try builder.block(sema_block, .{ .emit_exit_block = !is_noreturn });
+
+        try builder.dumpFunction(std.io.getStdErr().writer(), builder.current_function_index);
+    }
+
+    fn dumpFunction(builder: *Builder, writer: anytype, index: Function.Index) !void {
+        const f = builder.ir.functions.get(index);
+        try writer.writeAll("Hello world!\n");
+        print("Function blocks: {}\n", .{f.blocks.items.len});
+        var function_instruction_index: usize = 0;
+        for (f.blocks.items, 0..) |block_index, function_block_index| {
+            print("#{}:\n", .{function_block_index});
+            const function_block = builder.ir.blocks.get(block_index);
+            for (function_block.instructions.items) |instruction_index| {
+                const instruction = builder.ir.instructions.get(instruction_index);
+                print("%{}: {}\n", .{ function_instruction_index, instruction });
+                function_instruction_index += 1;
+            }
+
+            print("\n", .{});
+        }
+    }
+
+    fn blockInsideBasicBlock(builder: *Builder, sema_block: *Compilation.Block, block_index: BasicBlock.Index) !BasicBlock.Index {
+        builder.current_basic_block = block_index;
+        try builder.block(sema_block, .{});
+        return builder.current_basic_block;
+    }
+
+    const BlockOptions = packed struct {
+        emit_exit_block: bool = true,
+    };
+
+    fn block(builder: *Builder, sema_block: *Compilation.Block, options: BlockOptions) error{OutOfMemory}!void {
+        for (sema_block.statements.items) |sema_statement_index| {
+            const sema_statement = builder.module.values.get(sema_statement_index);
+            switch (sema_statement.*) {
+                .loop => |loop_index| {
+                    const sema_loop = builder.module.loops.get(loop_index);
+                    const sema_loop_condition = builder.module.values.get(sema_loop.condition);
+                    const sema_loop_body = builder.module.values.get(sema_loop.body);
+                    const condition: Compilation.Value.Index = switch (sema_loop_condition.*) {
+                        .bool => |bool_value| switch (bool_value) {
+                            true => Compilation.Value.Index.invalid,
+                            false => unreachable,
+                        },
+                        else => |t| @panic(@tagName(t)),
+                    };
+
+                    const original_block = builder.current_basic_block;
+                    const jump_to_loop = try builder.append(.{
+                        .jump = undefined,
+                    });
+                    const loop_body_block = try builder.newBlock();
+                    const loop_prologue_block = if (options.emit_exit_block) try builder.newBlock() else BasicBlock.Index.invalid;
+
+                    const loop_head_block = switch (condition.valid) {
+                        false => loop_body_block,
+                        true => unreachable,
+                    };
+
+                    builder.ir.instructions.get(jump_to_loop).jump = try builder.jump(.{
+                        .source = original_block,
+                        .destination = loop_head_block,
+                    });
+
+                    const sema_body_block = builder.module.blocks.get(sema_loop_body.block);
+                    builder.current_basic_block = try builder.blockInsideBasicBlock(sema_body_block, loop_body_block);
+                    if (loop_prologue_block.valid) {
+                        builder.ir.blocks.get(loop_prologue_block).seal();
+                    }
+
+                    if (sema_body_block.reaches_end) {
+                        _ = try builder.append(.{
+                            .jump = try builder.jump(.{
+                                .source = builder.current_basic_block,
+                                .destination = loop_head_block,
+                            }),
+                        });
+                    }
+
+                    builder.ir.blocks.get(builder.current_basic_block).filled = true;
+                    builder.ir.blocks.get(loop_body_block).seal();
+                    if (!loop_head_block.eq(loop_body_block)) {
+                        unreachable;
+                    }
+
+                    if (loop_prologue_block.valid) {
+                        builder.current_basic_block = loop_prologue_block;
+                    }
+                },
+                else => |t| @panic(@tagName(t)),
+            }
+        }
+    }
+
+    fn jump(builder: *Builder, jump_descriptor: Jump) !Jump.Index {
+        const destination_block = builder.ir.blocks.get(jump_descriptor.destination);
+        assert(!destination_block.sealed);
+        return try builder.ir.jumps.append(builder.allocator, jump_descriptor);
+    }
+
+    fn append(builder: *Builder, instruction: Instruction) !Instruction.Index {
+        assert(builder.current_basic_block.valid);
+        return builder.appendToBlock(builder.current_basic_block, instruction);
+    }
+
+    fn appendToBlock(builder: *Builder, block_index: BasicBlock.Index, instruction: Instruction) !Instruction.Index {
+        const instruction_index = try builder.ir.instructions.append(builder.allocator, instruction);
+        try builder.ir.blocks.get(block_index).instructions.append(builder.allocator, instruction_index);
+
+        return instruction_index;
+    }
+
+    fn newBlock(builder: *Builder) !BasicBlock.Index {
+        const new_block_index = try builder.ir.blocks.append(builder.allocator, .{});
+        const current_function = builder.ir.functions.get(builder.current_function_index);
+        const function_block_index = current_function.blocks.items.len;
+        try current_function.blocks.append(builder.allocator, new_block_index);
+
+        print("Adding block: {}\n", .{function_block_index});
+
+        return new_block_index;
+    }
+};
diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig
new file mode 100644
index 0000000..e69de29
diff --git a/src/compiler.zig b/src/compiler.zig
deleted file mode 100644
index b7c8214..0000000
--- a/src/compiler.zig
+++ /dev/null
@@ -1,21 +0,0 @@
-const std = @import("std");
-
-const Allocator = std.mem.Allocator;
-
-const data_structures = @import("data_structures.zig");
-
-const lexer = @import("lexer.zig");
-const parser = @import("parser.zig");
-
-test {
-    _ = lexer;
-    _ = parser;
-}
-
-pub fn cycle(allocator: Allocator, file_relative_path: []const u8) !void {
-    const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize));
-    std.debug.print("File:\n\n```\n{s}\n```\n", .{file});
-    const lexer_result = try lexer.lex(allocator, file);
-    const parser_result = try parser.parse(allocator, &lexer_result);
-    _ = parser_result;
-}
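One detail worth calling out in `Builder.block` above: the jump into a loop is appended while its destination block does not exist yet (`.jump = undefined`), and the instruction is back-patched once `newBlock` has returned. A toy, self-contained version of that pattern (not the compiler's real types):

```zig
const std = @import("std");

const Instruction = union(enum) { jump: u32 };

test "append a jump first, patch its destination later" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    var instructions = std.ArrayList(Instruction).init(arena.allocator());

    // Emit the jump before its target block exists.
    try instructions.append(.{ .jump = undefined });
    const to_patch = instructions.items.len - 1;

    // Create the block, then back-patch the placeholder.
    const loop_body_block: u32 = 1;
    instructions.items[to_patch].jump = loop_body_block;

    try std.testing.expectEqual(loop_body_block, instructions.items[to_patch].jump);
}
```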
diff --git a/src/data_structures.zig b/src/data_structures.zig
index 58fbed7..cc47ff3 100644
--- a/src/data_structures.zig
+++ b/src/data_structures.zig
@@ -1,4 +1,155 @@
 const std = @import("std");
+const assert = std.debug.assert;
+
+pub const Allocator = std.mem.Allocator;
 
 pub const ArrayList = std.ArrayListUnmanaged;
-pub const HashMap = std.AutoHashMap;
+pub const AutoHashMap = std.AutoHashMapUnmanaged;
+pub const HashMap = std.HashMapUnmanaged;
+pub const SegmentedList = std.SegmentedList;
+pub const StringHashMap = std.StringHashMapUnmanaged;
+pub const StringArrayHashMap = std.StringArrayHashMapUnmanaged;
+
+pub fn BlockList(comptime T: type) type {
+    const item_count = 64;
+    const Block = struct {
+        items: [item_count]T = undefined,
+        bitset: Bitset = Bitset.initEmpty(),
+
+        const Bitset = std.StaticBitSet(item_count);
+
+        fn allocateIndex(block: *@This()) !u6 {
+            if (block.bitset.mask != std.math.maxInt(@TypeOf(block.bitset.mask))) {
+                const index = @ctz(~block.bitset.mask);
+                block.bitset.set(index);
+                return @intCast(index);
+            } else {
+                return error.OutOfMemory;
+            }
+        }
+    };
+
+    return struct {
+        blocks: ArrayList(Block) = .{},
+        len: usize = 0,
+        first_block: u32 = 0,
+
+        const List = @This();
+
+        pub const Index = packed struct(u32) {
+            block: u24,
+            index: u6,
+            _reserved: bool = false,
+            valid: bool = true,
+
+            pub const invalid = Index{
+                .valid = false,
+                .index = 0,
+                .block = 0,
+            };
+
+            pub fn eq(index: Index, other: Index) bool {
+                return @as(u32, @bitCast(index)) == @as(u32, @bitCast(other));
+            }
+        };
+
+        pub const Iterator = struct {
+            block_index: u26,
+            element_index: u7,
+            list: *const List,
+
+            pub fn next(i: *Iterator) ?T {
+                return if (i.nextPointer()) |ptr| ptr.* else null;
+            }
+
+            pub fn nextPointer(i: *Iterator) ?*T {
+                if (i.element_index >= item_count) {
+                    i.block_index += 1;
+                    i.element_index = 0;
+                }
+
+                while (i.block_index < i.list.blocks.items.len) : (i.block_index += 1) {
+                    while (i.element_index < item_count) : (i.element_index += 1) {
+                        if (i.list.blocks.items[i.block_index].bitset.isSet(i.element_index)) {
+                            const index = i.element_index;
+                            i.element_index += 1;
+                            return &i.list.blocks.items[i.block_index].items[index];
+                        }
+                    }
+                }
+
+                return null;
+            }
+        };
+
+        pub fn iterator(list: *const List) Iterator {
+            return .{
+                .block_index = 0,
+                .element_index = 0,
+                .list = list,
+            };
+        }
+
+        pub fn get(list: *List, index: Index) *T {
+            assert(index.valid);
+            return &list.blocks.items[index.block].items[index.index];
+        }
+
+        pub fn append(list: *List, allocator: Allocator, element: T) !Index {
+            try list.ensureCapacity(allocator, list.len + 1);
+            const max_allocation = list.blocks.items.len * item_count;
+            if (list.len < max_allocation) {
+                // Follow the guess
+                if (list.blocks.items[list.first_block].allocateIndex()) |index| {
+                    list.blocks.items[list.first_block].items[index] = element;
+                    list.len += 1;
+                    return .{
+                        .index = index,
+                        .block = @intCast(list.first_block),
+                    };
+                } else |_| {
+                    @panic("TODO");
+                }
+            } else {
+                const block_index = list.blocks.items.len;
+                const new_block = list.blocks.addOneAssumeCapacity();
+                new_block.* = .{};
+                const index = new_block.allocateIndex() catch unreachable;
+                new_block.items[index] = element;
+                list.len += 1;
+                return .{
+                    .index = index,
+                    .block = @intCast(block_index),
+                };
+            }
+        }
+
+        pub fn ensureCapacity(list: *List, allocator: Allocator, new_capacity: usize) !void {
+            const max_allocation = list.blocks.items.len * item_count;
+            if (max_allocation < new_capacity) {
+                const block_count = new_capacity / item_count + @intFromBool(new_capacity % item_count != 0);
+                try list.blocks.ensureTotalCapacity(allocator, block_count);
+            }
+        }
+
+        test "Bitset index allocation" {
+            const expect = std.testing.expect;
+            var block = Block{};
+            for (0..item_count) |expected_index| {
+                const new_index = try block.allocateIndex();
+                try expect(new_index == expected_index);
+            }
+
+            _ = block.allocateIndex() catch return;
+
+            return error.TestUnexpectedResult;
+        }
+    };
+}
+
+pub fn enumFromString(comptime E: type, string: []const u8) ?E {
+    return inline for (@typeInfo(E).Enum.fields) |enum_field| {
+        if (std.mem.eql(u8, string, enum_field.name)) {
+            break @field(E, enum_field.name);
+        }
+    } else null;
+}
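The packed `Index` is the key design decision here: instead of pointers, every compiler entity is addressed by a 32-bit handle (24-bit block number, 6-bit slot, one reserved bit, one validity bit), which stays stable because 64-item blocks never move once allocated. A minimal usage sketch, assuming an arena so the list's blocks need no individual cleanup:

```zig
const std = @import("std");
const data_structures = @import("data_structures.zig");

test "BlockList handles: append, get, iterate" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var list = data_structures.BlockList(u32){};
    const index = try list.append(allocator, 42);
    try std.testing.expect(index.valid);
    try std.testing.expect(!index.eq(@TypeOf(index).invalid));

    // Handles are dereferenced through the owning list.
    list.get(index).* += 1;

    var it = list.iterator();
    try std.testing.expectEqual(@as(?u32, 43), it.next());
    try std.testing.expectEqual(@as(?u32, null), it.next());
}
```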
diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig
new file mode 100644
index 0000000..a5f26ce
--- /dev/null
+++ b/src/frontend/lexical_analyzer.zig
@@ -0,0 +1,187 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+const log = std.log;
+
+const equal = std.mem.eql;
+
+const data_structures = @import("../data_structures.zig");
+const ArrayList = data_structures.ArrayList;
+const enumFromString = data_structures.enumFromString;
+
+const Compilation = @import("../Compilation.zig");
+const fs = @import("../fs.zig");
+
+pub const Token = packed struct(u64) {
+    start: u32,
+    len: u24,
+    id: Id,
+
+    pub const Id = enum(u8) {
+        eof = 0x00,
+        identifier = 0x01,
+        number = 0x02,
+        string_literal = 0x03,
+        fixed_keyword_function = 0x04,
+        fixed_keyword_const = 0x05,
+        fixed_keyword_var = 0x06,
+        fixed_keyword_void = 0x07,
+        fixed_keyword_noreturn = 0x08,
+        fixed_keyword_comptime = 0x09,
+        fixed_keyword_while = 0x0a,
+        fixed_keyword_bool = 0x0b,
+        fixed_keyword_true = 0x0c,
+        fixed_keyword_false = 0x0d,
+        bang = '!', // 0x21
+        hash = '#', // 0x23
+        dollar_sign = '$', // 0x24
+        modulus = '%', // 0x25
+        ampersand = '&', // 0x26
+        left_parenthesis = '(', // 0x28
+        right_parenthesis = ')', // 0x29
+        asterisk = '*', // 0x2a
+        plus = '+', // 0x2b
+        comma = ',', // 0x2c
+        minus = '-', // 0x2d
+        period = '.', // 0x2e
+        slash = '/', // 0x2f
+        colon = ':', // 0x3a
+        semicolon = ';', // 0x3b
+        less = '<', // 0x3c
+        equal = '=', // 0x3d
+        greater = '>', // 0x3e
+        question_mark = '?', // 0x3f
+        at = '@', // 0x40
+        left_bracket = '[', // 0x5b
+        backlash = '\\', // 0x5c
+        right_bracket = ']', // 0x5d
+        caret = '^', // 0x5e
+        underscore = '_', // 0x5f
+        grave = '`', // 0x60
+        left_brace = '{', // 0x7b
+        vertical_bar = '|', // 0x7c
+        right_brace = '}', // 0x7d
+        tilde = '~', // 0x7e
+    };
+
+    pub const Index = u32;
+};
+
+pub const FixedKeyword = enum {
+    @"comptime",
+    @"const",
+    @"var",
+    void,
+    noreturn,
+    function,
+    @"while",
+    bool,
+    true,
+    false,
+};
+
+pub const Result = struct {
+    tokens: ArrayList(Token),
+    time: u64,
+};
+
+pub fn analyze(allocator: Allocator, text: []const u8) !Result {
+    const time_start = std.time.Instant.now() catch unreachable;
+    var tokens = try ArrayList(Token).initCapacity(allocator, text.len / 8);
+    var index: usize = 0;
+
+    while (index < text.len) {
+        const start_index = index;
+        const start_character = text[index];
+        const token_id: Token.Id = switch (start_character) {
+            'a'...'z', 'A'...'Z', '_' => blk: {
+                while (true) {
+                    const ch = text[index];
+                    if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_' or (ch >= '0' and ch <= '9')) {
+                        index += 1;
+                        continue;
+                    }
+
+                    break;
+                }
+
+                const identifier = text[start_index..][0 .. index - start_index];
+                std.debug.print("Identifier: {s}\n", .{identifier});
+
+                if (start_character == 'u' or start_character == 's') {
+                    var index_integer = start_index + 1;
+                    while (text[index_integer] >= '0' and text[index_integer] <= '9') {
+                        index_integer += 1;
+                    }
+
+                    if (index_integer == index) {
+                        unreachable;
+                    }
+                }
+
+                break :blk if (enumFromString(FixedKeyword, text[start_index..][0 .. index - start_index])) |fixed_keyword| switch (fixed_keyword) {
+                    inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)),
+                } else .identifier;
+            },
+            '(', ')', '{', '}', '-', '=', ';', '#' => |operator| blk: {
+                index += 1;
+                break :blk @enumFromInt(operator);
+            },
+            '0'...'9' => blk: {
+                while (text[index] >= '0' and text[index] <= '9') {
+                    index += 1;
+                }
+
+                break :blk .number;
+            },
+            '\'' => {
+                unreachable;
+            },
+            '"' => blk: {
+                index += 1;
+
+                while (true) {
+                    if (text[index] == '"' and text[index - 1] != '"') {
+                        break;
+                    }
+
+                    index += 1;
+                }
+
+                index += 1;
+
+                break :blk .string_literal;
+            },
+            ' ', '\n', '\r', '\t' => {
+                index += 1;
+                continue;
+            },
+            else => |foo| {
+                std.debug.panic("NI: '{c}'", .{foo});
+            },
+        };
+
+        const end_index = index;
+
+        try tokens.append(allocator, .{
+            .start = @intCast(start_index),
+            .len = @intCast(end_index - start_index),
+            .id = token_id,
+        });
+    }
+
+    const should_log = true;
+    if (should_log) {
+        for (tokens.items, 0..) |token, i| {
+            std.debug.print("#{} {s}\n", .{ i, @tagName(token.id) });
+        }
+    }
+
+    const time_end = std.time.Instant.now() catch unreachable;
+    const time = time_end.since(time_start);
+
+    return .{
+        .tokens = tokens,
+        .time = time,
+    };
+}
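The lexer can be exercised in isolation. A small sketch (the source string is made up for illustration; note the trailing semicolon matters, because the identifier and number fast paths read one byte ahead and would index past the end of the buffer on a trailing identifier):

```zig
const std = @import("std");
const lexical_analyzer = @import("frontend/lexical_analyzer.zig");

test "lex a tiny nat fragment" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    const result = try lexical_analyzer.analyze(arena.allocator(), "const std = #import(\"std.nat\");");

    // Single-character operators reuse their ASCII value as the enum tag,
    // so '=' lexes directly to .equal, '#' to .hash, and so on.
    const ids = [_]lexical_analyzer.Token.Id{
        .fixed_keyword_const, .identifier,       .equal,          .hash,
        .identifier,          .left_parenthesis, .string_literal, .right_parenthesis,
        .semicolon,
    };
    try std.testing.expectEqual(@as(usize, ids.len), result.tokens.items.len);
    for (ids, result.tokens.items) |expected, token| {
        try std.testing.expectEqual(expected, token.id);
    }
}
```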
diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig
new file mode 100644
index 0000000..65be4e4
--- /dev/null
+++ b/src/frontend/semantic_analyzer.zig
@@ -0,0 +1,731 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+const equal = std.mem.eql;
+const Compilation = @import("../Compilation.zig");
+const File = Compilation.File;
+const Module = Compilation.Module;
+const Package = Compilation.Package;
+
+const Assignment = Compilation.Assignment;
+const Block = Compilation.Block;
+const Declaration = Compilation.Declaration;
+const Field = Compilation.Field;
+const Function = Compilation.Function;
+const Loop = Compilation.Loop;
+const Scope = Compilation.Scope;
+const Struct = Compilation.Struct;
+const Type = Compilation.Type;
+const Value = Compilation.Value;
+
+const lexical_analyzer = @import("lexical_analyzer.zig");
+const Token = lexical_analyzer.Token;
+
+const syntactic_analyzer = @import("syntactic_analyzer.zig");
+const ContainerDeclaration = syntactic_analyzer.ContainerDeclaration;
+const Node = syntactic_analyzer.Node;
+const SymbolDeclaration = syntactic_analyzer.SymbolDeclaration;
+
+const data_structures = @import("../data_structures.zig");
+const ArrayList = data_structures.ArrayList;
+const HashMap = data_structures.AutoHashMap;
+
+const print = std.debug.print;
+
+const Analyzer = struct {
+    source_code: []const u8,
+    nodes: []const Node,
+    tokens: []const Token,
+    file: *File,
+    allocator: Allocator,
+    module: *Module,
+
+    fn lazyGlobalDeclaration(analyzer: *Analyzer, node_index: Node.Index) void {
+        print("Global: {}", .{analyzer.nodes[node_index.unwrap()]});
+    }
+
+    fn comptimeBlock(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index {
+        const comptime_node = analyzer.nodes[node_index.unwrap()];
+
+        const comptime_block = try analyzer.block(scope, .{ .none = {} }, comptime_node.left);
+        return try analyzer.module.values.append(analyzer.allocator, .{
+            .block = comptime_block,
+        });
+    }
+
+    fn assign(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Assignment.Index {
+        print("Assign: #{}", .{node_index.value});
+        const node = analyzer.nodes[node_index.unwrap()];
+        assert(node.id == .assign);
+        const Result = struct {
+            left: Value.Index,
+            right: Value.Index,
+        };
+        const result: Result = switch (node.left.valid) {
+            // In an assignment, the node being invalid means a discarding underscore, like this: ```_ = result```
+            false => .{
+                .left = Value.Index.invalid,
+                .right = try analyzer.expression(scope, ExpectType.none, node.right),
+            },
+            true => {
+                const left_node = analyzer.nodes[node.left.unwrap()];
+                print("left node index: {}. Left node: {}", .{ node.left, left_node });
+                // const id = analyzer.tokenIdentifier(.token);
+                // print("id: {s}\n", .{id});
+                const left = try analyzer.expression(scope, ExpectType.none, node.left);
+                _ = left;
+                unreachable;
+            },
+        };
+
+        print("Assignment: L: {}. R: {}\n", .{ result.left, result.right });
+
+        if (result.left.valid and analyzer.module.values.get(result.left).isComptime() and analyzer.module.values.get(result.right).isComptime()) {
+            unreachable;
+        } else {
+            const assignment_index = try analyzer.module.assignments.append(analyzer.allocator, .{
+                .store = result.left,
+                .load = result.right,
+            });
+            return assignment_index;
+        }
+    }
+
+    fn block(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) anyerror!Block.Index {
+        var reaches_end = true;
+        const block_node = analyzer.nodes[node_index.unwrap()];
+        var statement_nodes = ArrayList(Node.Index){};
+        switch (block_node.id) {
+            .block_one, .comptime_block_one => {
+                try statement_nodes.append(analyzer.allocator, block_node.left);
+            },
+            .block_zero, .comptime_block_zero => {},
+            else => |t| @panic(@tagName(t)),
+        }
+
+        const is_comptime = switch (block_node.id) {
+            .comptime_block_zero, .comptime_block_one => true,
+            .block_zero, .block_one => false,
+            else => |t| @panic(@tagName(t)),
+        };
+        _ = is_comptime;
+
+        var statements = ArrayList(Value.Index){};
+
+        for (statement_nodes.items) |statement_node_index| {
+            if (!reaches_end) {
+                unreachable;
+            }
+
+            const statement_node = analyzer.nodes[statement_node_index.unwrap()];
+            const statement_value = switch (statement_node.id) {
+                inline .assign, .simple_while => |statement_id| blk: {
+                    const specific_value_index = switch (statement_id) {
+                        .assign => try analyzer.assign(scope, statement_node_index),
+                        .simple_while => statement: {
+                            const loop_index = try analyzer.module.loops.append(analyzer.allocator, .{
+                                .condition = Value.Index.invalid,
+                                .body = Value.Index.invalid,
+                                .breaks = false,
+                            });
+                            const loop_structure = analyzer.module.loops.get(loop_index);
+                            const while_condition = try analyzer.expression(scope, ExpectType.boolean, statement_node.left);
+                            const while_body = try analyzer.expression(scope, expect_type, statement_node.right);
+                            loop_structure.condition = while_condition;
+                            loop_structure.body = while_body;
+
+                            reaches_end = loop_structure.breaks or while_condition.valid;
+
+                            break :statement loop_index;
+                        },
+                        else => unreachable,
+                    };
+                    const value = @unionInit(Value, switch (statement_id) {
+                        .assign => "assign",
+                        .simple_while => "loop",
+                        else => unreachable,
+                    }, specific_value_index);
+                    const value_index = try analyzer.module.values.append(analyzer.allocator, value);
+                    break :blk value_index;
+                },
+                else => |t| @panic(@tagName(t)),
+            };
+            try statements.append(analyzer.allocator, statement_value);
+        }
+
+        return try analyzer.module.blocks.append(analyzer.allocator, .{
+            .statements = statements,
+            .reaches_end = reaches_end,
+        });
+    }
+
+    fn whileExpression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node: Node) !Loop.Index {
+        _ = node;
+        _ = expect_type;
+        _ = scope;
+        _ = analyzer;
+    }
+
+    fn resolve(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, value: *Value) !void {
+        const node_index = switch (value.*) {
+            .unresolved => |unresolved| unresolved.node_index,
+            else => |t| @panic(@tagName(t)),
+        };
+        value.* = try analyzer.resolveNode(scope, expect_type, node_index);
+    }
+
+    fn doIdentifier(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node: Node) !Value.Index {
+        assert(node.id == .identifier);
+        const identifier_hash = try analyzer.identifierFromToken(node.token);
+        // TODO: search in upper scopes too
+        const identifier_scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_hash);
+        if (identifier_scope_lookup.found_existing) {
(identifier_scope_lookup.found_existing) { + const declaration_index = identifier_scope_lookup.value_ptr.*; + const declaration = analyzer.module.declarations.get(declaration_index); + const init_value = analyzer.module.values.get(declaration.init_value); + try analyzer.resolve(scope, expect_type, init_value); + if (init_value.* != .runtime and declaration.mutability == .@"const") { + return declaration.init_value; + } else { + unreachable; + } + } else { + @panic("TODO: not found"); + } + } + + fn resolveNode(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) anyerror!Value { + const node = analyzer.nodes[node_index.unwrap()]; + return switch (node.id) { + .identifier => unreachable, + .compiler_intrinsic_one => blk: { + const intrinsic_name = analyzer.tokenIdentifier(node.token + 1); + const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable; + print("Intrinsic: {s}", .{@tagName(intrinsic)}); + switch (intrinsic) { + .import => { + const import_argument = analyzer.nodes[node.left.unwrap()]; + switch (import_argument.id) { + .string_literal => { + const import_name = analyzer.tokenStringLiteral(import_argument.token); + const imported_file = try analyzer.module.importFile(analyzer.allocator, analyzer.file, import_name); + + if (imported_file.is_new) { + // TODO: fix error + analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, imported_file.file) catch return error.OutOfMemory; + } else { + unreachable; + } + + break :blk .{ + .type = try analyzeFile(analyzer.allocator, analyzer.module, imported_file.file), + }; + }, + else => unreachable, + } + }, + } + unreachable; + }, + .function_definition => blk: { + const function_prototype_index = try analyzer.functionPrototype(node.left); + + const function_body = try analyzer.block(scope, .{ + .type_index = analyzer.functionPrototypeReturnType(function_prototype_index), + }, node.right); + + const function_index = try analyzer.module.functions.append(analyzer.allocator, .{ + .prototype = function_prototype_index, + .body = function_body, + }); + break :blk .{ + .function = function_index, + }; + }, + .keyword_true => unreachable, + .simple_while => unreachable, + // .assign => try analyzer.assign(scope, node_index), + .block_zero, .block_one => blk: { + const block_index = try analyzer.block(scope, expect_type, node_index); + break :blk .{ + .block = block_index, + }; + }, + else => |t| @panic(@tagName(t)), + }; + } + + fn expression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) !Value.Index { + const node = analyzer.nodes[node_index.unwrap()]; + return switch (node.id) { + .identifier => analyzer.doIdentifier(scope, expect_type, node), + .keyword_true => blk: { + switch (expect_type) { + .none => {}, + .type_index => |expected_type| { + if (@as(u32, @bitCast(type_boolean)) != @as(u32, @bitCast(expected_type))) { + @panic("TODO: compile error"); + } + }, + } + + break :blk bool_true; + }, + .block_zero => try analyzer.module.values.append(analyzer.allocator, .{ + .block = try analyzer.block(scope, expect_type, node_index), + }), + else => |t| @panic(@tagName(t)), + }; + } + + fn functionPrototypeReturnType(analyzer: *Analyzer, function_prototype_index: Function.Prototype.Index) Type.Index { + const function_prototype = analyzer.module.function_prototypes.get(function_prototype_index); + return function_prototype.return_type; + } + + fn functionPrototype(analyzer: *Analyzer, node_index: Node.Index) 
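
// identifierFromToken (used by doIdentifier above) interns identifiers as
// truncated Wyhash keys, so scope maps store u32 keys instead of string
// slices. A minimal sketch of the same scheme, assuming a plain AutoHashMap
// in place of the patch's module.string_table; hash collisions after
// truncation are ignored here, as they are in the patch:
const std = @import("std");

fn internIdentifier(map: *std.AutoHashMap(u32, []const u8), identifier: []const u8) !u32 {
    const key: u32 = @truncate(std.hash.Wyhash.hash(0, identifier));
    const gop = try map.getOrPut(key);
    if (!gop.found_existing) {
        gop.value_ptr.* = identifier;
    }
    return key;
}

test "interning the same identifier yields the same key" {
    var map = std.AutoHashMap(u32, []const u8).init(std.testing.allocator);
    defer map.deinit();
    try std.testing.expectEqual(try internIdentifier(&map, "main"), try internIdentifier(&map, "main"));
}
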
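
// resolve() above rewrites a Value in place: a declaration starts life as an
// .unresolved node reference and is analyzed the first time something reads
// it. The shape of that lazy resolution, reduced to a self-contained toy
// (LazyValue is a stand-in, not the patch's Compilation.Value):
const std = @import("std");

const LazyValue = union(enum) {
    unresolved: u32, // AST node index
    integer: u64,

    fn resolve(value: *LazyValue) void {
        switch (value.*) {
            // Toy "analysis": pretend node #n evaluates to n * 2.
            .unresolved => |node_index| value.* = .{ .integer = @as(u64, node_index) * 2 },
            else => {},
        }
    }
};

test "values resolve in place on first use" {
    var value = LazyValue{ .unresolved = 21 };
    value.resolve();
    try std.testing.expectEqual(@as(u64, 42), value.integer);
}
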
!Function.Prototype.Index { + const node = analyzer.nodes[node_index.unwrap()]; + switch (node.id) { + .simple_function_prototype => { + const arguments: ?[]const Field.Index = blk: { + const argument_node = analyzer.nodes[node.left.get() orelse break :blk null]; + switch (argument_node.id) { + else => |t| @panic(@tagName(t)), + } + }; + const return_type_node = analyzer.nodes[node.right.unwrap()]; + const return_type: Type.Index = switch (return_type_node.id) { + .identifier => { + unreachable; + }, + .keyword_noreturn => .{ .block = 0, .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.noreturn) }, + else => |t| @panic(@tagName(t)), + }; + + return try analyzer.module.function_prototypes.append(analyzer.allocator, .{ + .arguments = arguments, + .return_type = return_type, + }); + }, + else => |t| @panic(@tagName(t)), + } + } + + fn analyzeDeclaration(analyzer: *Analyzer, scope: *Scope, declaration: *Declaration) !Value.Index { + _ = declaration; + _ = scope; + _ = analyzer; + // switch (declaration.*) { + // .unresolved => |node_index| { + // const declaration_node = analyzer.nodes[node_index.unwrap()]; + // return switch (declaration_node.id) { + // .simple_variable_declaration => blk: { + // const expect_type = switch (declaration_node.left.valid) { + // true => unreachable, + // false => @unionInit(ExpectType, "none", {}), + // }; + // + // const initialization_expression = try analyzer.expression(scope, expect_type, declaration_node.right); + // const value = analyzer.module.values.get(initialization_expression); + // if (value.is_comptime and value.is_const) { + // break :blk initialization_expression; + // } + // + // unreachable; + // }, + // else => |t| @panic(@tagName(t)), + // }; + // }, + // .struct_type => unreachable, + // } + + @panic("TODO: analyzeDeclaration"); + } + + fn globalSymbolDeclaration(analyzer: *Analyzer, symbol_declaration: SymbolDeclaration) !void { + if (symbol_declaration.type_node.get()) |type_node_index| { + _ = type_node_index; + @panic("TODO: type node"); + } + const initialization_node = analyzer.nodes[symbol_declaration.initialization_node.unwrap()]; + switch (initialization_node.id) { + .compiler_intrinsic_one => { + const intrinsic_name = analyzer.tokenIdentifier(initialization_node.token + 1); + const intrinsic = inline for (@typeInfo(Intrinsic).Enum.fields) |intrinsic_enum_field| { + if (equal(u8, intrinsic_name, intrinsic_enum_field.name)) { + break @field(Intrinsic, intrinsic_enum_field.name); + } + } else unreachable; + print("Intrinsic: {s}", .{@tagName(intrinsic)}); + switch (intrinsic) { + .import => { + const import_argument = analyzer.nodes[initialization_node.left.get()]; + switch (import_argument.id) { + .string_literal => unreachable, + else => unreachable, + } + }, + } + // const intrinsic_node_index = initialization_node.left.unwrap(); + // const intrinsic_node = analyzer.nodes[intrinsic_node_index]; + // + // switch (intrinsic_node.id) { + // .string_literal => + // } + // print("intrinsic: {}", .{intrinsic_node.id}); + + // _ = a; + }, + else => unreachable, + } + print("Init node: {}\n", .{initialization_node}); + @panic("TODO"); + } + + fn symbolDeclaration(analyzer: *Analyzer, node_index: Node.Index) SymbolDeclaration { + const node = analyzer.nodes[node_index.unwrap()]; + return switch (node.id) { + .simple_variable_declaration => .{ + .type_node = node.left, + .initialization_node = node.right, + .mutability_token = node.token, + }, + else => unreachable, + }; + } + + fn structType(analyzer: *Analyzer, 
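
// structType (continuing below) walks the member list twice: a counting pass
// sizes two ArrayLists exactly, and a second pass fills them with
// appendAssumeCapacity so no reallocation can happen mid-analysis. The
// pattern in isolation (splitEvens is an illustrative stand-in):
const std = @import("std");

fn splitEvens(allocator: std.mem.Allocator, items: []const u32) !std.ArrayListUnmanaged(u32) {
    // First pass: count, so the second pass cannot reallocate.
    var even_count: usize = 0;
    for (items) |item| even_count += @intFromBool(item % 2 == 0);

    var evens = try std.ArrayListUnmanaged(u32).initCapacity(allocator, even_count);
    for (items) |item| {
        if (item % 2 == 0) evens.appendAssumeCapacity(item);
    }
    return evens;
}

test "count first, then fill with appendAssumeCapacity" {
    var evens = try splitEvens(std.testing.allocator, &.{ 1, 2, 3, 4 });
    defer evens.deinit(std.testing.allocator);
    try std.testing.expectEqualSlices(u32, &.{ 2, 4 }, evens.items);
}
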
parent_scope: Scope.Index, container_declaration: syntactic_analyzer.ContainerDeclaration, index: Node.Index) !Type.Index { + _ = index; + const new_scope = try analyzer.allocateScope(.{ .parent = parent_scope }); + const scope = new_scope.ptr; + + const is_file = !parent_scope.valid; + assert(is_file); + + const struct_index = try analyzer.module.structs.append(analyzer.allocator, .{ + .scope = new_scope.index, + }); + const struct_type = analyzer.module.structs.get(struct_index); + const type_index = try analyzer.module.types.append(analyzer.allocator, .{ + .@"struct" = struct_index, + }); + scope.type = type_index; + + _ = struct_type; + assert(container_declaration.members.len > 0); + + const count = blk: { + var result: struct { + fields: u32 = 0, + declarations: u32 = 0, + } = .{}; + for (container_declaration.members) |member_index| { + const member = analyzer.nodes[member_index.unwrap()]; + const member_type = getContainerMemberType(member.id); + + switch (member_type) { + .declaration => result.declarations += 1, + .field => result.fields += 1, + } + } + break :blk result; + }; + + var declaration_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.declarations); + var field_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.fields); + + for (container_declaration.members) |member_index| { + const member = analyzer.nodes[member_index.unwrap()]; + const member_type = getContainerMemberType(member.id); + const array_list = switch (member_type) { + .declaration => &declaration_nodes, + .field => &field_nodes, + }; + array_list.appendAssumeCapacity(member_index); + } + + for (declaration_nodes.items) |declaration_node_index| { + const declaration_node = analyzer.nodes[declaration_node_index.unwrap()]; + switch (declaration_node.id) { + .@"comptime" => {}, + .simple_variable_declaration => { + const mutability: Compilation.Mutability = switch (analyzer.tokens[declaration_node.token].id) { + .fixed_keyword_const => .@"const", + .fixed_keyword_var => .@"var", + else => |t| @panic(@tagName(t)), + }; + const expected_identifier_token_index = declaration_node.token + 1; + const expected_identifier_token = analyzer.tokens[expected_identifier_token_index]; + if (expected_identifier_token.id != .identifier) { + print("Error: found: {}", .{expected_identifier_token.id}); + @panic("Expected identifier"); + } + // TODO: Check if it is a keyword + + const identifier_index = try analyzer.identifierFromToken(expected_identifier_token_index); + + const declaration_name = analyzer.tokenIdentifier(expected_identifier_token_index); + // Check if the symbol name is already occupied in the same scope + const scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_index); + if (scope_lookup.found_existing) { + std.debug.panic("Existing name in lookup: {s}", .{declaration_name}); + } + + // Check if the symbol name is already occupied in parent scopes + var upper_scope_index = scope.parent; + + while (upper_scope_index.valid) { + @panic("TODO: upper scope"); + } + + const container_declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{ + .name = declaration_name, + .scope_type = .global, + .mutability = mutability, + .init_value = try analyzer.module.values.append(analyzer.allocator, .{ + .unresolved = .{ + .node_index = declaration_node.right, + }, + }), + }); + + scope_lookup.value_ptr.* = container_declaration_index; + }, + else => unreachable, + } + } + + // TODO: consider iterating over scope declarations 
instead?
+        for (declaration_nodes.items) |declaration_node_index| {
+            const declaration_node = analyzer.nodes[declaration_node_index.unwrap()];
+            switch (declaration_node.id) {
+                .@"comptime" => _ = try analyzer.comptimeBlock(scope, declaration_node_index),
+                .simple_variable_declaration => {},
+                else => |t| @panic(@tagName(t)),
+            }
+        }
+
+        for (field_nodes.items) |field_index| {
+            const field_node = analyzer.nodes[field_index.unwrap()];
+            _ = field_node;
+
+            @panic("TODO: fields");
+        }
+
+        return type_index;
+    }
+
+    const MemberType = enum {
+        declaration,
+        field,
+    };
+
+    fn getContainerMemberType(member_id: Node.Id) MemberType {
+        return switch (member_id) {
+            .@"comptime" => .declaration,
+            .simple_variable_declaration => .declaration,
+            else => unreachable,
+        };
+    }
+
+    fn identifierFromToken(analyzer: *Analyzer, token_index: Token.Index) !u32 {
+        const identifier = analyzer.tokenIdentifier(token_index);
+        const key: u32 = @truncate(std.hash.Wyhash.hash(0, identifier));
+
+        const lookup_result = try analyzer.module.string_table.getOrPut(analyzer.allocator, key);
+
+        if (lookup_result.found_existing) {
+            return lookup_result.key_ptr.*;
+        } else {
+            return key;
+        }
+    }
+
+    fn tokenIdentifier(analyzer: *Analyzer, token_index: Token.Index) []const u8 {
+        const token = analyzer.tokens[token_index];
+        assert(token.id == .identifier);
+        const identifier = analyzer.source_code[token.start..][0..token.len];
+
+        return identifier;
+    }
+
+    fn tokenStringLiteral(analyzer: *Analyzer, token_index: Token.Index) []const u8 {
+        const token = analyzer.tokens[token_index];
+        assert(token.id == .string_literal);
+        // Eat double quotes
+        const start = token.start + 1;
+        const len = token.len - 2;
+        const string_literal = analyzer.source_code[start..][0..len];
+
+        return string_literal;
+    }
+
+    const ScopeAllocation = struct {
+        ptr: *Scope,
+        index: Scope.Index,
+    };
+
+    fn allocateScope(analyzer: *Analyzer, scope_value: Scope) !ScopeAllocation {
+        const scope_index = try analyzer.module.scopes.append(analyzer.allocator, scope_value);
+        const scope = analyzer.module.scopes.get(scope_index);
+
+        return .{
+            .ptr = scope,
+            .index = scope_index,
+        };
+    }
+};
+
+const ExpectType = union(enum) {
+    none,
+    type_index: Type.Index,
+
+    pub const none = ExpectType{
+        .none = {},
+    };
+    pub const boolean = ExpectType{
+        .type_index = type_boolean,
+    };
+};
+
+const type_boolean = Type.Index{
+    .block = 0,
+    .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.bool),
+};
+
+// initialize() appends the `false` value first and `true` second, so the
+// discarded-boolean indices are 0 and 1 respectively.
+const bool_false = Value.Index{
+    .block = 0,
+    .index = 0,
+};
+
+const bool_true = Value.Index{
+    .block = 0,
+    .index = 1,
+};
+
+const Intrinsic = enum {
+    import,
+};
+
+const FixedTypeKeyword = enum {
+    void,
+    noreturn,
+    bool,
+
+    const offset = 0;
+};
+
+const HardwareUnsignedIntegerType = enum {
+    u8,
+    u16,
+    u32,
+    u64,
+
+    const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len;
+};
+
+const HardwareSignedIntegerType = enum {
+    s8,
+    s16,
+    s32,
+    s64,
+
+    const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len;
+};
+
+pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) !Type.Index {
+    inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| {
+        _ = try module.types.append(compilation.base_allocator, @unionInit(Type, enum_field.name, {}));
+    }
+
+    inline for (@typeInfo(HardwareUnsignedIntegerType).Enum.fields) |enum_field| {
+        _ = try module.types.append(compilation.base_allocator, .{
+            .integer = .{
+                .signedness = .unsigned,
+
.bit_count = switch (@field(HardwareUnsignedIntegerType, enum_field.name)) { + .u8 => 8, + .u16 => 16, + .u32 => 32, + .u64 => 64, + }, + }, + }); + } + + inline for (@typeInfo(HardwareSignedIntegerType).Enum.fields) |enum_field| { + _ = try module.types.append(compilation.base_allocator, .{ + .integer = .{ + .signedness = .signed, + .bit_count = switch (@field(HardwareSignedIntegerType, enum_field.name)) { + .s8 => 8, + .s16 => 16, + .s32 => 32, + .s64 => 64, + }, + }, + }); + } + + _ = try module.values.append(compilation.base_allocator, .{ + .bool = false, + }); + + _ = try module.values.append(compilation.base_allocator, .{ + .bool = true, + }); + + return analyzeExistingPackage(compilation, module, package); +} + +pub fn analyzeExistingPackage(compilation: *Compilation, module: *Module, package: *Package) !Type.Index { + const package_import = try module.importPackage(compilation.base_allocator, package); + assert(!package_import.is_new); + const package_file = package_import.file; + + return try analyzeFile(compilation.base_allocator, module, package_file); +} + +pub fn analyzeFile(allocator: Allocator, module: *Module, file: *File) !Type.Index { + assert(file.status == .parsed); + + var analyzer = Analyzer{ + .source_code = file.source_code, + .nodes = file.syntactic_analyzer_result.nodes.items, + .tokens = file.lexical_analyzer_result.tokens.items, + .file = file, + .allocator = allocator, + .module = module, + }; + + const result = try analyzer.structType(Scope.Index.invalid, try mainNodeToContainerDeclaration(allocator, file), .{ .value = 0 }); + return result; +} + +fn mainNodeToContainerDeclaration(allocator: Allocator, file: *File) !ContainerDeclaration { + const main_node = getNode(file, 0); + var list_buffer: [2]Node.Index = undefined; + const left_node = getNode(file, main_node.left.value); + const node_list: []const Node.Index = blk: { + if (left_node.id != .node_list) { + const len = @as(u2, @intFromBool(main_node.left.valid)) + @as(u2, @intFromBool(main_node.right.valid)) - @as(u2, @intFromBool(main_node.left.valid and main_node.right.valid and main_node.left.value == main_node.right.value)); + assert(len > 0); + list_buffer[0] = main_node.left; + list_buffer[1] = main_node.right; + break :blk list_buffer[0..len]; + } else { + @panic("TODO: get list"); + } + }; + + const owned_node_list = try allocator.alloc(Node.Index, node_list.len); + @memcpy(owned_node_list, node_list); + + // Deal properly with this allocation + return .{ + .members = owned_node_list, + }; +} + +fn getNode(file: *const File, index: u32) *Node { + return &file.syntactic_analyzer_result.nodes.items[index]; +} diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig new file mode 100644 index 0000000..36f7fcd --- /dev/null +++ b/src/frontend/syntactic_analyzer.zig @@ -0,0 +1,649 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const equal = std.mem.eql; +const log = std.log; + +const data_structures = @import("../data_structures.zig"); +const ArrayList = data_structures.ArrayList; +const enumFromString = data_structures.enumFromString; +const HashMap = data_structures.HashMap; + +const lexical_analyzer = @import("lexical_analyzer.zig"); +const Token = lexical_analyzer.Token; + +pub const Result = struct { + nodes: ArrayList(Node), + time: u64, +}; + +pub const Options = packed struct { + is_comptime: bool, +}; + +// TODO: pack it to be more efficient +pub const Node = packed struct(u128) { + token: u32, + id: Id, + 
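
// initialize() above derives the builtin type table from the enum
// declarations, so a type's index in module.types is its enum ordinal plus
// the number of entries appended before its group. The offset arithmetic,
// checked in isolation (enums copied from the patch):
const std = @import("std");

const FixedTypeKeyword = enum { void, noreturn, bool };

const HardwareUnsignedIntegerType = enum {
    u8,
    u16,
    u32,
    u64,

    const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len;
};

test "unsigned integer types start right after the fixed keywords" {
    // void = 0, noreturn = 1, bool = 2, so u8 lands at 3, u16 at 4, ...
    try std.testing.expectEqual(@as(usize, 3), HardwareUnsignedIntegerType.offset);
    try std.testing.expectEqual(@as(usize, 4), HardwareUnsignedIntegerType.offset + @intFromEnum(HardwareUnsignedIntegerType.u16));
}
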
left: Node.Index, + right: Node.Index, + + pub const Index = packed struct(u32) { + value: u31, + valid: bool = true, + + pub const invalid = Index{ + .value = 0, + .valid = false, + }; + + pub fn get(index: Index) ?u32 { + return if (index.valid) index.value else null; + } + + pub fn unwrap(index: Index) u32 { + assert(index.valid); + return index.value; + } + }; + + pub const Range = struct { + start: u32, + end: u32, + }; + + pub const Id = enum(u32) { + main = 0, + identifier = 1, + number = 2, + @"return" = 3, + block_one = 4, + function_declaration_no_arguments = 5, + container_declaration = 6, + string_literal = 7, + compiler_intrinsic_one = 8, + simple_variable_declaration = 9, + assign = 10, + @"comptime" = 11, + node_list = 12, + block_zero = 13, + simple_while = 14, + simple_function_prototype = 15, + function_definition = 16, + keyword_noreturn = 17, + keyword_true = 18, + comptime_block_zero = 19, + comptime_block_one = 20, + }; +}; + +const Error = error{ + unexpected_token, + not_implemented, + OutOfMemory, +}; + +const Analyzer = struct { + tokens: []const Token, + token_i: u32 = 0, + nodes: ArrayList(Node) = .{}, + file: []const u8, + allocator: Allocator, + temporal_node_heap: ArrayList(Node.Index) = .{}, + + fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 { + if (analyzer.tokens[analyzer.token_i].id == token_id) { + const result = analyzer.token_i; + analyzer.token_i += 1; + return result; + } else { + return error.unexpected_token; + } + } + + fn getIdentifier(analyzer: *const Analyzer, token: Token) []const u8 { + assert(token.id == .identifier); + const identifier = analyzer.file[token.start..][0..token.len]; + return identifier; + } + + fn containerMembers(analyzer: *Analyzer) !Members { + const node_heap_top = analyzer.temporal_node_heap.items.len; + defer analyzer.temporal_node_heap.shrinkRetainingCapacity(node_heap_top); + + while (analyzer.token_i < analyzer.tokens.len) { + const first = analyzer.token_i; + const member_node: Node = switch (analyzer.tokens[first].id) { + .fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .left_brace => blk: { + analyzer.token_i += 1; + const comptime_block = try analyzer.block(.{ .is_comptime = true }); + + break :blk .{ + .id = .@"comptime", + .token = first, + .left = comptime_block, + .right = Node.Index.invalid, + }; + }, + else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}), + }, + .fixed_keyword_const, .fixed_keyword_var => blk: { + analyzer.token_i += 1; + _ = try analyzer.expectToken(.identifier); + + // TODO: type + _ = try analyzer.expectToken(.equal); + + // TODO: do this in a function + const init_node = switch (analyzer.tokens[analyzer.token_i].id) { + .identifier => unreachable, + .hash => try analyzer.compilerIntrinsic(), + .left_parenthesis => try analyzer.function(), + else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), + }; + + _ = try analyzer.expectToken(.semicolon); + + // TODO: + const type_node = Node.Index.invalid; + const top_level_decl = .{ + .id = .simple_variable_declaration, + .token = first, + .left = type_node, + .right = init_node, + }; + + break :blk top_level_decl; + }, + .identifier => { + unreachable; + }, + else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}), + }; + + const member_index = try analyzer.addNode(member_node); + try analyzer.temporal_node_heap.append(analyzer.allocator, member_index); + } + + const members_array = analyzer.temporal_node_heap.items[node_heap_top..]; + const members: Members = switch (members_array.len) { + 1 
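
// Node.Index above packs an optional 31-bit index plus a validity bit into
// four bytes, which keeps the Node within its 16-byte packed(u128) budget.
// The same pattern reduced to a standalone type (OptIndex is an illustrative
// name):
const std = @import("std");

const OptIndex = packed struct(u32) {
    value: u31,
    valid: bool = true,

    const invalid = OptIndex{ .value = 0, .valid = false };

    fn unwrap(index: OptIndex) u32 {
        std.debug.assert(index.valid);
        return index.value;
    }
};

test "optional index fits in four bytes" {
    try std.testing.expectEqual(@as(usize, 4), @sizeOf(OptIndex));
    try std.testing.expectEqual(@as(u32, 42), (OptIndex{ .value = 42 }).unwrap());
    try std.testing.expect(!OptIndex.invalid.valid);
}
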
=> .{ + .len = 1, + .left = members_array[0], + .right = Node.Index.invalid, + }, + 2 => .{ + .len = 2, + .left = members_array[0], + .right = members_array[1], + }, + else => |len| std.debug.panic("Len: {}", .{len}), + }; + + return members; + } + + fn function(analyzer: *Analyzer) !Node.Index { + const token = analyzer.token_i; + const function_prototype = try analyzer.functionPrototype(); + const is_comptime = false; + _ = is_comptime; + const function_body = try analyzer.block(.{ .is_comptime = false }); + return analyzer.addNode(.{ + .id = .function_definition, + .token = token, + .left = function_prototype, + .right = function_body, + }); + } + + fn functionPrototype(analyzer: *Analyzer) !Node.Index { + const token = analyzer.token_i; + const arguments = try analyzer.argumentList(.left_parenthesis, .right_parenthesis); + const return_type = try analyzer.typeExpression(); + + return analyzer.addNode(.{ + .id = .simple_function_prototype, + .token = token, + .left = arguments, + .right = return_type, + }); + } + + fn argumentList(analyzer: *Analyzer, maybe_start_token: ?Token.Id, end_token: Token.Id) !Node.Index { + if (maybe_start_token) |start_token| { + _ = try analyzer.expectToken(start_token); + } + + var list = ArrayList(Node.Index){}; + + while (analyzer.tokens[analyzer.token_i].id != end_token) { + @panic("TODO: argument list"); + } + + _ = try analyzer.expectToken(end_token); + + if (list.items.len != 0) { + @panic("TODO: arguments"); + } else { + return Node.Index.invalid; + } + } + + fn block(analyzer: *Analyzer, options: Options) !Node.Index { + const left_brace = try analyzer.expectToken(.left_brace); + const node_heap_top = analyzer.temporal_node_heap.items.len; + defer analyzer.temporal_node_heap.shrinkRetainingCapacity(node_heap_top); + + while (analyzer.tokens[analyzer.token_i].id != .right_brace) { + const first_statement_token = analyzer.tokens[analyzer.token_i]; + const statement_index = switch (first_statement_token.id) { + .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .colon => { + unreachable; + }, + else => blk: { + const identifier = analyzer.getIdentifier(first_statement_token); + std.debug.print("Starting statement with identifier: {s}\n", .{identifier}); + const result = try analyzer.assignExpression(); + _ = try analyzer.expectToken(.semicolon); + break :blk result; + }, + }, + .fixed_keyword_while => try analyzer.whileStatement(options), + else => unreachable, + }; + try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index); + } + + _ = try analyzer.expectToken(.right_brace); + + const statement_array = analyzer.temporal_node_heap.items[node_heap_top..]; + const node: Node = switch (statement_array.len) { + 0 => .{ + .id = switch (options.is_comptime) { + true => .comptime_block_zero, + false => .block_zero, + }, + .token = left_brace, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }, + 1 => .{ + .id = switch (options.is_comptime) { + true => .comptime_block_one, + false => .block_one, + }, + .token = left_brace, + .left = statement_array[0], + .right = Node.Index.invalid, + }, + else => |len| std.debug.panic("len: {}", .{len}), + }; + return analyzer.addNode(node); + } + + fn whileStatement(analyzer: *Analyzer, options: Options) error{ OutOfMemory, unexpected_token, not_implemented }!Node.Index { + const while_identifier_index = try analyzer.expectToken(.fixed_keyword_while); + + _ = try analyzer.expectToken(.left_parenthesis); + // TODO: + const while_condition = try analyzer.expression(); + _ = 
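
// containerMembers and block above share one scratch list for in-flight
// child nodes: each caller saves the current length and restores it with
// shrinkRetainingCapacity on exit, so nested parses reuse a single
// allocation. The discipline in miniature (countThisLevel is illustrative;
// the explicit error set is needed because the function recurses):
const std = @import("std");

fn countThisLevel(scratch: *std.ArrayListUnmanaged(u32), allocator: std.mem.Allocator, depth: u32) std.mem.Allocator.Error!usize {
    const top = scratch.items.len;
    defer scratch.shrinkRetainingCapacity(top);

    try scratch.append(allocator, depth);
    if (depth > 0) {
        _ = try countThisLevel(scratch, allocator, depth - 1);
    }
    // Deeper calls have already shrunk back to their own saved top.
    return scratch.items[top..].len;
}

test "scratch list is restored after nested use" {
    var scratch = std.ArrayListUnmanaged(u32){};
    defer scratch.deinit(std.testing.allocator);
    try std.testing.expectEqual(@as(usize, 1), try countThisLevel(&scratch, std.testing.allocator, 3));
    try std.testing.expectEqual(@as(usize, 0), scratch.items.len);
}
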
try analyzer.expectToken(.right_parenthesis); + + const while_block = try analyzer.block(options); + + return analyzer.addNode(.{ + .id = .simple_while, + .token = while_identifier_index, + .left = while_condition, + .right = while_block, + }); + } + + fn assignExpression(analyzer: *Analyzer) !Node.Index { + const expr = try analyzer.expression(); + const expression_id: Node.Id = switch (analyzer.tokens[analyzer.token_i].id) { + .semicolon => return expr, + .equal => .assign, + else => unreachable, + }; + + const node = Node{ + .id = expression_id, + .token = blk: { + const token_i = analyzer.token_i; + analyzer.token_i += 1; + break :blk token_i; + }, + .left = expr, + .right = try analyzer.expression(), + }; + std.debug.print("assign:\nleft: {}.\nright: {}\n", .{ node.left, node.right }); + return analyzer.addNode(node); + } + + fn compilerIntrinsic(analyzer: *Analyzer) !Node.Index { + const hash = try analyzer.expectToken(.hash); + _ = try analyzer.expectToken(.identifier); + _ = try analyzer.expectToken(.left_parenthesis); + + const temporal_heap_top = analyzer.temporal_node_heap.items.len; + defer analyzer.temporal_node_heap.shrinkRetainingCapacity(temporal_heap_top); + + while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { + const parameter = try analyzer.expression(); + try analyzer.temporal_node_heap.append(analyzer.allocator, parameter); + + switch (analyzer.tokens[analyzer.token_i].id) { + .comma => analyzer.token_i += 1, + .right_parenthesis => continue, + else => unreachable, + } + } + + // Consume the right parenthesis + analyzer.token_i += 1; + + const parameters = analyzer.temporal_node_heap.items[temporal_heap_top..]; + + return switch (parameters.len) { + 1 => analyzer.addNode(.{ + .id = .compiler_intrinsic_one, + .token = hash, + .left = parameters[0], + .right = Node.Index.invalid, + }), + else => unreachable, + }; + } + + fn expression(analyzer: *Analyzer) error{ OutOfMemory, not_implemented, unexpected_token }!Node.Index { + return analyzer.expressionPrecedence(0); + } + + fn expressionPrecedence(analyzer: *Analyzer, minimum_precedence: i32) !Node.Index { + var result = try analyzer.prefixExpression(); + + var banned_precedence: i32 = -1; + + while (analyzer.token_i < analyzer.tokens.len) { + const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) { + .equal, .semicolon, .right_parenthesis, .right_brace => -1, + else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }), + }; + + if (precedence < minimum_precedence) { + break; + } + + if (precedence == banned_precedence) { + break; + } + + // TODO: fix this + const node_index = try analyzer.expressionPrecedence(1); + _ = node_index; + unreachable; + } + + return result; + } + + fn prefixExpression(analyzer: *Analyzer) !Node.Index { + switch (analyzer.tokens[analyzer.token_i].id) { + // .bang => .bool_not, + // .minus => .negation, + // .tilde => .bit_not, + // .minus_percent => .negation_wrap, + // .ampersand => .address_of, + // .keyword_try => .@"try", + // .keyword_await => .@"await", + + else => |pref| { + _ = pref; + return analyzer.primaryExpression(); + }, + } + + return error.not_implemented; + } + + fn primaryExpression(analyzer: *Analyzer) !Node.Index { + const result = switch (analyzer.tokens[analyzer.token_i].id) { + .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { + .colon => unreachable, + else => try analyzer.curlySuffixExpression(), + }, + .string_literal, .fixed_keyword_true, .fixed_keyword_false => try analyzer.curlySuffixExpression(), + // 
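
// expressionPrecedence above is the skeleton of a precedence climber: map
// the lookahead token to a precedence (-1 for terminators such as `;` and
// `)`), stop below the minimum, and recurse one level tighter for the
// right-hand side. A working miniature over single-character tokens:
const std = @import("std");

const Climber = struct {
    tokens: []const u8,
    i: usize = 0,

    fn precedence(op: u8) i32 {
        return switch (op) {
            '+' => 10,
            '*' => 20,
            else => -1, // terminator: stop climbing
        };
    }

    fn expr(self: *Climber, minimum_precedence: i32) u64 {
        var result: u64 = self.tokens[self.i] - '0';
        self.i += 1;

        while (self.i < self.tokens.len) {
            const op = self.tokens[self.i];
            const prec = precedence(op);
            if (prec < minimum_precedence) break;
            self.i += 1;

            const rhs = self.expr(prec + 1); // +1 makes the operator left-associative
            result = if (op == '+') result + rhs else result * rhs;
        }
        return result;
    }
};

test "precedence climbing binds * tighter than +" {
    var climber = Climber{ .tokens = "1+2*3" };
    try std.testing.expectEqual(@as(u64, 7), climber.expr(0));
}
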
todo:? + // .left_brace => try analyzer.block(), + else => |id| { + log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)}); + unreachable; + }, + }; + + return result; + } + + fn curlySuffixExpression(analyzer: *Analyzer) !Node.Index { + const left = try analyzer.typeExpression(); + + return switch (analyzer.tokens[analyzer.token_i].id) { + .left_brace => unreachable, + else => left, + }; + } + + fn noReturn(analyzer: *Analyzer) !Node.Index { + const token_i = analyzer.token_i; + assert(analyzer.tokens[token_i].id == .fixed_keyword_noreturn); + analyzer.token_i += 1; + return analyzer.addNode(.{ + .id = .keyword_noreturn, + .token = token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + } + + fn boolTrue(analyzer: *Analyzer) !Node.Index { + const token_i = analyzer.token_i; + assert(analyzer.tokens[token_i].id == .fixed_keyword_true); + analyzer.token_i += 1; + return analyzer.addNode(.{ + .id = .keyword_true, + .token = token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + } + + fn typeExpression(analyzer: *Analyzer) !Node.Index { + return switch (analyzer.tokens[analyzer.token_i].id) { + .identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false => try analyzer.errorUnionExpression(), + else => |id| blk: { + log.warn("By default, calling errorUnionExpression with {s}", .{@tagName(id)}); + + const result = try analyzer.errorUnionExpression(); + + break :blk result; + }, + }; + } + + fn errorUnionExpression(analyzer: *Analyzer) !Node.Index { + const suffix_expression = try analyzer.suffixExpression(); + + return switch (analyzer.tokens[analyzer.token_i].id) { + .bang => unreachable, + else => suffix_expression, + }; + } + + fn suffixExpression(analyzer: *Analyzer) !Node.Index { + var result = try analyzer.primaryTypeExpression(); + + while (true) { + if (analyzer.suffixOperator()) |_| { + unreachable; + } else { + if (analyzer.tokens[analyzer.token_i].id == .left_parenthesis) { + analyzer.token_i += 1; + + var expression_list = ArrayList(Node.Index){}; + while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { + const parameter = try analyzer.expression(); + try expression_list.append(analyzer.allocator, parameter); + analyzer.token_i += @intFromBool(switch (analyzer.tokens[analyzer.token_i].id) { + .comma, .right_parenthesis => true, + .colon, .right_brace, .right_bracket => unreachable, + else => unreachable, + }); + } + + _ = try analyzer.expectToken(.right_parenthesis); + @panic("TODO"); + } else { + return result; + } + } + } + + unreachable; + } + + fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index { + const token_i = analyzer.token_i; + const token = analyzer.tokens[token_i]; + return switch (token.id) { + .string_literal => blk: { + analyzer.token_i += 1; + break :blk analyzer.addNode(.{ + .id = .string_literal, + .token = token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + }, + .identifier => switch (analyzer.tokens[token_i + 1].id) { + .colon => unreachable, + else => blk: { + const identifier = analyzer.getIdentifier(token); + std.debug.print("identifier: {s}\n", .{identifier}); + analyzer.token_i += 1; + if (equal(u8, identifier, "_")) { + break :blk Node.Index.invalid; + } else break :blk analyzer.addNode(.{ + .id = .identifier, + .token = token_i, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + }, + }, + .fixed_keyword_noreturn => try analyzer.noReturn(), + .fixed_keyword_true => try analyzer.boolTrue(), + else => 
|foo| { + switch (foo) { + .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.getIdentifier(analyzer.tokens[token_i]) }), + else => std.debug.panic("{s}", .{@tagName(foo)}), + } + }, + }; + } + + // TODO: + fn suffixOperator(analyzer: *Analyzer) ?bool { + _ = analyzer; + + return null; + } + + fn addNode(analyzer: *Analyzer, node: Node) !Node.Index { + const index = analyzer.nodes.items.len; + try analyzer.nodes.append(analyzer.allocator, node); + std.debug.print("Adding node #{} {s}\n", .{ index, @tagName(node.id) }); + return Node.Index{ + .value = @intCast(index), + }; + } +}; + +const Members = struct { + len: usize, + left: Node.Index, + right: Node.Index, + + pub fn toRange(members: Members) Node.Range { + return switch (members.len) { + 0 => unreachable, + 1 => .{ + .start = members.left.value, + .end = members.left.value, + }, + 2 => .{ + .start = members.left.value, + .end = members.right.value, + }, + else => unreachable, + }; + } +}; + +pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !Result { + const start = std.time.Instant.now() catch unreachable; + var analyzer = Analyzer{ + .tokens = tokens, + .file = file, + .allocator = allocator, + }; + const node_index = try analyzer.addNode(.{ + .id = .main, + .token = 0, + .left = Node.Index.invalid, + .right = Node.Index.invalid, + }); + + assert(node_index.value == 0); + assert(node_index.valid); + const members = try analyzer.containerMembers(); + const member_range = members.toRange(); + analyzer.nodes.items[0].left = .{ .value = @intCast(member_range.start) }; + analyzer.nodes.items[0].right = .{ .value = @intCast(member_range.end) }; + + const end = std.time.Instant.now() catch unreachable; + + analyzer.temporal_node_heap.clearAndFree(allocator); + + return .{ + .nodes = analyzer.nodes, + .time = end.since(start), + }; +} + +const ExpressionMutabilityQualifier = enum { + @"const", + @"var", +}; + +const Keyword = enum { + @"return", + @"fn", + @"while", + void, + noreturn, +}; + +// These types are meant to be used by the semantic analyzer +pub const ContainerDeclaration = struct { + members: []const Node.Index, +}; + +pub const SymbolDeclaration = struct { + type_node: Node.Index, + initialization_node: Node.Index, + mutability_token: Token.Index, +}; diff --git a/src/ir.zig b/src/ir.zig deleted file mode 100644 index 20b0eba..0000000 --- a/src/ir.zig +++ /dev/null @@ -1,143 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const equal = std.mem.eql; - -const data_structures = @import("data_structures.zig"); -const ArrayList = data_structures.ArrayList; -const parser = @import("parser.zig"); - -const void_type = Type{ - .id = .void, -}; - -const Type = struct { - id: Id, - - fn isPrimitive(T: Type) bool { - return switch (T.id) { - .void => true, - }; - } - const Id = enum { - void, - }; -}; - -const Error = error{ - type_mismatch, - internal, - arguments_not_used, -}; - -const TopLevelDeclaration = struct { - type: Id, - index: u31, - - const Id = enum { - function, - expression, - }; -}; - -const Instruction = struct { - id: Id, - index: u16, - - const Id = enum { - ret_void, - }; -}; - -const ret_void = Instruction{ - .id = .ret_void, - .index = 0, -}; - -const ret = struct { - is_type: bool, -}; - -const Function = struct { - instructions: ArrayList(Instruction), - return_type: Type, -}; - -pub const Result = struct { - top_level_declarations: ArrayList(TopLevelDeclaration), - functions: ArrayList(Function), - 
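
// analyze() above appends the root node first so it owns index 0, then
// patches the member range into it after containerMembers() has run. The
// reserve-then-backpatch shape in isolation:
const std = @import("std");

test "reserve the root node, backpatch its range" {
    const RawNode = struct { left: u32, right: u32 };
    var nodes = std.ArrayListUnmanaged(RawNode){};
    defer nodes.deinit(std.testing.allocator);

    try nodes.append(std.testing.allocator, .{ .left = 0, .right = 0 }); // root placeholder
    try nodes.append(std.testing.allocator, .{ .left = 0, .right = 0 }); // member #1
    try nodes.append(std.testing.allocator, .{ .left = 0, .right = 0 }); // member #2

    nodes.items[0] = .{ .left = 1, .right = 2 }; // backpatch the root's member range
    try std.testing.expectEqual(@as(u32, 2), nodes.items[0].right);
}
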
instructions: struct {} = .{}, - - pub fn free(result: *Result, allocator: Allocator) void { - for (result.functions.items) |*function| { - function.instructions.clearAndFree(allocator); - } - result.functions.clearAndFree(allocator); - result.top_level_declarations.clearAndFree(allocator); - } -}; - -const Analyzer = struct { - parser: *const parser.Result, - top_level_declarations: ArrayList(TopLevelDeclaration), - functions: ArrayList(Function), - allocator: Allocator, - - fn analyze(allocator: Allocator, parser_result: *const parser.Result) Error!Result { - var analyzer = Analyzer{ - .parser = parser_result, - .top_level_declarations = ArrayList(TopLevelDeclaration){}, - .allocator = allocator, - .functions = ArrayList(Function){}, - }; - - for (parser_result.functions.items) |ast_function| { - if (ast_function.statements.items.len != 0) { - for (ast_function.statements.items) |statement| { - _ = statement; - @panic("TODO: statement"); - } - } else { - if (ast_function.arguments.items.len != 0) { - return Error.arguments_not_used; - } - - try analyzer.expectPrimitiveType(void_type, ast_function.return_type); - - const function_index = analyzer.functions.items.len; - - var function = Function{ - .instructions = ArrayList(Instruction){}, - .return_type = void_type, - }; - - function.instructions.append(allocator, ret_void) catch return Error.internal; - - analyzer.top_level_declarations.append(allocator, TopLevelDeclaration{ - .type = .function, - .index = @intCast(function_index), - }) catch return Error.internal; - - analyzer.functions.append(allocator, function) catch return Error.internal; - } - } - - return .{ - .top_level_declarations = analyzer.top_level_declarations, - .functions = analyzer.functions, - }; - } - - fn expectPrimitiveType(analyzer: *Analyzer, comptime type_value: Type, type_identifier_id: u32) Error!void { - assert(type_value.isPrimitive()); - const type_identifier = analyzer.parser.strings.get(type_identifier_id) orelse return Error.internal; - - if (!equal(u8, @tagName(type_value.id), type_identifier)) { - return Error.type_mismatch; - } - } -}; - -pub fn runTest(allocator: Allocator, parser_result: *const parser.Result) !Result { - return Analyzer.analyze(allocator, parser_result); -} diff --git a/src/lexer.zig b/src/lexer.zig deleted file mode 100644 index ad9b440..0000000 --- a/src/lexer.zig +++ /dev/null @@ -1,158 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const log = std.log; - -const equal = std.mem.eql; - -const data_structures = @import("data_structures.zig"); -const ArrayList = data_structures.ArrayList; - -const fs = @import("fs.zig"); -const parser = @import("parser.zig"); - -pub const TokenTypeMap = blk: { - var result: [@typeInfo(TokenId).Enum.fields.len]type = undefined; - - result[@intFromEnum(TokenId.identifier)] = Identifier; - result[@intFromEnum(TokenId.operator)] = Operator; - result[@intFromEnum(TokenId.number)] = Number; - - break :blk result; -}; - -pub const Identifier = parser.Node; - -pub const TokenId = enum { - identifier, - operator, - number, -}; - -pub const Operator = enum(u8) { - left_parenthesis = '(', - right_parenthesis = ')', - left_brace = '{', - right_brace = '}', - equal = '=', - colon = ':', - semicolon = ';', -}; - -pub const Number = struct { - content: union(enum) { - float: f64, - integer: Integer, - }, - - const Integer = struct { - value: u64, - is_negative: bool, - }; -}; - -pub const Result = struct { - arrays: struct { - identifier: 
ArrayList(Identifier),
-        operator: ArrayList(Operator),
-        number: ArrayList(Number),
-        id: ArrayList(TokenId),
-    },
-    file: []const u8,
-    time: u64 = 0,
-
-    pub fn free(result: *Result, allocator: Allocator) void {
-        inline for (@typeInfo(@TypeOf(result.arrays)).Struct.fields) |field| {
-            @field(result.arrays, field.name).clearAndFree(allocator);
-        }
-    }
-
-    fn appendToken(result: *Result, comptime token_id: TokenId, token_value: TokenTypeMap[@intFromEnum(token_id)]) void {
-        // const index = result.arrays.id.items.len;
-        @field(result.arrays, @tagName(token_id)).appendAssumeCapacity(token_value);
-        result.arrays.id.appendAssumeCapacity(token_id);
-        // log.err("Token #{}: {s} {}", .{ index, @tagName(token_id), token_value });
-    }
-};
-
-pub fn lex(allocator: Allocator, text: []const u8) !Result {
-    const time_start = std.time.Instant.now() catch unreachable;
-
-    var index: usize = 0;
-
-    var result = Result{
-        .arrays = .{
-            .identifier = try ArrayList(Identifier).initCapacity(allocator, text.len),
-            .operator = try ArrayList(Operator).initCapacity(allocator, text.len),
-            .number = try ArrayList(Number).initCapacity(allocator, text.len),
-            .id = try ArrayList(TokenId).initCapacity(allocator, text.len),
-        },
-        .file = text,
-    };
-
-    defer {
-        const time_end = std.time.Instant.now() catch unreachable;
-        result.time = time_end.since(time_start);
-    }
-
-    while (index < text.len) {
-        const first_char = text[index];
-        switch (first_char) {
-            'a'...'z', 'A'...'Z', '_' => {
-                const start = index;
-                while (true) {
-                    const ch = text[index];
-                    if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_' or (ch >= '0' and ch <= '9')) {
-                        index += 1;
-                        continue;
-                    }
-                    break;
-                }
-
-                result.appendToken(.identifier, .{
-                    .left = @intCast(start),
-                    .right = @intCast(index),
-                    .type = .identifier,
-                });
-            },
-            '(', ')', '{', '}', '-', '=', ';' => |operator| {
-                result.appendToken(.operator, @enumFromInt(operator));
-                index += 1;
-            },
-            '0'...'9' => {
-                const start = index;
-
-                while (text[index] >= '0' and text[index] <= '9') {
-                    index += 1;
-                }
-                const end = index;
-                const number_slice = text[start..end];
-                const number = try std.fmt.parseInt(u64, number_slice, 10);
-                result.appendToken(.number, .{
-                    .content = .{
-                        .integer = .{
-                            .value = number,
-                            .is_negative = false,
-                        },
-                    },
-                });
-            },
-            ' ', '\n', '\r', '\t' => index += 1,
-            else => |foo| {
-                index += 1;
-                std.debug.panic("NI: {c} 0x{x}", .{ foo, foo });
-            },
-        }
-    }
-
-    return result;
-}
-
-test "lexer" {
-    const allocator = std.testing.allocator;
-    const file_path = fs.first;
-    const file = try fs.readFile(allocator, file_path);
-    defer allocator.free(file);
-    var result = try lex(allocator, file);
-    defer result.free(allocator);
-}
diff --git a/src/main.zig b/src/main.zig
index 37ce80e..93052c3 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -2,17 +2,28 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
-const compiler = @import("compiler.zig");
-const fs = @import("fs.zig");
+const Compilation = @import("Compilation.zig");
 
 pub const seed = std.math.maxInt(u64);
+const default_src_file = "src/test/main.nat";
 
 pub fn main() !void {
-    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
-    const allocator = gpa.allocator();
-    try compiler.cycle(allocator, fs.first);
+    try singleCompilation(default_src_file);
+}
+
+fn singleCompilation(main_file_path: []const u8) !void {
+    const allocator = std.heap.page_allocator;
+    const compilation = try Compilation.init(allocator);
+
+    try
compilation.compileModule(.{ + .main_package_path = main_file_path, + }); } test { - _ = compiler; + _ = Compilation; +} + +test "basic" { + try singleCompilation(default_src_file); } diff --git a/src/parser.zig b/src/parser.zig deleted file mode 100644 index a64c0ed..0000000 --- a/src/parser.zig +++ /dev/null @@ -1,434 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const log = std.log; - -const data_structures = @import("data_structures.zig"); -const ArrayList = data_structures.ArrayList; -const HashMap = data_structures.HashMap; - -const lexer = @import("lexer.zig"); - -pub const Result = struct { - function_map: ArrayList(lexer.Identifier), - nodes: ArrayList(Node), - - pub fn free(result: *Result, allocator: Allocator) void { - result.functions.clearAndFree(allocator); - } -}; - -pub const Node = packed struct(u64) { - type: Type, - left: Node.Index, - right: Node.Index, - - pub const Index = u27; - - pub const Type = enum(u10) { - root = 0, - identifier = 1, - number = 2, - @"return" = 3, - block_one = 4, - function_declaration_no_arguments = 5, - container_declaration = 6, - }; -}; - -const Error = error{ - unexpected_token, - not_implemented, - OutOfMemory, -}; - -pub fn parse(allocator: Allocator, lexer_result: *const lexer.Result) !Result { - var parser = Parser{ - .allocator = allocator, - .nodes = ArrayList(Node){}, - .function_map = ArrayList(lexer.Identifier){}, - .lexer = .{ - .result = lexer_result, - }, - }; - errdefer parser.free(); - - const node_index = try parser.appendNode(Node{ - .type = .root, - .left = 0, - .right = 0, - }); - _ = node_index; - - const members = try parser.parseContainerMembers(); - _ = members; - - return Result{ - .function_map = parser.function_map, - .nodes = parser.nodes, - }; -} - -const ExpressionMutabilityQualifier = enum { - @"const", - @"var", -}; - -const Keyword = enum { - @"return", - @"fn", -}; - -const PeekResult = union(lexer.TokenId) { - identifier: lexer.Identifier, - operator: lexer.Operator, - number: lexer.Number, -}; - -const Lexer = struct { - result: *const lexer.Result, - indices: struct { - identifier: u32 = 0, - operator: u32 = 0, - number: u32 = 0, - id: u32 = 0, - } = .{}, - - fn hasTokens(l: *const Lexer) bool { - return l.indices.id < l.result.arrays.id.items.len; - } - - fn currentTokenIndex(l: *const Lexer, comptime token_id: lexer.TokenId) u32 { - assert(l.isCurrentToken(token_id)); - return @field(l.indices, @tagName(token_id)); - } - - fn consume(l: *Lexer, comptime token_id: lexer.TokenId) void { - assert(l.isCurrentToken(token_id)); - l.indices.id += 1; - const index_ptr = &@field(l.indices, @tagName(token_id)); - const index = index_ptr.*; - const token_value = @field(l.result.arrays, @tagName(token_id)).items[index]; - log.err("Consuming {s} ({})...", .{ @tagName(token_id), token_value }); - - index_ptr.* += 1; - } - - fn isCurrentToken(l: *const Lexer, token_id: lexer.TokenId) bool { - return l.result.arrays.id.items[l.indices.id] == token_id; - } - - fn getIdentifier(l: *const Lexer, identifier: Node) []const u8 { - comptime { - assert(lexer.Identifier == Node); - } - - assert(identifier.type == .identifier); - - return l.result.file[identifier.left..][0 .. 
identifier.right - identifier.left]; - } - - fn expectTokenType(l: *Lexer, comptime expected_token_id: lexer.TokenId) !lexer.TokenTypeMap[@intFromEnum(expected_token_id)] { - const peek_result = l.peek() orelse return error.not_implemented; - return switch (peek_result) { - expected_token_id => |token| blk: { - l.consume(expected_token_id); - break :blk token; - }, - else => error.not_implemented, - }; - } - - fn expectTokenTypeIndex(l: *Lexer, comptime expected_token_id: lexer.TokenId) !u32 { - const peek_result = l.peek() orelse return error.not_implemented; - return switch (peek_result) { - expected_token_id => blk: { - const index = l.currentTokenIndex(expected_token_id); - l.consume(expected_token_id); - break :blk index; - }, - else => error.not_implemented, - }; - } - - fn expectSpecificToken(l: *Lexer, comptime expected_token_id: lexer.TokenId, expected_token: lexer.TokenTypeMap[@intFromEnum(expected_token_id)]) !void { - const peek_result = l.peek() orelse return error.not_implemented; - switch (peek_result) { - expected_token_id => |token| { - if (expected_token != token) { - return error.not_implemented; - } - - l.consume(expected_token_id); - }, - else => |token| { - std.debug.panic("{s}", .{@tagName(token)}); - }, - } - } - - fn maybeExpectOperator(l: *Lexer, expected_operator: lexer.Operator) bool { - return switch (l.peek() orelse unreachable) { - .operator => |operator| { - const result = operator == expected_operator; - if (result) { - l.consume(.operator); - } - return result; - }, - else => false, - }; - } - - fn peek(l: *const Lexer) ?PeekResult { - if (l.indices.id >= l.result.arrays.id.items.len) { - return null; - } - - return switch (l.result.arrays.id.items[l.indices.id]) { - inline else => |token| blk: { - const tag = @tagName(token); - const index = @field(l.indices, tag); - const array = &@field(l.result.arrays, tag); - - break :blk @unionInit(PeekResult, tag, array.items[index]); - }, - }; - } -}; - -const Parser = struct { - lexer: Lexer, - nodes: ArrayList(Node), - function_map: ArrayList(lexer.Identifier), - allocator: Allocator, - - fn appendNode(parser: *Parser, node: Node) !Node.Index { - const index = parser.nodes.items.len; - try parser.nodes.append(parser.allocator, node); - return @intCast(index); - } - - fn getNode(parser: *Parser, node_index: Node.Index) *Node { - return &parser.nodes.items[node_index]; - } - - fn free(parser: *Parser) void { - _ = parser; - } - - fn parseTypeExpression(parser: *Parser) !Node.Index { - // TODO: make this decent - return switch (parser.lexer.peek() orelse unreachable) { - .identifier => parser.nodeFromToken(.identifier), - else => unreachable, - }; - } - - fn parseFunctionDeclaration(parser: *Parser) !Node.Index { - try parser.lexer.expectSpecificToken(.operator, .left_parenthesis); - while (!parser.lexer.maybeExpectOperator(.right_parenthesis)) { - return error.not_implemented; - } - - const t = try parser.parseTypeExpression(); - const function_declaration = try parser.appendNode(.{ - .type = .function_declaration_no_arguments, - .left = t, - .right = try parser.parseBlock(), - }); - return function_declaration; - } - - fn parseBlock(parser: *Parser) !Node.Index { - try parser.lexer.expectSpecificToken(.operator, .left_brace); - - var statements = ArrayList(Node.Index){}; - - while (!parser.lexer.maybeExpectOperator(.right_brace)) { - const statement = try parser.parseStatement(); - try statements.append(parser.allocator, statement); - } - - const node: Node = switch (statements.items.len) { - 0 => unreachable, - 1 
=> .{ - .type = .block_one, - .left = statements.items[0], - .right = 0, - }, - else => unreachable, - }; - log.debug("Parsed block!", .{}); - return parser.appendNode(node); - } - - fn parseStatement(parser: *Parser) !Node.Index { - // TODO: more stuff before - const expression = try parser.parseAssignExpression(); - try parser.lexer.expectSpecificToken(.operator, .semicolon); - - return expression; - } - - fn parseAssignExpression(parser: *Parser) !Node.Index { - const expression = try parser.parseExpression(); - switch (parser.lexer.peek() orelse unreachable) { - .operator => |operator| switch (operator) { - .semicolon => return expression, - else => unreachable, - }, - else => unreachable, - } - - return error.not_implemented; - } - - fn parseExpression(parser: *Parser) Error!Node.Index { - return parser.parseExpressionPrecedence(0); - } - - fn parseExpressionPrecedence(parser: *Parser, minimum_precedence: i32) !Node.Index { - var expr_index = try parser.parsePrefixExpression(); - log.debug("Expr index: {}", .{expr_index}); - - var banned_precedence: i32 = -1; - while (parser.lexer.hasTokens()) { - const precedence: i32 = switch (parser.lexer.peek() orelse unreachable) { - .operator => |operator| switch (operator) { - .semicolon => -1, - else => @panic(@tagName(operator)), - }, - else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }), - }; - - if (precedence < minimum_precedence) { - break; - } - - if (precedence == banned_precedence) { - unreachable; - } - - const node_index = try parser.parseExpressionPrecedence(1); - _ = node_index; - - unreachable; - } - - log.err("Parsed expression precedence", .{}); - - return expr_index; - } - - fn parsePrefixExpression(parser: *Parser) !Node.Index { - switch (parser.lexer.peek() orelse unreachable) { - // .bang => .bool_not, - // .minus => .negation, - // .tilde => .bit_not, - // .minus_percent => .negation_wrap, - // .ampersand => .address_of, - // .keyword_try => .@"try", - // .keyword_await => .@"await", - - else => |pref| { - log.err("Pref: {s}", .{@tagName(pref)}); - return parser.parsePrimaryExpression(); - }, - } - - return error.not_implemented; - } - - fn nodeFromToken(parser: *Parser, comptime token_id: lexer.TokenId) !Node.Index { - const node = try parser.appendNode(.{ - .type = @field(Node.Type, @tagName(token_id)), - .left = @intCast(parser.lexer.currentTokenIndex(token_id)), - .right = 0, - }); - parser.lexer.consume(token_id); - - return node; - } - - fn parsePrimaryExpression(parser: *Parser) !Node.Index { - const result = switch (parser.lexer.peek() orelse unreachable) { - .number => try parser.nodeFromToken(.number), - .identifier => |identifier| { - const identifier_name = parser.lexer.getIdentifier(identifier); - inline for (@typeInfo(Keyword).Enum.fields) |keyword| { - if (std.mem.eql(u8, identifier_name, keyword.name)) return switch (@as(Keyword, @enumFromInt(keyword.value))) { - .@"return" => blk: { - parser.lexer.consume(.identifier); - const node_ref = try parser.appendNode(.{ - .type = .@"return", - .left = try parser.parseExpression(), - .right = 0, - }); - break :blk node_ref; - }, - .@"fn" => blk: { - parser.lexer.consume(.identifier); - // TODO: figure out name association - break :blk try parser.parseFunctionDeclaration(); - }, - }; - } - - unreachable; - }, - else => |foo| { - std.debug.panic("foo: {s}. 
{}", .{ @tagName(foo), foo }); - }, - }; - - return result; - } - - fn parseContainerMembers(parser: *Parser) !void { - var container_nodes = ArrayList(Node.Index){}; - while (parser.lexer.hasTokens()) { - const container_node = switch (parser.lexer.peek() orelse unreachable) { - .identifier => |first_identifier_ref| blk: { - parser.lexer.consume(.identifier); - - const first_identifier = parser.lexer.getIdentifier(first_identifier_ref); - - if (std.mem.eql(u8, first_identifier, "comptime")) { - unreachable; - } else { - const mutability_qualifier: ExpressionMutabilityQualifier = if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"const"))) .@"const" else if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"var"))) .@"var" else @panic(first_identifier); - _ = mutability_qualifier; - - const identifier = try parser.appendNode(.{ - .type = .identifier, - .left = @intCast(try parser.lexer.expectTokenTypeIndex(.identifier)), - .right = 0, - }); - - switch (parser.lexer.peek() orelse unreachable) { - .operator => |operator| switch (operator) { - .colon => unreachable, - .equal => { - parser.lexer.consume(.operator); - - const expression = try parser.parseExpression(); - break :blk try parser.appendNode(.{ - .type = .container_declaration, - .left = expression, - .right = identifier, - }); - }, - else => unreachable, - }, - else => |foo| std.debug.panic("WTF: {}", .{foo}), - } - } - }, - else => |a| std.debug.panic("{}", .{a}), - }; - - try container_nodes.append(parser.allocator, container_node); - } - } -}; diff --git a/src/test/main.b b/src/test/main.nat similarity index 92% rename from src/test/main.b rename to src/test/main.nat index 8847d3f..157bd8a 100644 --- a/src/test/main.b +++ b/src/test/main.nat @@ -1,3 +1,3 @@ const main = fn() i32 { return 0; -} +};