Barebones semantic analysis

This commit is contained in:
David Gonzalez Martin 2023-09-09 20:57:12 -06:00
parent cd32603137
commit 15a7df3f14
12 changed files with 1364 additions and 414 deletions

View File

@ -1,32 +1,16 @@
const std = @import("std");
// Although this function looks imperative, note that its job is to
// declaratively construct a build graph that will be executed by an external
// runner.
pub fn build(b: *std.Build) void {
// Standard target options allows the person running `zig build` to choose
// what target to build for. Here we do not override the defaults, which
// means any target is allowed, and the default is native. Other options
// for restricting supported target set are available.
const target = b.standardTargetOptions(.{});
// Standard optimization options allow the person running `zig build` to select
// between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
// set a preferred release mode, allowing the user to decide how to optimize.
const optimize = b.standardOptimizeOption(.{});
const exe = b.addExecutable(.{
.name = "compiler",
// In this case the main source file is merely a path, however, in more
// complicated build scripts, this could be a generated file.
.root_source_file = .{ .path = "src/main.zig" },
.target = target,
.optimize = optimize,
});
// This declares intent for the executable to be installed into the
// standard location when the user invokes the "install" step (the default
// step when running `zig build`).
b.installArtifact(exe);
b.installDirectory(.{
.source_dir = std.Build.LazyPath.relative("lib"),
@ -34,31 +18,21 @@ pub fn build(b: *std.Build) void {
.install_subdir = "lib",
});
// This *creates* a Run step in the build graph, to be executed when another
// step is evaluated that depends on it. The next line below will establish
// such a dependency.
const run_cmd = b.addRunArtifact(exe);
// By making the run step depend on the install step, it will be run from the
// installation directory rather than directly from within the cache directory.
// This is not necessary, however, if the application depends on other installed
// files, this ensures they will be present and in the expected location.
run_cmd.step.dependOn(b.getInstallStep());
// This allows the user to pass arguments to the application in the build
// command itself, like this: `zig build run -- arg1 arg2 etc`
if (b.args) |args| {
run_cmd.addArgs(args);
}
// This creates a build step. It will be visible in the `zig build --help` menu,
// and can be selected like this: `zig build run`
// This will evaluate the `run` step rather than the default, which is "install".
const run_step = b.step("run", "Run the app");
run_step.dependOn(&run_cmd.step);
// Creates a step for unit testing. This only builds the test executable
// but does not run it.
const debug_command = addDebugCommand(b, exe);
const debug_step = b.step("debug", "Debug the app");
debug_step.dependOn(&debug_command.step);
const unit_tests = b.addTest(.{
.root_source_file = .{ .path = "src/main.zig" },
.target = target,
@ -66,36 +40,39 @@ pub fn build(b: *std.Build) void {
});
const run_unit_tests = b.addRunArtifact(unit_tests);
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_unit_tests.step);
const debug_unit_tests_cmd = switch (@import("builtin").os.tag) {
const debug_unit_tests_cmd = addDebugCommand(b, unit_tests);
const debug_test_step = b.step("debug_test", "Run the tests through the debugger");
debug_test_step.dependOn(&debug_unit_tests_cmd.step);
}
fn addDebugCommand(b: *std.Build, artifact: *std.Build.Step.Compile) *std.Build.Step.Run {
return switch (@import("builtin").os.tag) {
.linux => blk: {
const result = b.addSystemCommand(&.{"gf2"});
result.addArtifactArg(unit_tests);
result.addArgs(&.{ "-ex", "r" });
result.addArtifactArg(artifact);
if (artifact.kind == .@"test") {
result.addArgs(&.{ "-ex", "r" });
}
break :blk result;
},
.windows => blk: {
const result = b.addSystemCommand(&.{"remedybg"});
result.addArg("-g");
result.addArtifactArg(unit_tests);
result.addArtifactArg(artifact);
break :blk result;
},
.macos => blk: {
// Broken, but it compiles
// not tested
const result = b.addSystemCommand(&.{"gdb"});
result.addArtifactArg(unit_tests);
result.addArtifactArg(artifact);
break :blk result;
},
else => @compileError("Operating system not supported"),
};
const debug_test_step = b.step("debug_test", "Run the tests through the debugger");
debug_test_step.dependOn(&debug_unit_tests_cmd.step);
// Similar to creating the run step earlier, this exposes a `test` step to
// the `zig build --help` menu, providing a way for the user to request
// running the unit tests.
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_unit_tests.step);
}

View File

@ -1 +1,7 @@
const builtin = #import("builtin");
comptime {
_ = _start;
}
const _start = () noreturn {
while (true) {}
};

View File

@ -1,4 +1,5 @@
const start = #import("start.nat");
comptime {
_ = start;
}
const start = #import("start.nat");

View File

@ -2,18 +2,32 @@ const Compilation = @This();
const std = @import("std");
const assert = std.debug.assert;
const equal = std.mem.eql;
const print = std.debug.print;
const Allocator = std.mem.Allocator;
const data_structures = @import("data_structures.zig");
const ArrayList = data_structures.ArrayList;
const AutoHashMap = data_structures.AutoHashMap;
const BlockList = data_structures.BlockList;
const HashMap = data_structures.HashMap;
const SegmentedList = data_structures.SegmentedList;
const StringHashMap = data_structures.StringHashMap;
const StringArrayHashMap = data_structures.StringArrayHashMap;
const lexical_analyzer = @import("frontend/lexical_analyzer.zig");
const syntactic_analyzer = @import("frontend/syntactic_analyzer.zig");
const Node = syntactic_analyzer.Node;
const semantic_analyzer = @import("frontend/semantic_analyzer.zig");
const intermediate_representation = @import("backend/intermediate_representation.zig");
test {
_ = lexical_analyzer;
_ = syntactic_analyzer;
_ = semantic_analyzer;
_ = data_structures;
}
base_allocator: Allocator,
cwd_absolute_path: []const u8,
@ -43,71 +57,210 @@ pub fn init(allocator: Allocator) !*Compilation {
return compilation;
}
/// Releases the two path strings held by the compilation and then the
/// compilation object itself, all through `base_allocator`.
/// `compilation` must not be used after this call returns.
pub fn deinit(compilation: *Compilation) void {
    const gpa = compilation.base_allocator;
    // Registered first so the object is destroyed only after its fields are freed.
    defer gpa.destroy(compilation);

    gpa.free(compilation.cwd_absolute_path);
    gpa.free(compilation.executable_absolute_path);
}
/// Semantic record for a struct: the scope associated with it and the value
/// that initializes it. Both are referenced by index rather than by pointer.
pub const Struct = struct {
/// Index of the scope associated with this struct.
scope: Scope.Index,
/// Index of the value used as this struct's initialization.
initialization: Value.Index,
// Container type used to store `Struct` items and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = List.Index;
};
/// Tagged union of the types that can be described: `void`, `noreturn`,
/// `bool`, and integers (whose details live in the `Integer` payload).
pub const Type = union(enum) {
void,
noreturn,
bool,
/// Integer type; width and signedness are carried by `Integer`.
integer: Integer,
// Container type used to store `Type` items and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = List.Index;
};
/// Payload of `Type.integer`: an integer's width in bits and its signedness.
pub const Integer = struct {
/// Width of the integer in bits.
bit_count: u16,
/// Whether the integer is signed or unsigned.
signedness: Signedness,
/// One-bit enum: `unsigned` is 0, `signed` is 1.
pub const Signedness = enum(u1) {
unsigned = 0,
signed = 1,
};
};
/// A scope contains a bunch of declarations.
/// Scopes reference their parent scope by index and own a map from a `u32`
/// key to the index of each declaration they contain.
pub const Scope = struct {
/// Index of the enclosing scope.
parent: Scope.Index,
/// Index of the type associated with this scope.
type: Type.Index,
/// Declarations of this scope, keyed by a `u32`.
// NOTE(review): the key is presumably an identifier id/hash matching
// `Module.string_table` keys - confirm against the code that fills the map.
declarations: AutoHashMap(u32, Declaration.Index) = .{},
// Container type used to store `Scope` items and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = List.Index;
};
/// A declaration is either still unresolved (only its syntax node is known)
/// or has been resolved to a struct type.
pub const Declaration = union(enum) {
/// Index of the syntax-tree node for a declaration that has not been resolved yet.
unresolved: Node.Index,
/// The declaration is a struct type, stored inline.
struct_type: Struct,
// Container type used to store `Declaration` items and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = List.Index;
};
/// A function: its body block and its prototype, both referenced by index.
pub const Function = struct {
/// Index of the block that forms the function body.
body: Block.Index,
/// Index of the function's prototype.
prototype: Prototype.Index,
/// Signature of a function: optional argument list plus return type.
pub const Prototype = struct {
/// Argument fields, or `null`.
// NOTE(review): `null` presumably means "no arguments" - confirm with the producer.
arguments: ?[]const Field.Index,
/// Index of the return type.
return_type: Type.Index,
// Container type used to store prototypes and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = Prototype.List.Index;
};
// Container type used to store `Function` items and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = List.Index;
};
/// Placeholder for a code block; it currently carries only a dummy field.
pub const Block = struct {
// Dummy field; no code in view reads it.
foo: u32 = 0,
// Container type used to store `Block` items and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = List.Index;
};
/// Placeholder for a field (also used for function arguments in
/// `Function.Prototype.arguments`); it currently carries only a dummy field.
pub const Field = struct {
// Dummy field; no code in view reads it.
foo: u32 = 0,
// Container type used to store `Field` items and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = List.Index;
};
/// Placeholder for a loop; it currently carries only a dummy field.
pub const Loop = struct {
// Dummy field; no code in view reads it.
foo: u32 = 0,
// Container type used to store `Loop` items and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = List.Index;
};
/// A value known to the compiler, tagged with what kind of thing it is and
/// with constness / comptime flags.
pub const Value = struct {
/// What the value is: a declaration, a boolean literal, a loop or a function.
type: union(enum) {
declaration: Declaration.Index,
bool_true,
bool_false,
loop: Loop.Index,
function: Function.Index,
},
/// Whether the value is constant.
is_const: bool,
/// Whether the value is flagged as comptime.
is_comptime: bool,
// Container type used to store `Value` items and the handle type it hands out.
pub const List = BlockList(@This());
pub const Index = List.Index;
};
pub const Module = struct {
main_package: *Package,
import_table: StringArrayHashMap(*File) = .{},
string_table: AutoHashMap(u32, []const u8) = .{},
declarations: BlockList(Declaration) = .{},
structs: BlockList(Struct) = .{},
scopes: BlockList(Scope) = .{},
files: BlockList(File) = .{},
values: BlockList(Value) = .{},
functions: BlockList(Function) = .{},
fields: BlockList(Field) = .{},
function_prototypes: BlockList(Function.Prototype) = .{},
types: BlockList(Type) = .{},
blocks: BlockList(Block) = .{},
loops: BlockList(Loop) = .{},
pub const Descriptor = struct {
main_package_path: []const u8,
};
fn deinit(module: *Module, allocator: Allocator) void {
defer allocator.destroy(module);
const ImportFileResult = struct {
file: *File,
is_new: bool,
};
for (module.import_table.values()) |file| {
file.deinit(allocator);
const ImportPackageResult = struct {
file: *File,
is_new: bool,
is_package: bool,
};
pub fn importFile(module: *Module, allocator: Allocator, current_file: *File, import_name: []const u8) !ImportPackageResult {
if (equal(u8, import_name, "std")) {
return module.importPackage(allocator, module.main_package.dependencies.get("std").?);
}
var iterator = module.main_package.dependencies.valueIterator();
while (iterator.next()) |it| {
const package = it.*;
package.deinit(allocator);
if (equal(u8, import_name, "builtin")) {
return module.importPackage(allocator, module.main_package.dependencies.get("builtin").?);
}
module.main_package.deinit(allocator);
if (equal(u8, import_name, "main")) {
return module.importPackage(allocator, module.main_package);
}
module.import_table.clearAndFree(allocator);
}
if (current_file.package.dependencies.get(import_name)) |package| {
return module.importPackage(allocator, package);
}
fn importPackage(module: *Module, compilation: *Compilation, package: *Package) !ImportPackageResult {
const lookup_result = try module.import_table.getOrPut(compilation.base_allocator, package.directory.path);
errdefer _ = module.import_table.pop();
if (lookup_result.found_existing) {
const file: *File = lookup_result.value_ptr.*;
try file.addPackageReference(compilation.base_allocator, package);
if (!std.mem.endsWith(u8, import_name, ".nat")) {
unreachable;
}
const file = try compilation.base_allocator.create(File);
lookup_result.value_ptr.* = file;
file.* = File{
.relative_path = package.source_path,
.package = package,
const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_name });
const file_relative_path = std.fs.path.basename(full_path);
const package = current_file.package;
const import = try module.getFile(allocator, full_path, file_relative_path, package);
try import.file.addFileReference(allocator, current_file);
const result = ImportPackageResult{
.file = import.file,
.is_new = import.is_new,
.is_package = false,
};
return result;
}
fn getFile(module: *Module, allocator: Allocator, full_path: []const u8, relative_path: []const u8, package: *Package) !ImportFileResult {
const path_lookup = try module.import_table.getOrPut(allocator, full_path);
const file: *File = switch (path_lookup.found_existing) {
true => path_lookup.value_ptr.*,
false => blk: {
const new_file_index = try module.files.append(allocator, File{
.relative_path = relative_path,
.package = package,
});
const file = module.files.get(new_file_index);
path_lookup.value_ptr.* = file;
break :blk file;
},
};
try file.addPackageReference(compilation.base_allocator, package);
return .{
.file = file,
.is_new = true,
.is_new = !path_lookup.found_existing,
};
}
fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void {
/// Imports the root source file of `package`.
///
/// The package directory and source path are resolved into one path, the
/// file registered under that path is fetched (or created) through
/// `getFile`, and `package` is recorded on the file as a package reference.
/// The resolved path is not freed here; it is handed to `getFile`, which
/// uses it as the lookup key.
///
/// Returns the file, whether `getFile` reported it as new, and
/// `is_package = true`. Allocation or lookup errors are propagated.
pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult {
    const path_parts = [_][]const u8{ package.directory.path, package.source_path };
    const resolved_path = try std.fs.path.resolve(allocator, &path_parts);

    const lookup = try module.getFile(allocator, resolved_path, package.source_path, package);
    const imported_file = lookup.file;
    try imported_file.addPackageReference(allocator, package);

    return ImportPackageResult{
        .file = imported_file,
        .is_new = lookup.is_new,
        .is_package = true,
    };
}
pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void {
_ = module;
const source_file = try file.package.directory.handle.openFile(file.relative_path, .{});
defer source_file.close();
const file_size = try source_file.getEndPos();
var file_buffer = try allocator.alloc(u8, file_size);
const read_byte_count = try source_file.readAll(file_buffer);
assert(read_byte_count == file_size);
source_file.close();
//TODO: adjust file maximum size
file.source_code = file_buffer[0..read_byte_count];
@ -140,7 +293,6 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) !
builtin_file.close();
const module: *Module = try compilation.base_allocator.create(Module);
defer module.deinit(compilation.base_allocator);
module.* = Module{
.main_package = blk: {
const result = try compilation.base_allocator.create(Package);
@ -157,16 +309,14 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) !
};
const std_package_dir = "lib/std";
const package_descriptors = [2]struct {
name: []const u8,
directory_path: []const u8,
}{
.{
.name = "std",
.directory_path = try switch (@import("builtin").is_test) {
true => compilation.pathFromCwd(std_package_dir),
false => compilation.pathFromCompiler(std_package_dir),
},
.directory_path = try compilation.pathFromCwd(std_package_dir),
},
.{
.name = "builtin",
@ -178,7 +328,8 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) !
},
};
for (package_descriptors) |package_descriptor| {
var packages: [package_descriptors.len]*Package = undefined;
for (package_descriptors, &packages) |package_descriptor, *package_ptr| {
const package = try compilation.base_allocator.create(Package);
package.* = .{
.directory = .{
@ -189,21 +340,22 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) !
};
try module.main_package.addDependency(compilation.base_allocator, package_descriptor.name, package);
package_ptr.* = package;
}
assert(module.main_package.dependencies.size == 2);
_ = try module.importPackage(compilation, module.main_package.dependencies.get("std").?);
_ = try module.importPackage(compilation.base_allocator, module.main_package.dependencies.get("std").?);
for (module.import_table.values()) |import| {
try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import);
}
}
const ImportPackageResult = struct {
file: *File,
is_new: bool,
};
const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0]);
try intermediate_representation.initialize(compilation, module, packages[0], main_declaration);
}
fn generateAST() !void {}
@ -222,17 +374,6 @@ pub const Package = struct {
try package.dependencies.ensureUnusedCapacity(allocator, 1);
package.dependencies.putAssumeCapacityNoClobber(package_name, new_dependency);
}
fn deinit(package: *Package, allocator: Allocator) void {
if (package.dependencies.size > 0) {
assert(package.dependencies.size == 2);
}
package.dependencies.clearAndFree(allocator);
allocator.free(package.source_path);
allocator.free(package.directory.path);
package.directory.handle.close();
allocator.destroy(package);
}
};
pub const File = struct {
@ -241,6 +382,7 @@ pub const File = struct {
lexical_analyzer_result: lexical_analyzer.Result = undefined,
syntactic_analyzer_result: syntactic_analyzer.Result = undefined,
package_references: ArrayList(*Package) = .{},
file_references: ArrayList(*File) = .{},
relative_path: []const u8,
package: *Package,
@ -259,6 +401,10 @@ pub const File = struct {
try file.package_references.insert(allocator, 0, package);
}
/// Appends `affected` to this file's `file_references` list.
/// Fails only if growing the list fails.
fn addFileReference(file: *File, allocator: Allocator, affected: *File) !void {
try file.file_references.append(allocator, affected);
}
pub fn fromRelativePath(allocator: Allocator, file_relative_path: []const u8) *File {
const file_content = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize));
_ = file_content;
@ -271,30 +417,18 @@ pub const File = struct {
fn lex(file: *File, allocator: Allocator) !void {
assert(file.status == .loaded_into_memory);
file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code);
if (!@import("builtin").is_test) {
print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time});
}
// if (!@import("builtin").is_test) {
// print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time});
// }
file.status = .lexed;
}
fn parse(file: *File, allocator: Allocator) !void {
assert(file.status == .lexed);
file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code);
if (!@import("builtin").is_test) {
print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time});
}
// if (!@import("builtin").is_test) {
// print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time});
// }
file.status = .parsed;
}
fn deinit(file: *File, allocator: Allocator) void {
defer allocator.destroy(file);
if (file.status == .parsed) {
file.syntactic_analyzer_result.free(allocator);
file.lexical_analyzer_result.free(allocator);
file.package_references.clearAndFree(allocator);
allocator.free(file.source_code);
} else {
unreachable;
}
}
};

View File

@ -31,17 +31,6 @@ const Result = struct {
};
}
fn destroy(image: *Result) void {
inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| {
const section_bytes = @field(image.sections, field_name).content;
switch (@import("builtin").os.tag) {
.linux => std.os.munmap(section_bytes),
.windows => std.os.windows.VirtualFree(section_bytes.ptr, 0, std.os.windows.MEM_RELEASE),
else => @compileError("OS not supported"),
}
}
}
fn mmap(size: usize, flags: packed struct {
executable: bool,
}) ![]align(page_size) u8 {
@ -79,16 +68,6 @@ const Result = struct {
assert(image.sections.text.content.len > 0);
return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point]));
}
pub fn free(result: *Result, allocator: Allocator) void {
_ = allocator;
inline for (comptime std.meta.fieldNames(@TypeOf(result.sections))) |field_name| {
switch (@import("builtin").os.tag) {
.windows => unreachable,
else => std.os.munmap(@field(result.sections, field_name).content),
}
}
}
};
const Rex = enum(u8) {
@ -160,9 +139,7 @@ fn movAImm(image: *Result, integer: anytype) void {
}
test "ret void" {
const allocator = std.testing.allocator;
var image = try Result.create();
defer image.free(allocator);
image.appendCodeByte(ret);
const function_pointer = image.getEntryPoint(fn () callconv(.C) void);
@ -185,7 +162,6 @@ fn getMaxInteger(comptime T: type) T {
test "ret integer" {
inline for (integer_types_to_test) |Int| {
var image = try Result.create();
defer image.free(std.testing.allocator);
const expected_number = getMaxInteger(Int);
movAImm(&image, expected_number);
@ -234,9 +210,7 @@ fn dstRmSrcR(image: *Result, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRe
test "ret integer argument" {
inline for (integer_types_to_test) |Int| {
const allocator = std.testing.allocator;
var image = try Result.create();
defer image.free(allocator);
const number = getMaxInteger(Int);
movRmR(&image, Int, .a, .di);
@ -264,9 +238,7 @@ fn subRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRe
test "ret sub arguments" {
inline for (integer_types_to_test) |Int| {
const allocator = std.testing.allocator;
var image = try Result.create();
defer image.free(allocator);
const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2);
const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a);
@ -348,10 +320,8 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
opcode: OpcodeRmR,
pub fn runTest(test_case: @This()) !void {
const allocator = std.testing.allocator;
for (0..10) |_| {
var image = try Result.create();
defer image.free(allocator);
const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2);
const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a);
movRmR(&image, T, .a, .di);
@ -371,9 +341,7 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
}
test "call after" {
const allocator = std.testing.allocator;
var image = try Result.create();
defer image.free(allocator);
const jump_patch_offset = image.sections.text.index + 1;
image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 });
const jump_source = image.sections.text.index;
@ -387,9 +355,7 @@ test "call after" {
}
test "call before" {
const allocator = std.testing.allocator;
var image = try Result.create();
defer image.free(allocator);
const first_jump_patch_offset = image.sections.text.index + 1;
const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 };
image.appendCode(&first_call);

View File

@ -0,0 +1,9 @@
const Compilation = @import("../Compilation.zig");
const Module = Compilation.Module;
const Package = Compilation.Package;
/// Entry point of the intermediate-representation stage.
/// Currently a stub: every argument is discarded and nothing is produced.
pub fn initialize(compilation: *Compilation, module: *Module, package: *Package, main_declaration: Compilation.Declaration.Index) !void {
// All parameters are explicitly discarded so the unused-parameter check passes.
_ = main_declaration;
_ = package;
_ = module;
_ = compilation;
}

View File

@ -1,143 +0,0 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const equal = std.mem.eql;
const data_structures = @import("data_structures.zig");
const ArrayList = data_structures.ArrayList;
const parser = @import("parser.zig");
/// The `void` primitive type as a ready-made `Type` value.
const void_type = Type{
.id = .void,
};
/// A type identified only by its `Id`; `void` is the single id defined.
const Type = struct {
id: Id,
/// Returns true when the type is a primitive; `void` (the only id) is one.
fn isPrimitive(T: Type) bool {
return switch (T.id) {
.void => true,
};
}
/// All type ids known to this analyzer.
const Id = enum {
void,
};
};
/// Errors reported by `Analyzer`:
/// - `type_mismatch`: a type identifier did not match the expected primitive type.
/// - `internal`: an allocation failed or a string lookup returned nothing.
/// - `arguments_not_used`: a function with no statements declares arguments.
const Error = error{
type_mismatch,
internal,
arguments_not_used,
};
/// Entry in the list of top-level declarations: a kind plus an index.
/// For `.function` entries, `Analyzer.analyze` stores the function's position
/// in the functions list as `index`.
const TopLevelDeclaration = struct {
type: Id,
index: u31,
/// Kind of top-level declaration.
const Id = enum {
function,
expression,
};
};
/// A single instruction: an opcode-like id plus a 16-bit index.
// NOTE(review): the meaning of `index` is not visible here (it is 0 for `ret_void`).
const Instruction = struct {
id: Id,
index: u16,
/// Instruction kinds; only "return void" exists.
const Id = enum {
ret_void,
};
};
/// Ready-made "return void" instruction (id `ret_void`, index 0).
const ret_void = Instruction{
.id = .ret_void,
.index = 0,
};
/// Struct type with a single boolean flag.
// NOTE(review): not referenced by any code in view - possibly a leftover; confirm before relying on it.
const ret = struct {
is_type: bool,
};
/// An analyzed function: its instruction list and its return type.
const Function = struct {
/// Instructions of the function body, in order of emission.
instructions: ArrayList(Instruction),
/// Return type of the function.
return_type: Type,
};
/// Output of the analyzer: the top-level declarations and the functions they
/// refer to. The lists (and each function's instruction list) are released
/// with `free`, using the allocator they were built with.
pub const Result = struct {
    top_level_declarations: ArrayList(TopLevelDeclaration),
    functions: ArrayList(Function),
    instructions: struct {} = .{},

    /// Frees every function's instruction list, then both top-level lists.
    pub fn free(result: *Result, allocator: Allocator) void {
        var function_index: usize = 0;
        while (function_index < result.functions.items.len) : (function_index += 1) {
            result.functions.items[function_index].instructions.clearAndFree(allocator);
        }
        result.top_level_declarations.clearAndFree(allocator);
        result.functions.clearAndFree(allocator);
    }
};
/// Walks the parser output and produces a `Result` (top-level declarations
/// plus analyzed functions). Only empty, argument-less functions returning
/// `void` are supported; anything with statements panics with a TODO.
const Analyzer = struct {
    parser: *const parser.Result,
    top_level_declarations: ArrayList(TopLevelDeclaration),
    functions: ArrayList(Function),
    allocator: Allocator,

    /// Analyzes every function in `parser_result`.
    ///
    /// Errors:
    /// - `Error.arguments_not_used` when an empty function declares arguments.
    /// - `Error.type_mismatch` when the return type is not `void`.
    /// - `Error.internal` on allocation failure or a missing string.
    ///
    /// On success the caller owns the returned lists (release with
    /// `Result.free`). On error everything built so far is released here.
    fn analyze(allocator: Allocator, parser_result: *const parser.Result) Error!Result {
        var analyzer = Analyzer{
            .parser = parser_result,
            .top_level_declarations = ArrayList(TopLevelDeclaration){},
            .allocator = allocator,
            .functions = ArrayList(Function){},
        };
        // Without this, any error return below would leak the partially built lists.
        errdefer {
            for (analyzer.functions.items) |*function| {
                function.instructions.clearAndFree(allocator);
            }
            analyzer.functions.clearAndFree(allocator);
            analyzer.top_level_declarations.clearAndFree(allocator);
        }

        for (parser_result.functions.items) |ast_function| {
            if (ast_function.statements.items.len != 0) {
                for (ast_function.statements.items) |statement| {
                    _ = statement;
                    @panic("TODO: statement");
                }
            } else {
                if (ast_function.arguments.items.len != 0) {
                    return Error.arguments_not_used;
                }

                try analyzer.expectPrimitiveType(void_type, ast_function.return_type);

                const function_index = analyzer.functions.items.len;
                var function = Function{
                    .instructions = ArrayList(Instruction){},
                    .return_type = void_type,
                };
                // Until `function` is stored in `analyzer.functions` (the last
                // fallible step of this scope) its instruction list is owned
                // only by this local, so it must be released on failure.
                errdefer function.instructions.clearAndFree(allocator);

                function.instructions.append(allocator, ret_void) catch return Error.internal;
                analyzer.top_level_declarations.append(allocator, TopLevelDeclaration{
                    .type = .function,
                    .index = @intCast(function_index),
                }) catch return Error.internal;
                analyzer.functions.append(allocator, function) catch return Error.internal;
            }
        }

        return .{
            .top_level_declarations = analyzer.top_level_declarations,
            .functions = analyzer.functions,
        };
    }

    /// Checks that the string registered under `type_identifier_id` spells the
    /// tag name of the primitive `type_value`.
    /// Returns `Error.internal` when the id has no string and
    /// `Error.type_mismatch` when the spelling differs.
    fn expectPrimitiveType(analyzer: *Analyzer, comptime type_value: Type, type_identifier_id: u32) Error!void {
        assert(type_value.isPrimitive());
        const type_identifier = analyzer.parser.strings.get(type_identifier_id) orelse return Error.internal;

        if (!equal(u8, @tagName(type_value.id), type_identifier)) {
            return Error.type_mismatch;
        }
    }
};
/// Public entry point that runs `Analyzer.analyze` on `parser_result` and
/// returns its result or error unchanged.
pub fn runTest(allocator: Allocator, parser_result: *const parser.Result) !Result {
return Analyzer.analyze(allocator, parser_result);
}

View File

@ -1,7 +1,110 @@
const std = @import("std");
const assert = std.debug.assert;
pub const Allocator = std.mem.Allocator;
pub const ArrayList = std.ArrayListUnmanaged;
pub const HashMap = std.AutoHashMapUnmanaged;
pub const AutoHashMap = std.AutoHashMapUnmanaged;
pub const HashMap = std.HashMapUnmanaged;
pub const SegmentedList = std.SegmentedList;
pub const StringHashMap = std.StringHashMapUnmanaged;
pub const StringArrayHashMap = std.StringArrayHashMapUnmanaged;
/// Returns a list type that stores `T` in fixed-size blocks of 64 slots and
/// hands out compact 32-bit `Index` handles instead of pointers.
///
/// Each block tracks its occupied slots in a 64-bit bitset. Blocks live in a
/// growable array, so a pointer obtained from `get` may be invalidated by a
/// later `append` that adds a block; the `Index` itself stays valid.
pub fn BlockList(comptime T: type) type {
    const item_count = 64;

    const Block = struct {
        items: [item_count]T = undefined,
        bitset: Bitset = Bitset.initEmpty(),

        const Bitset = std.StaticBitSet(item_count);

        /// Claims the lowest free slot of the block and returns its position.
        /// Returns `error.OutOfMemory` when all slots are taken.
        fn allocateIndex(block: *@This()) !u6 {
            if (block.bitset.mask != std.math.maxInt(@TypeOf(block.bitset.mask))) {
                // The lowest zero bit of the mask is the lowest free slot.
                const index = @ctz(~block.bitset.mask);
                block.bitset.set(index);
                return @intCast(index);
            } else {
                return error.OutOfMemory;
            }
        }
    };

    return struct {
        blocks: ArrayList(Block) = .{},
        /// Number of elements stored.
        len: usize = 0,
        /// Hint: block where the search for a free slot starts.
        first_block: u32 = 0,

        const List = @This();

        /// Handle to an element: validity bit, slot within the block (6 bits)
        /// and block number (25 bits), packed into 32 bits.
        pub const Index = packed struct(u32) {
            valid: bool = true,
            index: u6,
            block: u25,

            pub const invalid = Index{
                .valid = false,
                .index = 0,
                .block = 0,
            };
        };

        /// Returns a pointer to the element behind `index`.
        /// Asserts that the index is valid.
        pub fn get(list: *List, index: Index) *T {
            assert(index.valid);
            return &list.blocks.items[index.block].items[index.index];
        }

        /// Stores `element` and returns its handle.
        /// Fails only when growing the block array fails.
        pub fn append(list: *List, allocator: Allocator, element: T) !Index {
            try list.ensureCapacity(allocator, list.len + 1);
            const max_allocation = list.blocks.items.len * item_count;

            const result: Index = if (list.len < max_allocation) blk: {
                // Some existing block has a free slot. Start at the hinted
                // block and wrap around until it is found.
                const block_count = list.blocks.items.len;
                var offset: usize = 0;
                while (offset < block_count) : (offset += 1) {
                    const block_index = (list.first_block + offset) % block_count;
                    const candidate = &list.blocks.items[block_index];
                    // A full block is expected here; move on to the next one.
                    const index = candidate.allocateIndex() catch continue;
                    candidate.items[index] = element;
                    list.first_block = @intCast(block_index);
                    break :blk Index{
                        .index = index,
                        .block = @intCast(block_index),
                    };
                }
                // `len < max_allocation` guarantees a free slot exists.
                unreachable;
            } else blk: {
                const block_index = list.blocks.items.len;
                const new_block = list.blocks.addOneAssumeCapacity();
                // `addOneAssumeCapacity` returns uninitialized memory; the
                // bitset must start empty before a slot can be claimed.
                new_block.* = .{};
                const index = new_block.allocateIndex() catch unreachable;
                new_block.items[index] = element;
                list.first_block = @intCast(block_index);
                break :blk Index{
                    .index = index,
                    .block = @intCast(block_index),
                };
            };

            list.len += 1;
            return result;
        }

        /// Makes sure the block array can hold enough blocks for
        /// `new_capacity` elements without reallocating.
        pub fn ensureCapacity(list: *List, allocator: Allocator, new_capacity: usize) !void {
            const max_allocation = list.blocks.items.len * item_count;
            if (max_allocation < new_capacity) {
                // Ceiling division: a partially used block still needs a whole block.
                const block_count = new_capacity / item_count + @intFromBool(new_capacity % item_count != 0);
                try list.blocks.ensureTotalCapacity(allocator, block_count);
            }
        }

        test "Bitset index allocation" {
            const expect = std.testing.expect;
            var block = Block{};
            for (0..item_count) |expected_index| {
                const new_index = try block.allocateIndex();
                try expect(new_index == expected_index);
            }

            _ = block.allocateIndex() catch return;

            return error.TestUnexpectedResult;
        }
    };
}
/// Returns the tag of enum `E` whose name is exactly `string`, or `null`
/// when no tag has that name. The comparison is case-sensitive.
pub fn enumFromString(comptime E: type, string: []const u8) ?E {
    // The standard library already provides this lookup.
    return std.meta.stringToEnum(E, string);
}

View File

@ -7,6 +7,7 @@ const equal = std.mem.eql;
const data_structures = @import("../data_structures.zig");
const ArrayList = data_structures.ArrayList;
const enumFromString = data_structures.enumFromString;
const Compilation = @import("../Compilation.zig");
const fs = @import("../fs.zig");
@ -17,29 +18,71 @@ pub const Token = packed struct(u64) {
id: Id,
pub const Id = enum(u8) {
identifier = 0,
number = 1,
string_literal = 2,
left_parenthesis = '(',
right_parenthesis = ')',
left_brace = '{',
right_brace = '}',
equal = '=',
colon = ':',
semicolon = ';',
hash = '#',
comma = ',',
bang = '!',
eof = 0x00,
identifier = 0x01,
number = 0x02,
string_literal = 0x03,
fixed_keyword_function = 0x04,
fixed_keyword_const = 0x05,
fixed_keyword_var = 0x06,
fixed_keyword_void = 0x07,
fixed_keyword_noreturn = 0x08,
fixed_keyword_comptime = 0x09,
fixed_keyword_while = 0x0a,
fixed_keyword_bool = 0x0b,
fixed_keyword_true = 0x0c,
fixed_keyword_false = 0x0d,
bang = '!', // 0x21
hash = '#', // 0x23
dollar_sign = '$', // 0x24
modulus = '%', // 0x25
ampersand = '&', // 0x26
left_parenthesis = '(', // 0x28
right_parenthesis = ')', // 0x29
asterisk = '*', // 0x2a
plus = '+', // 0x2b
comma = ',', // 0x2c
minus = '-', // 0x2d
period = '.', // 0x2e
slash = '/', // 0x2f
colon = ':', // 0x3a
semicolon = ';', // 0x3b
less = '<', // 0x3c
equal = '=', // 0x3d
greater = '>', // 0x3e
question_mark = '?', // 0x3f
at = '@', // 0x40
left_bracket = '[', // 0x5b
backlash = '\\', // 0x5c
right_bracket = ']', // 0x5d
caret = '^', // 0x5e
underscore = '_', // 0x5f
grave = '`', // 0x60
left_brace = '{', // 0x7b
vertical_bar = '|', // 0x7c
right_brace = '}', // 0x7d
tilde = '~', // 0x7e
};
pub const Index = u32;
};
/// Reserved words of the language. Each tag name is the keyword's exact
/// spelling; the lexer looks identifiers up in this enum and maps a match to
/// the `Token.Id` tag named `fixed_keyword_<name>`.
pub const FixedKeyword = enum {
@"comptime",
@"const",
@"var",
void,
noreturn,
function,
@"while",
bool,
true,
false,
};
/// Output of lexical analysis: the token list plus a timing value.
pub const Result = struct {
/// Tokens produced by the lexer.
tokens: ArrayList(Token),
/// Time measurement for the analysis.
// NOTE(review): presumably nanoseconds, going by the "{} ns" log format used elsewhere - confirm.
time: u64,
/// Releases the token list's memory using `allocator`.
pub fn free(result: *Result, allocator: Allocator) void {
result.tokens.clearAndFree(allocator);
}
};
pub fn analyze(allocator: Allocator, text: []const u8) !Result {
@ -62,7 +105,23 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result {
break;
}
break :blk .identifier;
const identifier = text[start_index..][0 .. index - start_index];
std.debug.print("Identifier: {s}\n", .{identifier});
if (start_character == 'u' or start_character == 's') {
var index_integer = start_index + 1;
while (text[index_integer] >= '0' and text[index_integer] <= '9') {
index_integer += 1;
}
if (index_integer == index) {
unreachable;
}
}
break :blk if (enumFromString(FixedKeyword, text[start_index..][0 .. index - start_index])) |fixed_keyword| switch (fixed_keyword) {
inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)),
} else .identifier;
},
'(', ')', '{', '}', '-', '=', ';', '#' => |operator| blk: {
index += 1;
@ -75,9 +134,17 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result {
break :blk .number;
},
'\'' => {
unreachable;
},
'"' => blk: {
index += 1;
while (text[index] != '"') {
while (true) {
if (text[index] == '"' and text[index - 1] != '"') {
break;
}
index += 1;
}
@ -103,7 +170,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result {
});
}
const should_log = false;
const should_log = true;
if (should_log) {
for (tokens.items, 0..) |token, i| {
std.debug.print("#{} {s}\n", .{ i, @tagName(token.id) });

View File

@ -0,0 +1,668 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const equal = std.mem.eql;
const Compilation = @import("../Compilation.zig");
const File = Compilation.File;
const Module = Compilation.Module;
const Package = Compilation.Package;
const Block = Compilation.Block;
const Declaration = Compilation.Declaration;
const Field = Compilation.Field;
const Function = Compilation.Function;
const Scope = Compilation.Scope;
const Struct = Compilation.Struct;
const Type = Compilation.Type;
const Value = Compilation.Value;
const lexical_analyzer = @import("lexical_analyzer.zig");
const Token = lexical_analyzer.Token;
const syntactic_analyzer = @import("syntactic_analyzer.zig");
const ContainerDeclaration = syntactic_analyzer.ContainerDeclaration;
const Node = syntactic_analyzer.Node;
const SymbolDeclaration = syntactic_analyzer.SymbolDeclaration;
const data_structures = @import("../data_structures.zig");
const ArrayList = data_structures.ArrayList;
const HashMap = data_structures.AutoHashMap;
const print = std.debug.print;
const Analyzer = struct {
source_code: []const u8,
nodes: []const Node,
tokens: []const Token,
file: *File,
allocator: Allocator,
module: *Module,
/// Debug helper: prints the syntax node referenced by `node_index`.
/// `node_index` must be valid (`unwrap` asserts it).
fn lazyGlobalDeclaration(analyzer: *Analyzer, node_index: Node.Index) void {
    const global_node = analyzer.nodes[node_index.unwrap()];
    print("Global: {}", .{global_node});
}
/// Analyzes the `comptime` container member at `node_index`.
///
/// The node's `left` child must be a single-statement block (`block_one`) whose
/// statement is an assignment; any other node kind panics with its tag name.
/// Returns `Value.Index.invalid` for now: the statement values are collected
/// but not yet turned into a result (see the TODO at the end).
fn comptimeBlock(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index {
    const comptime_node = analyzer.nodes[node_index.unwrap()];
    const comptime_block_node = analyzer.nodes[comptime_node.left.unwrap()];

    // Both lists are scratch storage that never leaves this function, so they
    // are released on every exit path instead of being leaked.
    var statement_node_indices = ArrayList(Node.Index){};
    defer statement_node_indices.deinit(analyzer.allocator);
    switch (comptime_block_node.id) {
        .block_one => {
            try statement_node_indices.append(analyzer.allocator, comptime_block_node.left);
        },
        else => |t| @panic(@tagName(t)),
    }

    var statement_values = ArrayList(Value.Index){};
    defer statement_values.deinit(analyzer.allocator);
    for (statement_node_indices.items) |statement_node_index| {
        const statement_node = analyzer.nodes[statement_node_index.unwrap()];
        switch (statement_node.id) {
            .assign => {
                const assign_expression = try analyzer.assign(scope, statement_node_index);
                try statement_values.append(analyzer.allocator, assign_expression);
            },
            else => |t| @panic(@tagName(t)),
        }
    }

    // TODO
    return Value.Index.invalid;
}
/// Analyzes an assignment node.
///
/// Only the discard form (`_ = expression`) is supported: the parser encodes
/// the underscore target as an invalid `left` index. Any real assignment
/// target panics with "Not discard".
fn assign(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !Value.Index {
    const assign_node = analyzer.nodes[node_index.unwrap()];
    print("\nAssign. Left: {}. Right: {}\n", .{ assign_node.left, assign_node.right });

    // A valid left-hand side is a real assignment target, which is not implemented yet.
    if (assign_node.left.valid) {
        @panic("Not discard");
    }

    const no_expectation = ExpectType{ .none = {} };
    return try analyzer.expression(scope, no_expectation, assign_node.right);
}
/// Analyzes a block node and registers a new (currently empty) block in the module.
///
/// Supports `block_zero` (no statements) and `block_one` (one statement, stored
/// in `left`); other node kinds panic with their tag name. Every statement is
/// analyzed as an expression with `expect_type`; the resulting values are
/// discarded for now.
fn block(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) !Block.Index {
    const block_node = analyzer.nodes[node_index.unwrap()];

    // Scratch list of statement nodes; released on every exit path.
    var statements = ArrayList(Node.Index){};
    defer statements.deinit(analyzer.allocator);

    switch (block_node.id) {
        .block_one => {
            try statements.append(analyzer.allocator, block_node.left);
        },
        .block_zero => {},
        else => |t| @panic(@tagName(t)),
    }

    for (statements.items) |statement_node_index| {
        _ = try analyzer.expression(scope, expect_type, statement_node_index);
    }

    return try analyzer.module.blocks.append(analyzer.allocator, .{});
}
/// Analyzes the expression node at `node_index` within `scope` and returns the
/// index of the resulting value.
///
/// Handled node kinds: identifiers (resolved in `scope` only), the `#import`
/// intrinsic, function definitions, the `true` literal and simple `while`
/// loops. Any other node kind panics with its tag name.
///
/// `expect_type` is checked against the boolean type for `true` literals and is
/// forwarded to `while` bodies; function bodies are analyzed against the
/// prototype's return type instead.
fn expression(analyzer: *Analyzer, scope: *Scope, expect_type: ExpectType, node_index: Node.Index) error{OutOfMemory}!Value.Index {
    const node = analyzer.nodes[node_index.unwrap()];
    return switch (node.id) {
        .identifier => blk: {
            const identifier_hash = try analyzer.identifierFromToken(node.token);
            // TODO: search in upper scopes too
            // A read-only `get` is used so that a failed lookup does not insert
            // an uninitialized entry into the scope's declaration table.
            const declaration_index = scope.declarations.get(identifier_hash) orelse @panic("TODO: not found");
            const declaration = analyzer.module.declarations.get(declaration_index);
            break :blk try analyzer.analyzeDeclaration(scope, declaration);
        },
        .compiler_intrinsic_one => blk: {
            // The intrinsic name is the identifier token right after the `#` token.
            const intrinsic_name = analyzer.tokenIdentifier(node.token + 1);
            const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable;
            print("Intrinsic: {s}", .{@tagName(intrinsic)});
            switch (intrinsic) {
                .import => {
                    const import_argument = analyzer.nodes[node.left.unwrap()];
                    switch (import_argument.id) {
                        .string_literal => {
                            const import_name = analyzer.tokenStringLiteral(import_argument.token);
                            const imported_file = try analyzer.module.importFile(analyzer.allocator, analyzer.file, import_name);

                            if (!imported_file.is_new) {
                                // NOTE(review): presumably reached when the same file is imported
                                // more than once, so this must not be `unreachable` (UB in
                                // release builds). Fail loudly until re-imports are supported.
                                @panic("TODO: import of an already imported file");
                            }

                            // TODO: fix error
                            analyzer.module.generateAbstractSyntaxTreeForFile(analyzer.allocator, imported_file.file) catch return error.OutOfMemory;

                            const file_struct_declaration_index = try analyzeFile(analyzer.allocator, analyzer.module, imported_file.file);
                            break :blk try analyzer.module.values.append(analyzer.allocator, .{
                                .type = .{
                                    .declaration = file_struct_declaration_index,
                                },
                                .is_const = true,
                                .is_comptime = true,
                            });
                        },
                        else => unreachable,
                    }
                },
            }
            unreachable;
        },
        .function_definition => blk: {
            const function_prototype_index = try analyzer.functionPrototype(node.left);

            // The body is analyzed against the declared return type.
            const function_body = try analyzer.block(scope, .{
                .type_index = analyzer.functionPrototypeReturnType(function_prototype_index),
            }, node.right);

            const function_index = try analyzer.module.functions.append(analyzer.allocator, .{
                .prototype = function_prototype_index,
                .body = function_body,
            });
            const value_index = try analyzer.module.values.append(analyzer.allocator, .{
                .type = .{
                    .function = function_index,
                },
                .is_const = true,
                .is_comptime = true,
            });
            break :blk value_index;
        },
        .keyword_true => blk: {
            switch (expect_type) {
                .none => {},
                .type_index => |expected_type| {
                    // Type indices are packed structs, so they are compared through their bit pattern.
                    if (@as(u32, @bitCast(type_boolean)) != @as(u32, @bitCast(expected_type))) {
                        @panic("TODO: compile error");
                    }
                },
            }
            break :blk bool_true;
        },
        .simple_while => blk: {
            const while_condition = try analyzer.expression(scope, ExpectType.boolean, node.left);
            _ = while_condition;
            const while_body = try analyzer.block(scope, expect_type, node.right);
            _ = while_body;

            const loop_index = try analyzer.module.loops.append(analyzer.allocator, .{});
            const value_index = try analyzer.module.values.append(analyzer.allocator, .{
                .type = .{
                    .loop = loop_index,
                },
                // TODO:
                .is_const = false,
                .is_comptime = false,
            });
            break :blk value_index;
        },
        else => |t| @panic(@tagName(t)),
    };
}
/// Returns the return type recorded for the function prototype at `function_prototype_index`.
fn functionPrototypeReturnType(analyzer: *Analyzer, function_prototype_index: Function.Prototype.Index) Type.Index {
    return analyzer.module.function_prototypes.get(function_prototype_index).return_type;
}
/// Analyzes a function prototype node and registers it in the module.
///
/// Only `simple_function_prototype` nodes without arguments and with a
/// `noreturn` return type are supported so far; everything else panics with
/// the offending tag name (an identifier return type is `unreachable`).
fn functionPrototype(analyzer: *Analyzer, node_index: Node.Index) !Function.Prototype.Index {
    const prototype_node = analyzer.nodes[node_index.unwrap()];
    if (prototype_node.id != .simple_function_prototype) @panic(@tagName(prototype_node.id));

    // An invalid `left` index means "no arguments". No argument node kind is
    // handled yet, so a present argument node always panics.
    const arguments: ?[]const Field.Index = if (prototype_node.left.get()) |argument_node_index|
        @panic(@tagName(analyzer.nodes[argument_node_index].id))
    else
        null;

    const return_type_node = analyzer.nodes[prototype_node.right.unwrap()];
    const return_type: Type.Index = switch (return_type_node.id) {
        .keyword_noreturn => .{ .block = 0, .index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.noreturn) },
        .identifier => unreachable,
        else => |t| @panic(@tagName(t)),
    };

    return try analyzer.module.function_prototypes.append(analyzer.allocator, .{
        .arguments = arguments,
        .return_type = return_type,
    });
}
/// Resolves `declaration` to the value of its initialization expression.
///
/// Only unresolved, untyped `simple_variable_declaration`s whose initializer is
/// both const and comptime are supported; every other case is `unreachable` or
/// panics with the node tag name. The declaration itself is not updated.
fn analyzeDeclaration(analyzer: *Analyzer, scope: *Scope, declaration: *Declaration) !Value.Index {
    switch (declaration.*) {
        .unresolved => |node_index| {
            const declaration_node = analyzer.nodes[node_index.unwrap()];
            switch (declaration_node.id) {
                .simple_variable_declaration => {
                    // A valid `left` index would be an explicit type annotation, which is not supported yet.
                    if (declaration_node.left.valid) unreachable;
                    const expect_type = ExpectType{ .none = {} };

                    const initialization_expression = try analyzer.expression(scope, expect_type, declaration_node.right);
                    const value = analyzer.module.values.get(initialization_expression);
                    if (!(value.is_comptime and value.is_const)) unreachable;

                    return initialization_expression;
                },
                else => |t| @panic(@tagName(t)),
            }
        },
        .struct_type => unreachable,
    }

    @panic("TODO: analyzeDeclaration");
}
/// Analyzes one container member.
///
/// `comptime` blocks are analyzed immediately; variable declarations are left
/// alone here. Any other node kind panics.
fn containerMember(analyzer: *Analyzer, scope: *Scope, node_index: Node.Index) !void {
    const member_node = analyzer.nodes[node_index.unwrap()];
    switch (member_node.id) {
        .@"comptime" => {
            _ = try analyzer.comptimeBlock(scope, node_index);
        },
        .simple_variable_declaration => {},
        else => std.debug.panic("Tag: {}", .{member_node.id}),
    }
}
/// Work-in-progress analysis of a global symbol declaration.
///
/// Explicit type annotations are not supported (panics). The initializer must
/// be a one-argument compiler intrinsic; the only known intrinsic is `import`,
/// whose handling is still unimplemented, so every path currently ends in
/// `unreachable` or a panic.
fn globalSymbolDeclaration(analyzer: *Analyzer, symbol_declaration: SymbolDeclaration) !void {
    if (symbol_declaration.type_node.valid) {
        @panic("TODO: type node");
    }

    const initialization_node = analyzer.nodes[symbol_declaration.initialization_node.unwrap()];
    switch (initialization_node.id) {
        .compiler_intrinsic_one => {
            // The intrinsic name is the identifier token right after the `#` token.
            const intrinsic_name = analyzer.tokenIdentifier(initialization_node.token + 1);
            // Same lookup as in `expression`, instead of a hand-rolled `inline for`.
            const intrinsic = data_structures.enumFromString(Intrinsic, intrinsic_name) orelse unreachable;
            print("Intrinsic: {s}", .{@tagName(intrinsic)});

            switch (intrinsic) {
                .import => {
                    // `unwrap` yields the raw index; `get` returns an optional,
                    // which cannot be used to index `nodes`.
                    const import_argument = analyzer.nodes[initialization_node.left.unwrap()];
                    switch (import_argument.id) {
                        .string_literal => unreachable,
                        else => unreachable,
                    }
                },
            }
        },
        else => unreachable,
    }

    print("Init node: {}\n", .{initialization_node});
    @panic("TODO");
}
/// Converts a `simple_variable_declaration` node into a `SymbolDeclaration`:
/// `left` is the type node, `right` the initializer and `token` the mutability
/// keyword. Any other node kind is `unreachable`.
fn symbolDeclaration(analyzer: *Analyzer, node_index: Node.Index) SymbolDeclaration {
    const declaration_node = analyzer.nodes[node_index.unwrap()];
    if (declaration_node.id != .simple_variable_declaration) unreachable;

    return SymbolDeclaration{
        .type_node = declaration_node.left,
        .initialization_node = declaration_node.right,
        .mutability_token = declaration_node.token,
    };
}
/// Analyzes a container (currently only a file-level struct) and returns the
/// index of its declaration in `analyzer.module.declarations`.
///
/// Steps:
///   1. allocate a scope for the container and register the struct declaration;
///   2. split the members into declarations and fields;
///   3. register every variable declaration name in the scope as `unresolved`,
///      panicking on duplicates;
///   4. analyze each declaration member;
///   5. fields are not implemented yet (panics if any is present).
///
/// `parent_scope` must be invalid (file scope) and the container must have at
/// least one member; both are asserted. `index` is currently unused.
fn structDeclaration(analyzer: *Analyzer, parent_scope: Scope.Index, container_declaration: syntactic_analyzer.ContainerDeclaration, index: Node.Index) !Declaration.Index {
    _ = index;
    const new_scope = try analyzer.allocateScope(parent_scope, Type.Index.invalid);
    // NOTE(review): this pointer is used while nested analysis may allocate more
    // scopes; this assumes `module.scopes` keeps element addresses stable. Confirm.
    const scope = new_scope.ptr;

    const is_file = !parent_scope.valid;
    assert(is_file);
    // TODO: do it properly
    const declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{
        .struct_type = .{
            .scope = new_scope.index,
            .initialization = if (is_file) Value.Index.invalid else unreachable,
        },
    });

    // TODO:
    assert(container_declaration.members.len > 0);

    // First pass: count members of each kind so both lists can be sized exactly.
    const count = blk: {
        var result: struct {
            fields: u32 = 0,
            declarations: u32 = 0,
        } = .{};
        for (container_declaration.members) |member_index| {
            const member = analyzer.nodes[member_index.unwrap()];
            const member_type = getContainerMemberType(member.id);
            switch (member_type) {
                .declaration => result.declarations += 1,
                .field => result.fields += 1,
            }
        }
        break :blk result;
    };

    // Scratch lists; they never escape this function, so release them on every exit path.
    var declaration_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.declarations);
    defer declaration_nodes.deinit(analyzer.allocator);
    var field_nodes = try ArrayList(Node.Index).initCapacity(analyzer.allocator, count.fields);
    defer field_nodes.deinit(analyzer.allocator);

    // Second pass: sort the members into the two lists.
    for (container_declaration.members) |member_index| {
        const member = analyzer.nodes[member_index.unwrap()];
        const member_type = getContainerMemberType(member.id);
        const array_list = switch (member_type) {
            .declaration => &declaration_nodes,
            .field => &field_nodes,
        };
        array_list.appendAssumeCapacity(member_index);
    }

    // Register all declaration names before analyzing any of them, so that
    // declarations can be looked up regardless of their order in the file.
    for (declaration_nodes.items) |declaration_node_index| {
        const declaration_node = analyzer.nodes[declaration_node_index.unwrap()];
        switch (declaration_node.id) {
            .@"comptime" => {},
            .simple_variable_declaration => {
                // The name is the token right after the `const`/`var` keyword.
                const expected_identifier_token_index = declaration_node.token + 1;
                const expected_identifier_token = analyzer.tokens[expected_identifier_token_index];
                if (expected_identifier_token.id != .identifier) {
                    print("Error: found: {}", .{expected_identifier_token.id});
                    @panic("Expected identifier");
                }
                // TODO: Check if it is a keyword
                const identifier_index = try analyzer.identifierFromToken(expected_identifier_token_index);
                const declaration_name = analyzer.tokenIdentifier(expected_identifier_token_index);

                // Check if the symbol name is already occupied in the same scope
                const scope_lookup = try scope.declarations.getOrPut(analyzer.allocator, identifier_index);
                if (scope_lookup.found_existing) {
                    std.debug.panic("Existing name in lookup: {s}", .{declaration_name});
                }

                // Check if the symbol name is already occupied in parent scopes
                if (scope.parent.valid) {
                    @panic("TODO: upper scope");
                }

                const container_declaration_index = try analyzer.module.declarations.append(analyzer.allocator, .{
                    .unresolved = declaration_node_index,
                });
                scope_lookup.value_ptr.* = container_declaration_index;
            },
            else => unreachable,
        }
    }

    // TODO: consider iterating over scope declarations instead?
    for (declaration_nodes.items) |declaration_node_index| {
        const declaration_node = analyzer.nodes[declaration_node_index.unwrap()];
        switch (declaration_node.id) {
            .@"comptime", .simple_variable_declaration => try analyzer.containerMember(scope, declaration_node_index),
            else => unreachable,
        }
    }

    for (field_nodes.items) |field_index| {
        const field_node = analyzer.nodes[field_index.unwrap()];
        _ = field_node;
        @panic("TODO: fields");
    }

    return declaration_index;
}
const MemberType = enum {
declaration,
field,
};
/// Classifies a container member node as a declaration or a field.
/// Only `comptime` blocks and variable declarations are known so far (both
/// are declarations); any other node kind is `unreachable`.
fn getContainerMemberType(member_id: Node.Id) MemberType {
    switch (member_id) {
        .@"comptime", .simple_variable_declaration => return .declaration,
        else => unreachable,
    }
}
/// Returns the identifier key for the token at `token_index`: the Wyhash
/// (seed 0) of the identifier text truncated to 32 bits. The key is also
/// registered in `analyzer.module.string_table`.
fn identifierFromToken(analyzer: *Analyzer, token_index: Token.Index) !u32 {
    const identifier_text = analyzer.tokenIdentifier(token_index);
    const hash: u32 = @truncate(std.hash.Wyhash.hash(0, identifier_text));

    // Whether or not the key was already present, the stored key equals `hash`,
    // so the lookup result does not affect the return value.
    // NOTE(review): the value slot of a newly inserted entry is never written here;
    // presumably it should hold the identifier text. Confirm against `string_table`.
    _ = try analyzer.module.string_table.getOrPut(analyzer.allocator, hash);
    return hash;
}
/// Returns the source text of the identifier token at `token_index`.
/// Asserts that the token is an identifier. The slice borrows from `source_code`.
fn tokenIdentifier(analyzer: *Analyzer, token_index: Token.Index) []const u8 {
    const identifier_token = analyzer.tokens[token_index];
    assert(identifier_token.id == .identifier);
    return analyzer.source_code[identifier_token.start..][0..identifier_token.len];
}
/// Returns the contents of the string literal token at `token_index` without
/// the surrounding double quotes. Asserts that the token is a string literal.
/// The slice borrows from `source_code`.
fn tokenStringLiteral(analyzer: *Analyzer, token_index: Token.Index) []const u8 {
    const literal_token = analyzer.tokens[token_index];
    assert(literal_token.id == .string_literal);
    // Skip the opening quote and drop both quotes from the length.
    return analyzer.source_code[literal_token.start + 1 ..][0 .. literal_token.len - 2];
}
const ScopeAllocation = struct {
ptr: *Scope,
index: Scope.Index,
};
/// Appends a new scope with the given parent and type to the module and
/// returns both its index and a pointer to it.
fn allocateScope(analyzer: *Analyzer, parent_scope: Scope.Index, scope_type: Type.Index) !ScopeAllocation {
    const index = try analyzer.module.scopes.append(analyzer.allocator, .{
        .parent = parent_scope,
        .type = scope_type,
    });
    return ScopeAllocation{
        .ptr = analyzer.module.scopes.get(index),
        .index = index,
    };
}
};
const ExpectType = union(enum) {
none,
type_index: Type.Index,
pub const boolean = ExpectType{
.type_index = type_boolean,
};
};
const type_boolean = Type.Index{
.block = 0,
.index = FixedTypeKeyword.offset + @intFromEnum(FixedTypeKeyword.bool),
};
// Indices of the two predefined boolean values. They must match the order in
// which `initialize` appends them to `module.values`: `false` first, then
// `true`. Both previously pointed at index 1, which made `bool_false` alias `true`.
// NOTE(review): assumes `module.values` is empty when `initialize` runs and that
// `append` hands out consecutive indices in block 0 — confirm.
const bool_false = Value.Index{
    .block = 0,
    .index = 0,
};
const bool_true = Value.Index{
    .block = 0,
    .index = 1,
};
const Intrinsic = enum {
import,
};
// Built-in types that are named by a keyword. `initialize` appends one type per
// field, in declaration order, before any other type; `offset` plus the enum
// value is used as the `index` of the corresponding `Type.Index`
// (see `type_boolean`).
const FixedTypeKeyword = enum {
void,
noreturn,
bool,
const offset = 0;
};
// Unsigned integer types; `initialize` appends them right after the
// `FixedTypeKeyword` types, which is why `offset` is that enum's field count.
const HardwareUnsignedIntegerType = enum {
u8,
u16,
u32,
u64,
const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len;
};
// Signed integer types; `initialize` appends them right after the unsigned
// ones, so `offset` is the unsigned offset plus the unsigned field count.
const HardwareSignedIntegerType = enum {
s8,
s16,
s32,
s64,
const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len;
};
/// Seeds `module` with the built-in types and the two boolean values, then
/// analyzes the already-imported `package` and returns the declaration index
/// produced by `analyzeExistingPackage`.
///
/// The append order below is significant: the `offset` constants of
/// `FixedTypeKeyword`, `HardwareUnsignedIntegerType` and
/// `HardwareSignedIntegerType`, as well as `bool_false`/`bool_true`, are
/// written against this order.
pub fn initialize(compilation: *Compilation, module: *Module, package: *Package) !Declaration.Index {
// Keyword types (void, noreturn, bool): one payload-less `Type` per enum field.
inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| {
_ = try module.types.append(compilation.base_allocator, @unionInit(Type, enum_field.name, {}));
}
// Unsigned integer types, in enum declaration order.
inline for (@typeInfo(HardwareUnsignedIntegerType).Enum.fields) |enum_field| {
_ = try module.types.append(compilation.base_allocator, .{
.integer = .{
.signedness = .unsigned,
.bit_count = switch (@field(HardwareUnsignedIntegerType, enum_field.name)) {
.u8 => 8,
.u16 => 16,
.u32 => 32,
.u64 => 64,
},
},
});
}
// Signed integer types, in enum declaration order.
inline for (@typeInfo(HardwareSignedIntegerType).Enum.fields) |enum_field| {
_ = try module.types.append(compilation.base_allocator, .{
.integer = .{
.signedness = .signed,
.bit_count = switch (@field(HardwareSignedIntegerType, enum_field.name)) {
.s8 => 8,
.s16 => 16,
.s32 => 32,
.s64 => 64,
},
},
});
}
// Predefined boolean values: `false` is appended first, then `true`.
_ = try module.values.append(compilation.base_allocator, .{
.type = .{
.bool_false = {},
},
.is_const = true,
.is_comptime = true,
});
_ = try module.values.append(compilation.base_allocator, .{
.type = .{
.bool_true = {},
},
.is_const = true,
.is_comptime = true,
});
return analyzeExistingPackage(compilation, module, package);
}
/// Analyzes the main file of a package that has already been imported into
/// `module` (asserted: the import must not be new) and returns the
/// declaration index of the file's top-level struct.
pub fn analyzeExistingPackage(compilation: *Compilation, module: *Module, package: *Package) !Declaration.Index {
    const allocator = compilation.base_allocator;
    const package_import = try module.importPackage(allocator, package);
    assert(!package_import.is_new);
    return try analyzeFile(allocator, module, package_import.file);
}
/// Runs semantic analysis on an already parsed `file` (asserted) and returns
/// the declaration index of the struct that represents the file.
pub fn analyzeFile(allocator: Allocator, module: *Module, file: *File) !Declaration.Index {
    assert(file.status == .parsed);

    const container_declaration = try mainNodeToContainerDeclaration(allocator, file);

    var analyzer = Analyzer{
        .source_code = file.source_code,
        .nodes = file.syntactic_analyzer_result.nodes.items,
        .tokens = file.lexical_analyzer_result.tokens.items,
        .file = file,
        .allocator = allocator,
        .module = module,
    };

    // Node 0 is the file's main node; an invalid parent scope marks file scope.
    return try analyzer.structDeclaration(Scope.Index.invalid, container_declaration, .{ .value = 0 });
}
/// Builds the `ContainerDeclaration` of a file from its main node (node 0).
///
/// The main node stores its members directly in `left`/`right`; a
/// `node_list` child is not supported yet (panics). The returned member slice
/// is allocated with `allocator` and is not freed anywhere in this function.
fn mainNodeToContainerDeclaration(allocator: Allocator, file: *File) !ContainerDeclaration {
    const main_node = getNode(file, 0);
    const left_node = getNode(file, main_node.left.value);
    if (left_node.id == .node_list) {
        @panic("TODO: get list");
    }

    // One member when `left` and `right` name the same node, two when they differ.
    const has_left = main_node.left.valid;
    const has_right = main_node.right.valid;
    const is_same_node = has_left and has_right and main_node.left.value == main_node.right.value;
    const len = @as(u2, @intFromBool(has_left)) + @as(u2, @intFromBool(has_right)) - @as(u2, @intFromBool(is_same_node));
    assert(len > 0);

    const list_buffer = [2]Node.Index{ main_node.left, main_node.right };

    // Copy out of the stack buffer so the slice outlives this call.
    // Deal properly with this allocation
    const owned_node_list = try allocator.dupe(Node.Index, list_buffer[0..len]);
    return .{
        .members = owned_node_list,
    };
}
/// Returns a pointer to the syntax node at `index` in the file's parse result.
fn getNode(file: *const File, index: u32) *Node {
    const nodes = file.syntactic_analyzer_result.nodes.items;
    return &nodes[index];
}

View File

@ -6,6 +6,7 @@ const log = std.log;
const data_structures = @import("../data_structures.zig");
const ArrayList = data_structures.ArrayList;
const enumFromString = data_structures.enumFromString;
const HashMap = data_structures.HashMap;
const lexical_analyzer = @import("lexical_analyzer.zig");
@ -14,26 +15,40 @@ const Token = lexical_analyzer.Token;
pub const Result = struct {
nodes: ArrayList(Node),
time: u64,
pub fn free(result: *Result, allocator: Allocator) void {
result.nodes.clearAndFree(allocator);
}
};
pub const Node = packed struct(u96) {
// TODO: pack it to be more efficient
pub const Node = packed struct(u128) {
token: u32,
id: Id,
left: Node.Index,
right: Node.Index,
pub const Index = u27;
pub const Index = packed struct(u32) {
value: u31,
valid: bool = true,
pub const invalid = Index{
.value = 0,
.valid = false,
};
pub fn get(index: Index) ?u32 {
return if (index.valid) index.value else null;
}
pub fn unwrap(index: Index) u32 {
assert(index.valid);
return index.value;
}
};
pub const Range = struct {
start: u32,
end: u32,
};
pub const Id = enum(u10) {
pub const Id = enum(u32) {
main = 0,
identifier = 1,
number = 2,
@ -46,6 +61,13 @@ pub const Node = packed struct(u96) {
simple_variable_declaration = 9,
assign = 10,
@"comptime" = 11,
node_list = 12,
block_zero = 13,
simple_while = 14,
simple_function_prototype = 15,
function_definition = 16,
keyword_noreturn = 17,
keyword_true = 18,
};
};
@ -63,10 +85,6 @@ const Analyzer = struct {
allocator: Allocator,
temporal_node_heap: ArrayList(Node.Index) = .{},
fn free(analyzer: *Analyzer) void {
_ = analyzer;
}
fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 {
if (analyzer.tokens[analyzer.token_i].id == token_id) {
const result = analyzer.token_i;
@ -90,56 +108,50 @@ const Analyzer = struct {
while (analyzer.token_i < analyzer.tokens.len) {
const first = analyzer.token_i;
const member_node: Node = switch (analyzer.tokens[first].id) {
.identifier => blk: {
const first_identifier_token = analyzer.tokens[first];
analyzer.token_i += 1;
.fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) {
.left_brace => blk: {
analyzer.token_i += 1;
const comptime_block = try analyzer.block();
const identifier = analyzer.getIdentifier(first_identifier_token);
if (equal(u8, identifier, "comptime")) {
switch (analyzer.tokens[analyzer.token_i].id) {
.left_brace => {
const comptime_block = try analyzer.block();
break :blk .{
.id = .@"comptime",
.token = first,
.left = comptime_block,
.right = 0,
};
},
else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}),
}
} else {
const is_const = equal(u8, identifier, "const");
const is_var = equal(u8, identifier, "var");
assert(is_const or is_var);
_ = try analyzer.expectToken(.identifier);
// TODO: type
_ = try analyzer.expectToken(.equal);
// TODO: do this in a function
const init_node = switch (analyzer.tokens[analyzer.token_i].id) {
.identifier => unreachable,
.hash => try analyzer.compilerIntrinsic(),
else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}),
};
_ = try analyzer.expectToken(.semicolon);
// TODO:
const type_node = 0;
const top_level_decl = .{
.id = .simple_variable_declaration,
break :blk .{
.id = .@"comptime",
.token = first,
.left = type_node,
.right = init_node,
.left = comptime_block,
.right = Node.Index.invalid,
};
},
else => |foo| std.debug.panic("NI: {s}", .{@tagName(foo)}),
},
.fixed_keyword_const, .fixed_keyword_var => blk: {
analyzer.token_i += 1;
_ = try analyzer.expectToken(.identifier);
break :blk top_level_decl;
}
// TODO: type
_ = try analyzer.expectToken(.equal);
// TODO: do this in a function
const init_node = switch (analyzer.tokens[analyzer.token_i].id) {
.identifier => unreachable,
.hash => try analyzer.compilerIntrinsic(),
.left_parenthesis => try analyzer.function(),
else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}),
};
_ = try analyzer.expectToken(.semicolon);
// TODO:
const type_node = Node.Index.invalid;
const top_level_decl = .{
.id = .simple_variable_declaration,
.token = first,
.left = type_node,
.right = init_node,
};
break :blk top_level_decl;
},
.identifier => {
unreachable;
},
else => |t| std.debug.panic("NI: {s}", .{@tagName(t)}),
};
@ -150,6 +162,11 @@ const Analyzer = struct {
const members_array = analyzer.temporal_node_heap.items[node_heap_top..];
const members: Members = switch (members_array.len) {
1 => .{
.len = 1,
.left = members_array[0],
.right = Node.Index.invalid,
},
2 => .{
.len = 2,
.left = members_array[0],
@ -161,6 +178,51 @@ const Analyzer = struct {
return members;
}
/// Parses a function definition (prototype followed by a block body) and adds a
/// `function_definition` node whose `left` is the prototype and `right` the body.
fn function(analyzer: *Analyzer) !Node.Index {
    const first_token = analyzer.token_i;
    const prototype_node = try analyzer.functionPrototype();
    const body_node = try analyzer.block();

    return analyzer.addNode(.{
        .id = .function_definition,
        .token = first_token,
        .left = prototype_node,
        .right = body_node,
    });
}
/// Parses a parenthesized argument list followed by a return type expression
/// and adds a `simple_function_prototype` node (`left`: arguments,
/// `right`: return type).
fn functionPrototype(analyzer: *Analyzer) !Node.Index {
    const first_token = analyzer.token_i;
    const argument_list_node = try analyzer.argumentList(.left_parenthesis, .right_parenthesis);
    const return_type_node = try analyzer.typeExpression();

    return analyzer.addNode(.{
        .id = .simple_function_prototype,
        .token = first_token,
        .left = argument_list_node,
        .right = return_type_node,
    });
}
/// Parses an argument list delimited by an optional start token and `end_token`.
///
/// Only the empty list is supported: any token before `end_token` panics.
/// Returns `Node.Index.invalid` to signal "no arguments".
fn argumentList(analyzer: *Analyzer, maybe_start_token: ?Token.Id, end_token: Token.Id) !Node.Index {
    if (maybe_start_token) |start_token| {
        _ = try analyzer.expectToken(start_token);
    }

    var list = ArrayList(Node.Index){};

    // Argument parsing is not implemented: anything other than the closing
    // token right away is a hard stop.
    if (analyzer.tokens[analyzer.token_i].id != end_token) {
        @panic("TODO: argument list");
    }

    _ = try analyzer.expectToken(end_token);

    if (list.items.len != 0) {
        @panic("TODO: arguments");
    }
    return Node.Index.invalid;
}
fn block(analyzer: *Analyzer) !Node.Index {
const left_brace = try analyzer.expectToken(.left_brace);
const node_heap_top = analyzer.temporal_node_heap.items.len;
@ -174,11 +236,17 @@ const Analyzer = struct {
const statement_array = analyzer.temporal_node_heap.items[node_heap_top..];
const node: Node = switch (statement_array.len) {
0 => .{
.id = .block_zero,
.token = left_brace,
.left = Node.Index.invalid,
.right = Node.Index.invalid,
},
1 => .{
.id = .block_one,
.token = left_brace,
.left = statement_array[0],
.right = 0,
.right = Node.Index.invalid,
},
else => |len| std.debug.panic("len: {}", .{len}),
};
@ -187,10 +255,41 @@ const Analyzer = struct {
fn statement(analyzer: *Analyzer) !Node.Index {
// TODO: more stuff before
const result = try analyzer.assignExpression();
_ = try analyzer.expectToken(.semicolon);
const first_statement_token = analyzer.tokens[analyzer.token_i];
return switch (first_statement_token.id) {
.identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) {
.colon => {
unreachable;
},
else => blk: {
const identifier = analyzer.getIdentifier(first_statement_token);
std.debug.print("Starting statement with identifier: {s}\n", .{identifier});
const result = try analyzer.assignExpression();
_ = try analyzer.expectToken(.semicolon);
break :blk result;
},
},
.fixed_keyword_while => try analyzer.whileStatement(),
else => unreachable,
};
}
return result;
/// Parses `while (condition) { ... }` and adds a `simple_while` node whose
/// `left` is the condition expression and `right` the body block.
fn whileStatement(analyzer: *Analyzer) error{ OutOfMemory, unexpected_token, not_implemented }!Node.Index {
    const while_token = try analyzer.expectToken(.fixed_keyword_while);

    _ = try analyzer.expectToken(.left_parenthesis);
    // TODO:
    const condition_node = try analyzer.expression();
    _ = try analyzer.expectToken(.right_parenthesis);

    const body_node = try analyzer.block();

    return analyzer.addNode(.{
        .id = .simple_while,
        .token = while_token,
        .left = condition_node,
        .right = body_node,
    });
}
fn assignExpression(analyzer: *Analyzer) !Node.Index {
@ -242,13 +341,13 @@ const Analyzer = struct {
.id = .compiler_intrinsic_one,
.token = hash,
.left = parameters[0],
.right = 0,
.right = Node.Index.invalid,
}),
else => unreachable,
};
}
fn expression(analyzer: *Analyzer) !Node.Index {
fn expression(analyzer: *Analyzer) error{ OutOfMemory, not_implemented, unexpected_token }!Node.Index {
return analyzer.expressionPrecedence(0);
}
@ -259,7 +358,7 @@ const Analyzer = struct {
while (analyzer.token_i < analyzer.tokens.len) {
const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) {
.equal, .semicolon, .right_parenthesis => -1,
.equal, .semicolon, .right_parenthesis, .right_brace => -1,
else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }),
};
@ -305,7 +404,8 @@ const Analyzer = struct {
.colon => unreachable,
else => try analyzer.curlySuffixExpression(),
},
.string_literal => try analyzer.curlySuffixExpression(),
.string_literal, .fixed_keyword_true, .fixed_keyword_false => try analyzer.curlySuffixExpression(),
.left_brace => try analyzer.block(),
else => |id| {
log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)});
unreachable;
@ -324,9 +424,33 @@ const Analyzer = struct {
};
}
/// Consumes the current `noreturn` keyword token (asserted) and adds a leaf
/// `keyword_noreturn` node for it.
fn noReturn(analyzer: *Analyzer) !Node.Index {
    const keyword_token = analyzer.token_i;
    assert(analyzer.tokens[keyword_token].id == .fixed_keyword_noreturn);
    analyzer.token_i = keyword_token + 1;

    return analyzer.addNode(.{
        .id = .keyword_noreturn,
        .token = keyword_token,
        .left = Node.Index.invalid,
        .right = Node.Index.invalid,
    });
}
/// Consumes the current `true` keyword token (asserted) and adds a leaf
/// `keyword_true` node for it.
fn boolTrue(analyzer: *Analyzer) !Node.Index {
    const keyword_token = analyzer.token_i;
    assert(analyzer.tokens[keyword_token].id == .fixed_keyword_true);
    analyzer.token_i = keyword_token + 1;

    return analyzer.addNode(.{
        .id = .keyword_true,
        .token = keyword_token,
        .left = Node.Index.invalid,
        .right = Node.Index.invalid,
    });
}
fn typeExpression(analyzer: *Analyzer) !Node.Index {
return switch (analyzer.tokens[analyzer.token_i].id) {
.string_literal, .identifier => try analyzer.errorUnionExpression(),
.identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false => try analyzer.errorUnionExpression(),
else => |id| blk: {
log.warn("By default, calling errorUnionExpression with {s}", .{@tagName(id)});
@ -354,7 +478,21 @@ const Analyzer = struct {
unreachable;
} else {
if (analyzer.tokens[analyzer.token_i].id == .left_parenthesis) {
unreachable;
analyzer.token_i += 1;
var expression_list = ArrayList(Node.Index){};
while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) {
const parameter = try analyzer.expression();
try expression_list.append(analyzer.allocator, parameter);
analyzer.token_i += @intFromBool(switch (analyzer.tokens[analyzer.token_i].id) {
.comma, .right_parenthesis => true,
.colon, .right_brace, .right_bracket => unreachable,
else => unreachable,
});
}
_ = try analyzer.expectToken(.right_parenthesis);
@panic("TODO");
} else {
return result;
}
@ -366,28 +504,34 @@ const Analyzer = struct {
fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index {
const token_i = analyzer.token_i;
return switch (analyzer.tokens[token_i].id) {
const token = analyzer.tokens[token_i];
return switch (token.id) {
.string_literal => blk: {
analyzer.token_i += 1;
break :blk analyzer.addNode(.{
.id = .string_literal,
.token = token_i,
.left = 0,
.right = 0,
.left = Node.Index.invalid,
.right = Node.Index.invalid,
});
},
.identifier => switch (analyzer.tokens[token_i + 1].id) {
.colon => unreachable,
else => analyzer.addNode(.{
.id = .identifier,
.token = blk: {
analyzer.token_i += 1;
break :blk token_i;
},
.left = 0,
.right = 0,
}),
else => blk: {
const identifier = analyzer.getIdentifier(token);
analyzer.token_i += 1;
if (equal(u8, identifier, "_")) {
break :blk Node.Index.invalid;
} else break :blk analyzer.addNode(.{
.id = .identifier,
.token = token_i,
.left = Node.Index.invalid,
.right = Node.Index.invalid,
});
},
},
.fixed_keyword_noreturn => try analyzer.noReturn(),
.fixed_keyword_true => try analyzer.boolTrue(),
else => |foo| {
switch (foo) {
.identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.getIdentifier(analyzer.tokens[token_i]) }),
@ -405,9 +549,13 @@ const Analyzer = struct {
}
fn addNode(analyzer: *Analyzer, node: Node) !Node.Index {
std.debug.print("Adding node {s}\n", .{@tagName(node.id)});
const index = analyzer.nodes.items.len;
try analyzer.nodes.append(analyzer.allocator, node);
return @intCast(index);
return Node.Index{
.value = @intCast(index),
};
}
};
@ -420,12 +568,12 @@ const Members = struct {
return switch (members.len) {
0 => unreachable,
1 => .{
.start = members.left,
.end = members.left,
.start = members.left.value,
.end = members.left.value,
},
2 => .{
.start = members.left,
.end = members.right,
.start = members.left.value,
.end = members.right.value,
},
else => unreachable,
};
@ -439,19 +587,19 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R
.file = file,
.allocator = allocator,
};
errdefer analyzer.free();
const node_index = try analyzer.addNode(.{
.id = .main,
.token = 0,
.left = 0,
.right = 0,
.left = Node.Index.invalid,
.right = Node.Index.invalid,
});
assert(node_index == 0);
assert(node_index.value == 0);
assert(node_index.valid);
const members = try analyzer.containerMembers();
const member_range = members.toRange();
analyzer.nodes.items[0].left = @intCast(member_range.start);
analyzer.nodes.items[0].right = @intCast(member_range.end);
analyzer.nodes.items[0].left = .{ .value = @intCast(member_range.start) };
analyzer.nodes.items[0].right = .{ .value = @intCast(member_range.end) };
const end = std.time.Instant.now() catch unreachable;
@ -471,4 +619,18 @@ const ExpressionMutabilityQualifier = enum {
const Keyword = enum {
@"return",
@"fn",
@"while",
void,
noreturn,
};
// These types are meant to be used by the semantic analyzer
pub const ContainerDeclaration = struct {
members: []const Node.Index,
};
pub const SymbolDeclaration = struct {
type_node: Node.Index,
initialization_node: Node.Index,
mutability_token: Token.Index,
};

View File

@ -3,7 +3,6 @@ const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const Compilation = @import("Compilation.zig");
const fs = @import("fs.zig");
pub const seed = std.math.maxInt(u64);
const default_src_file = "src/test/main.b";
@ -13,17 +12,18 @@ pub fn main() !void {
}
fn singleCompilation(main_file_path: []const u8) !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const compilation = try Compilation.init(gpa.allocator());
defer compilation.deinit();
const allocator = std.heap.page_allocator;
const compilation = try Compilation.init(allocator);
try compilation.compileModule(.{
.main_package_path = main_file_path,
});
}
test {
_ = Compilation;
}
test "basic" {
try singleCompilation(default_src_file);
}