instruction selection

David Gonzalez Martin 2023-11-06 13:30:35 -06:00
parent f63fcbcb62
commit 77e54285f5
25 changed files with 7819 additions and 1761 deletions

.vscode/launch.json vendored

@@ -8,8 +8,10 @@
     "type": "lldb",
     "request": "launch",
     "name": "Launch",
-    "program": "${workspaceFolder}/zig-out/bin/compiler",
-    "args": [],
+    "program": "${workspaceFolder}/zig-out/bin/nativity",
+    "args": [
+        "test/hello_world/main.nat"
+    ],
     "cwd": "${workspaceFolder}",
     "preLaunchTask": "zig build"
 },

build.zig

@ -1,15 +1,18 @@
const std = @import("std"); const std = @import("std");
var all: bool = false; var all: bool = false;
fn everythingForTargetAndOptimization(b: *std.Build, target: std.zig.CrossTarget, optimization: std.builtin.OptimizeMode, unit_tests: []const []const u8, test_step: *std.Build.Step) !void { pub fn build(b: *std.Build) !void {
const name = if (all) try std.mem.concat(b.allocator, u8, &.{ "nativity_", @tagName(optimization) }) else "nativity"; all = b.option(bool, "all", "All") orelse false;
const target = b.standardTargetOptions(.{});
const optimization = b.standardOptimizeOption(.{});
const exe = b.addExecutable(.{ const exe = b.addExecutable(.{
.name = name, .name = "nativity",
.root_source_file = .{ .path = "src/main.zig" }, .root_source_file = .{ .path = "src/main.zig" },
.target = target, .target = target,
.optimize = optimization, .optimize = optimization,
.use_llvm = true,
.use_lld = false,
}); });
b.installArtifact(exe); b.installArtifact(exe);
b.installDirectory(.{ b.installDirectory(.{
.source_dir = std.Build.LazyPath.relative("lib"), .source_dir = std.Build.LazyPath.relative("lib"),
@ -17,100 +20,50 @@ fn everythingForTargetAndOptimization(b: *std.Build, target: std.zig.CrossTarget
.install_subdir = "lib", .install_subdir = "lib",
}); });
const run_cmd = b.addRunArtifact(exe);
run_cmd.step.dependOn(b.getInstallStep());
if (b.args) |args| {
run_cmd.addArgs(args);
}
const run_step = b.step(if (all) try std.mem.concat(b.allocator, u8, &.{ "run_", @tagName(optimization) }) else "run", "Run the app");
run_step.dependOn(&run_cmd.step);
const debug_command = addDebugCommand(b, exe);
const debug_step = b.step(if (all) try std.mem.concat(b.allocator, u8, &.{ "debug_", @tagName(optimization) }) else "debug", "Debug the app");
debug_step.dependOn(&debug_command.step);
const zig_tests = b.addTest(.{ const zig_tests = b.addTest(.{
.root_source_file = .{ .path = "src/main.zig" }, .root_source_file = .{ .path = "src/main.zig" },
.target = target, .target = target,
.optimize = optimization, .optimize = optimization,
}); });
const run_zig_tests = b.addRunArtifact(zig_tests); const run_command = b.addRunArtifact(exe);
run_zig_tests.has_side_effects = true;
test_step.dependOn(&run_zig_tests.step);
for (unit_tests) |unit_test_main_source_file| { const debug_command = switch (@import("builtin").os.tag) {
const unit_test = b.addRunArtifact(exe);
unit_test.has_side_effects = true;
unit_test.addArg(unit_test_main_source_file);
test_step.dependOn(&unit_test.step);
}
}
fn addDebugCommand(b: *std.Build, artifact: *std.Build.Step.Compile) *std.Build.Step.Run {
return switch (@import("builtin").os.tag) {
.linux => blk: { .linux => blk: {
const result = b.addSystemCommand(&.{"gf2"}); const result = b.addSystemCommand(&.{"gf2"});
result.addArtifactArg(artifact); result.addArg("--args");
result.addArtifactArg(exe);
if (artifact.kind == .@"test") {
result.addArgs(&.{ "-ex", "r" });
}
break :blk result; break :blk result;
}, },
.windows => blk: { .windows => blk: {
const result = b.addSystemCommand(&.{"remedybg"}); const result = b.addSystemCommand(&.{"remedybg"});
result.addArg("-g"); result.addArg("-g");
result.addArtifactArg(artifact); result.addArtifactArg(exe);
break :blk result; break :blk result;
}, },
.macos => blk: { .macos => blk: {
// not tested // not tested
const result = b.addSystemCommand(&.{"gdb"}); const result = b.addSystemCommand(&.{"lldb"});
result.addArtifactArg(artifact); result.addArg("--");
result.addArtifactArg(exe);
break :blk result; break :blk result;
}, },
else => @compileError("Operating system not supported"), else => @compileError("OS not supported"),
}; };
}
pub fn build(b: *std.Build) !void { const test_command = b.addRunArtifact(zig_tests);
all = b.option(bool, "all", "All") orelse false;
var unit_test_list = std.ArrayList([]const u8).init(b.allocator); if (b.args) |args| {
var test_dir = try std.fs.cwd().openIterableDir("test", .{ .access_sub_paths = true }); run_command.addArgs(args);
defer test_dir.close(); test_command.addArgs(args);
debug_command.addArgs(args);
var test_dir_iterator = test_dir.iterate();
while (try test_dir_iterator.next()) |entry| {
switch (entry.kind) {
.directory => {
const dir_name = entry.name;
const main_unit_test_source_file = try std.mem.concat(b.allocator, u8, &.{ "test/", dir_name, "/main.nat" });
try unit_test_list.append(main_unit_test_source_file);
},
.file => {},
else => @panic("Don't put crap on test directory"),
}
} }
const target = b.standardTargetOptions(.{}); const run_step = b.step("run", "Test the Nativity compiler");
const unit_tests = unit_test_list.items; run_step.dependOn(&run_command.step);
const test_step = b.step("test", "Test the Nativity compiler"); const test_step = b.step("test", "Test the Nativity compiler");
test_step.dependOn(&test_command.step);
if (all) { const debug_step = b.step("debug", "Debug the Nativity compiler");
inline for (@typeInfo(std.builtin.OptimizeMode).Enum.fields) |enum_field| { debug_step.dependOn(&debug_command.step);
const optimization = @field(std.builtin.OptimizeMode, enum_field.name);
try everythingForTargetAndOptimization(b, target, optimization, unit_tests, test_step);
}
} else {
const optimization = b.standardOptimizeOption(.{});
_ = try everythingForTargetAndOptimization(b, target, optimization, unit_tests, test_step);
}
} }

lib/std/builtin.nat Normal file

@ -0,0 +1,16 @@
const Os = enum{
linux,
macos,
windows,
};
const Cpu = enum{
aarch64,
x86_64,
};
const Abi = enum{
none,
gnu,
msvc,
};

lib/std/os.nat Normal file

@ -0,0 +1,46 @@
const current = #import("builtin").os;
const system = switch (current) {
.linux => linux,
.macos => macos,
.windows => windows,
};
const write = fn (file_descriptor: FileDescriptor, bytes_ptr: [@]const u8, bytes_len: usize) ssize {
switch (current) {
.linux => return #syscall(1, file_descriptor, bytes_ptr, bytes_len),
.macos => return macos.write(file_descriptor, bytes_ptr, bytes_len),
.windows => {
var written_bytes: u32 = 0;
if (windows.WriteFile(file_descriptor, bytes_ptr, bytes_len, @written_bytes, false) != 0) {
return written_bytes;
} else {
unreachable;
}
},
}
}
const FileDescriptor = system.FileDescriptor;
const print = fn(bytes_ptr: [@]const u8, bytes_len: usize) void {
const file_descriptor = switch (current) {
.linux, .macos => 2,
.windows => windows.GetStdHandle(windows.STD_OUTPUT_HANDLE),
};
_ = write(file_descriptor, bytes_ptr, bytes_len);
}
const exit = fn(exit_code: s32) noreturn {
switch (current) {
.linux => _ = #syscall(231, exit_code),
.macos => macos.exit(exit_code),
.windows => windows.ExitProcess(exit_code),
}
unreachable;
}
const linux = #import("os/linux.nat");
const macos = #import("os/macos.nat");
const windows = #import("os/windows.nat");

lib/std/os/linux.nat Normal file

@ -0,0 +1 @@
const FileDescriptor = s32;

lib/std/os/macos.nat Normal file

@ -0,0 +1,3 @@
const FileDescriptor = s32;
const write = fn (file_descriptor: FileDescriptor, bytes_ptr: [@]const u8, bytes_len: usize) ssize extern;
const exit = fn (exit_code: u32) noreturn extern;

lib/std/os/windows.nat Normal file

@ -0,0 +1,3 @@
const HANDLE = u64;
const FileDescriptor = HANDLE;
const GetStdHandle = fn(handle_descriptor: u32) HANDLE extern;

lib/std/start.nat

@@ -1,9 +1,9 @@
+const std = #import("std");
 comptime {
     _ = _start;
 }
 const _start = fn () noreturn {
     const result = #import("main").main();
-    _ = #syscall(231, result);
-    unreachable;
-};
+    std.os.exit(0);
+}

lib/std/std.nat

@@ -2,4 +2,7 @@ comptime {
     _ = start;
 }
+const builtin = #import("builtin.nat");
+const os = #import("os.nat");
+const print = os.print;
 const start = #import("start.nat");

src/Compilation.zig

@@ -13,6 +13,7 @@ const AutoHashMap = data_structures.AutoHashMap;
 const BlockList = data_structures.BlockList;
 const HashMap = data_structures.HashMap;
 const SegmentedList = data_structures.SegmentedList;
+const StringKeyMap = data_structures.StringKeyMap;
 const StringHashMap = data_structures.StringHashMap;
 const StringArrayHashMap = data_structures.StringArrayHashMap;
@ -67,12 +68,57 @@ pub const Struct = struct {
pub const Allocation = List.Allocation; pub const Allocation = List.Allocation;
}; };
pub const Enum = struct {
scope: Scope.Index,
fields: ArrayList(Enum.Field.Index) = .{},
type: Type.Index,
pub const Field = struct {
name: u32,
value: Value.Index,
parent: Enum.Index,
pub const List = BlockList(@This());
pub const Index = Enum.Field.List.Index;
pub const Allocation = Enum.Field.List.Allocation;
};
pub const List = BlockList(@This());
pub const Index = List.Index;
pub const Allocation = List.Allocation;
};
pub const Array = struct {
element_type: Type.Index,
element_count: u32,
pub const List = BlockList(@This());
pub const Index = List.Index;
pub const Allocation = List.Allocation;
};
pub const Type = union(enum) { pub const Type = union(enum) {
void, void,
noreturn, noreturn,
bool, bool,
type,
comptime_int,
integer: Type.Integer, integer: Type.Integer,
slice: Slice,
pointer: Pointer,
@"struct": Struct.Index, @"struct": Struct.Index,
@"enum": Enum.Index,
function: Function.Prototype.Index,
array: Array,
const Slice = struct {
element_type: Type.Index,
};
const Pointer = struct {
element_type: Type.Index,
many: bool,
@"const": bool,
};
pub const List = BlockList(@This()); pub const List = BlockList(@This());
pub const Index = List.Index; pub const Index = List.Index;
pub const Allocation = List.Allocation; pub const Allocation = List.Allocation;
@ -85,14 +131,37 @@ pub const Type = union(enum) {
signed = 1, signed = 1,
}; };
pub fn getSize(integer: Type.Integer) u64 { pub fn getSize(integer: Integer) u64 {
return integer.bit_count / @bitSizeOf(u8) + @intFromBool(integer.bit_count % @bitSizeOf(u8) != 0); return integer.bit_count / @bitSizeOf(u8) + @intFromBool(integer.bit_count % @bitSizeOf(u8) != 0);
} }
pub fn getIndex(integer: Integer) Compilation.Type.Index {
return .{
.block = 0,
.index = @ctz(integer.bit_count) - @ctz(@as(u8, 8)) + @as(u6, switch (integer.signedness) {
.signed => Compilation.HardwareSignedIntegerType.offset,
.unsigned => Compilation.HardwareUnsignedIntegerType.offset,
}),
};
}
// pub fn get(bit_count: u16, comptime signedness: Signedness) @This().Type(signedness) {
// _ = bit_count;
// }
fn Type(comptime signedness: Signedness) type {
return switch (signedness) {
.unsigned => HardwareUnsignedIntegerType,
.signed => HardwareSignedIntegerType,
};
}
}; };
pub fn getSize(type_info: Type) u64 { pub fn getSize(type_info: Type) u64 {
return switch (type_info) { return switch (type_info) {
.integer => |integer| integer.getSize(), .integer => |integer| integer.getSize(),
.pointer => 8,
.comptime_int => @panic("This call should never happen"),
else => |t| @panic(@tagName(t)), else => |t| @panic(@tagName(t)),
}; };
} }
@ -100,9 +169,106 @@ pub const Type = union(enum) {
pub fn getAlignment(type_info: Type) u64 { pub fn getAlignment(type_info: Type) u64 {
return switch (type_info) { return switch (type_info) {
.integer => |integer| @min(16, integer.getSize()), .integer => |integer| @min(16, integer.getSize()),
.pointer => 8,
else => |t| @panic(@tagName(t)), else => |t| @panic(@tagName(t)),
}; };
} }
pub const @"void" = FixedTypeKeyword.void.toType();
pub const boolean = FixedTypeKeyword.bool.toType();
pub const ssize = FixedTypeKeyword.ssize.toType();
pub const @"usize" = FixedTypeKeyword.usize.toType();
pub const @"noreturn" = FixedTypeKeyword.noreturn.toType();
pub const @"type" = FixedTypeKeyword.type.toType();
pub const @"comptime_int" = FixedTypeKeyword.comptime_int.toType();
pub const string_literal = ExtraCommonType.string_literal.toType();
pub const @"u8" = Type.Integer.getIndex(.{
.bit_count = 8,
.signedness = .unsigned,
});
};
// Each time an enum is added here, a corresponding insertion in the initialization must be made
pub const Values = enum {
bool_false,
bool_true,
@"unreachable",
pub fn getIndex(value: Values) Value.Index {
const absolute: u32 = @intFromEnum(value);
const foo = @as(Value.Index, undefined);
const ElementT = @TypeOf(@field(foo, "index"));
const BlockT = @TypeOf(@field(foo, "block"));
const divider = std.math.maxInt(ElementT);
const element_index: ElementT = @intCast(absolute % divider);
const block_index: BlockT = @intCast(absolute / divider);
return .{
.index = element_index,
.block = block_index,
};
}
};
pub const Intrinsic = enum {
@"error",
import,
syscall,
};
pub const FixedTypeKeyword = enum {
void,
noreturn,
bool,
usize,
ssize,
type,
comptime_int,
const offset = 0;
fn toType(fixed_type_keyword: FixedTypeKeyword) Type.Index {
return Type.Index.fromInteger(offset + @intFromEnum(fixed_type_keyword));
}
};
pub const HardwareUnsignedIntegerType = enum {
u8,
u16,
u32,
u64,
pub const offset = @typeInfo(FixedTypeKeyword).Enum.fields.len;
};
pub const HardwareSignedIntegerType = enum {
s8,
s16,
s32,
s64,
pub const offset = HardwareUnsignedIntegerType.offset + @typeInfo(HardwareUnsignedIntegerType).Enum.fields.len;
};
pub const ExtraCommonType = enum {
string_literal,
pub const offset = HardwareSignedIntegerType.offset + @typeInfo(HardwareSignedIntegerType).Enum.fields.len;
fn toType(t: ExtraCommonType) Type.Index {
return Type.Index.fromInteger(offset + @intFromEnum(t));
}
};
pub const extra_common_type_data = blk: {
var result: [@typeInfo(ExtraCommonType).Enum.fields.len]Type = undefined;
result[@intFromEnum(ExtraCommonType.string_literal)] = .{
.pointer = .{
.many = true,
.@"const" = true,
.element_type = Type.u8,
},
};
break :blk result;
}; };
/// A scope contains a bunch of declarations /// A scope contains a bunch of declarations
@ -131,7 +297,14 @@ pub const Declaration = struct {
scope_type: ScopeType, scope_type: ScopeType,
mutability: Mutability, mutability: Mutability,
init_value: Value.Index, init_value: Value.Index,
name: []const u8, name: u32,
argument_index: ?u32,
type: Type.Index,
pub const Reference = struct {
value: Declaration.Index,
type: Type.Index,
};
pub const List = BlockList(@This()); pub const List = BlockList(@This());
pub const Index = List.Index; pub const Index = List.Index;
@ -139,15 +312,31 @@ pub const Declaration = struct {
}; };
pub const Function = struct { pub const Function = struct {
scope: Scope.Index,
body: Block.Index, body: Block.Index,
prototype: Prototype.Index, prototype: Type.Index,
pub const Prototype = struct { pub const Prototype = struct {
arguments: ?[]const Field.Index, arguments: ?[]const Declaration.Index,
return_type: Type.Index, return_type: Type.Index,
attributes: Attributes = .{},
pub const List = BlockList(@This()); pub const List = BlockList(@This());
pub const Index = Prototype.List.Index; pub const Index = Prototype.List.Index;
pub const Attributes = packed struct {
@"extern": bool = false,
@"export": bool = false,
@"inline": Inline = .none,
calling_convention: CallingConvention = .system_v,
pub const Inline = enum {
none,
suggestion_optimizer,
force_semantic,
force_optimizer,
};
};
}; };
pub fn getBodyBlock(function: Function, module: *Module) *Block { pub fn getBodyBlock(function: Function, module: *Module) *Block {
@ -168,7 +357,8 @@ pub const Block = struct {
}; };
pub const Field = struct { pub const Field = struct {
foo: u32 = 0, name: u32,
type: Type.Index,
pub const List = BlockList(@This()); pub const List = BlockList(@This());
pub const Index = List.Index; pub const Index = List.Index;
@ -239,10 +429,23 @@ pub const Return = struct {
pub const Allocation = List.Allocation; pub const Allocation = List.Allocation;
}; };
pub const Cast = struct {
value: Value.Index,
type: Type.Index,
pub const List = BlockList(@This());
pub const Index = List.Index;
pub const Allocation = List.Allocation;
};
pub const CallingConvention = enum {
system_v,
};
pub const Value = union(enum) { pub const Value = union(enum) {
unresolved: Unresolved, unresolved: Unresolved,
declaration: Declaration.Index, declaration: Declaration.Index,
declaration_reference: Declaration.Index, declaration_reference: Declaration.Reference,
void, void,
bool: bool, bool: bool,
undefined, undefined,
@ -258,35 +461,118 @@ pub const Value = union(enum) {
call: Call.Index, call: Call.Index,
argument_list: ArgumentList, argument_list: ArgumentList,
@"return": Return.Index, @"return": Return.Index,
argument: Declaration.Index,
string_literal: u32,
enum_field: Enum.Field.Index,
extern_function: Function.Prototype.Index,
sign_extend: Cast.Index,
zero_extend: Cast.Index,
pub const List = BlockList(@This()); pub const List = BlockList(@This());
pub const Index = List.Index; pub const Index = List.Index;
pub const Allocation = List.Allocation; pub const Allocation = List.Allocation;
pub const Integer = struct {
value: u64,
type: Type.Index,
signedness: Type.Integer.Signedness,
pub fn getBitCount(integer: Integer, module: *Module) u16 {
return module.types.get(integer.type).integer.bit_count;
}
};
pub fn isComptime(value: Value) bool { pub fn isComptime(value: Value) bool {
return switch (value) { return switch (value) {
.bool, .void, .undefined, .function => true, .bool, .void, .undefined, .function, .type, .enum_field => true,
else => false, .integer => |integer| integer.type.eq(Type.comptime_int),
}; .call => false,
}
pub fn getType(value: *Value, module: *Module) Type.Index {
return switch (value.*) {
.call => |call_index| module.calls.get(call_index).type,
else => |t| @panic(@tagName(t)), else => |t| @panic(@tagName(t)),
}; };
} }
};
pub const Integer = struct { pub fn getType(value: Value, module: *Module) Type.Index {
value: u64, const result = switch (value) {
type: Type.Integer, .call => |call_index| module.calls.get(call_index).type,
.integer => |integer| integer.type,
.declaration_reference => |declaration_reference| declaration_reference.type,
.string_literal => |string_literal_hash| module.string_literal_types.get(@intCast(module.getStringLiteral(string_literal_hash).?.len)).?,
.type => Type.type,
.enum_field => |enum_field_index| module.enums.get(module.enum_fields.get(enum_field_index).parent).type,
.function => |function_index| module.functions.get(function_index).prototype,
else => |t| @panic(@tagName(t)),
};
return result;
}
// pub fn setType(value: *Value, new_type: Type.Index) void {
// switch (value.*) {
// .integer => value.integer.type = new_type,
// else => |t| @panic(@tagName(t)),
// }
// }
const TypeCheckError = error{
integer_size,
pointer_many_differ,
pointer_element_type_differ,
};
pub fn typeCheck(value: *Value, module: *Module, type_to_check_index: Type.Index) TypeCheckError!void {
const value_type_index = value.getType(module);
if (!value_type_index.eq(type_to_check_index)) {
const value_type = module.types.get(value_type_index);
const check_type = module.types.get(type_to_check_index);
if (std.meta.activeTag(value_type.*) == std.meta.activeTag(check_type.*)) {
switch (value_type.*) {
.integer => |coercee_int| {
if (check_type.integer.getSize() < coercee_int.getSize()) {
return error.integer_size;
}
},
.pointer => |coercee_pointer| {
if (coercee_pointer.many != check_type.pointer.many) {
return error.pointer_many_differ;
}
if (!coercee_pointer.element_type.eq(check_type.pointer.element_type)) {
if (check_type.pointer.many) {
const coercee_element_type = module.types.get(coercee_pointer.element_type);
switch (coercee_element_type.*) {
.array => |array| if (!array.element_type.eq(check_type.pointer.element_type)) {
return error.pointer_element_type_differ;
},
else => |t| @panic(@tagName(t)),
}
}
}
},
else => |t| @panic(@tagName(t)),
}
} else {
switch (check_type.*) {
.integer => {
switch (value_type.*) {
.comptime_int => switch (value.*) {
.integer => value.integer.type = type_to_check_index,
.declaration_reference => value.declaration_reference.type = type_to_check_index,
else => |t| @panic(@tagName(t)),
},
else => |t| @panic(@tagName(t)),
}
},
else => |t| @panic(@tagName(t)),
}
}
}
}
}; };
pub const Module = struct { pub const Module = struct {
main_package: *Package, main_package: *Package,
import_table: StringArrayHashMap(*File) = .{}, import_table: StringArrayHashMap(*File) = .{},
string_table: AutoHashMap(u32, []const u8) = .{}, string_table: StringKeyMap([]const u8) = .{},
declarations: BlockList(Declaration) = .{}, declarations: BlockList(Declaration) = .{},
structs: BlockList(Struct) = .{}, structs: BlockList(Struct) = .{},
scopes: BlockList(Scope) = .{}, scopes: BlockList(Scope) = .{},
@ -301,12 +587,22 @@ pub const Module = struct {
assignments: BlockList(Assignment) = .{}, assignments: BlockList(Assignment) = .{},
syscalls: BlockList(Syscall) = .{}, syscalls: BlockList(Syscall) = .{},
calls: BlockList(Call) = .{}, calls: BlockList(Call) = .{},
argument_list: BlockList(ArgumentList) = .{}, argument_lists: BlockList(ArgumentList) = .{},
returns: BlockList(Return) = .{}, returns: BlockList(Return) = .{},
string_literals: StringKeyMap([]const u8) = .{},
enums: BlockList(Enum) = .{},
enum_fields: BlockList(Enum.Field) = .{},
function_name_map: data_structures.AutoArrayHashMap(Function.Index, u32) = .{},
arrays: BlockList(Array) = .{},
casts: BlockList(Cast) = .{},
string_literal_types: data_structures.AutoArrayHashMap(u32, Type.Index) = .{},
array_types: data_structures.AutoArrayHashMap(Array, Type.Index) = .{},
entry_point: ?u32 = null, entry_point: ?u32 = null,
pub const Descriptor = struct { pub const Descriptor = struct {
main_package_path: []const u8, main_package_path: []const u8,
executable_path: []const u8,
target: std.Target,
}; };
const ImportFileResult = struct { const ImportFileResult = struct {
@ -343,8 +639,10 @@ pub const Module = struct {
unreachable; unreachable;
} }
const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_name }); const current_file_relative_path_to_package_directory = std.fs.path.dirname(current_file.relative_path) orelse "";
const file_relative_path = std.fs.path.basename(full_path); const import_file_relative_path = try std.fs.path.join(allocator, &.{ current_file_relative_path_to_package_directory, import_name });
const full_path = try std.fs.path.join(allocator, &.{ current_file.package.directory.path, import_file_relative_path });
const file_relative_path = import_file_relative_path;
const package = current_file.package; const package = current_file.package;
const import_file = try module.getFile(allocator, full_path, file_relative_path, package); const import_file = try module.getFile(allocator, full_path, file_relative_path, package);
@ -358,12 +656,6 @@ pub const Module = struct {
return result; return result;
} }
fn lookupDeclaration(module: *Module, hashed: u32) !noreturn {
_ = hashed;
_ = module;
while (true) {}
}
fn getFile(module: *Module, allocator: Allocator, full_path: []const u8, relative_path: []const u8, package: *Package) !ImportFileResult { fn getFile(module: *Module, allocator: Allocator, full_path: []const u8, relative_path: []const u8, package: *Package) !ImportFileResult {
const path_lookup = try module.import_table.getOrPut(allocator, full_path); const path_lookup = try module.import_table.getOrPut(allocator, full_path);
const file, const index = switch (path_lookup.found_existing) { const file, const index = switch (path_lookup.found_existing) {
@ -399,6 +691,7 @@ pub const Module = struct {
pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult { pub fn importPackage(module: *Module, allocator: Allocator, package: *Package) !ImportPackageResult {
const full_path = try std.fs.path.resolve(allocator, &.{ package.directory.path, package.source_path }); const full_path = try std.fs.path.resolve(allocator, &.{ package.directory.path, package.source_path });
print("Import full path: {s}\n", .{full_path});
const import_file = try module.getFile(allocator, full_path, package.source_path, package); const import_file = try module.getFile(allocator, full_path, package.source_path, package);
try import_file.ptr.addPackageReference(allocator, package); try import_file.ptr.addPackageReference(allocator, package);
@ -408,8 +701,8 @@ pub const Module = struct {
}; };
} }
pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file: *File) !void { pub fn generateAbstractSyntaxTreeForFile(module: *Module, allocator: Allocator, file_index: File.Index) !void {
_ = module; const file = module.files.get(file_index);
const source_file = file.package.directory.handle.openFile(file.relative_path, .{}) catch |err| { const source_file = file.package.directory.handle.openFile(file.relative_path, .{}) catch |err| {
std.debug.panic("Can't find file {s} in directory {s} for error {s}", .{ file.relative_path, file.package.directory.path, @errorName(err) }); std.debug.panic("Can't find file {s} in directory {s} for error {s}", .{ file.relative_path, file.package.directory.path, @errorName(err) });
}; };
@ -425,8 +718,70 @@ pub const Module = struct {
file.source_code = file_buffer[0..read_byte_count]; file.source_code = file_buffer[0..read_byte_count];
file.status = .loaded_into_memory; file.status = .loaded_into_memory;
try file.lex(allocator); try file.lex(allocator, file_index);
try file.parse(allocator); print("Start of parsing file #{}\n", .{file_index.uniqueInteger()});
try file.parse(allocator, file_index);
print("End of parsing file #{}\n", .{file_index.uniqueInteger()});
}
fn getString(map: *StringKeyMap([]const u8), key: u32) ?[]const u8 {
return map.getValue(key);
}
fn addString(map: *StringKeyMap([]const u8), allocator: Allocator, string: []const u8) !u32 {
const lookup_result = try map.getOrPut(allocator, string, string);
{
const lookup_name = map.getValue(lookup_result.key) orelse unreachable;
assert(equal(u8, lookup_name, string));
}
return lookup_result.key;
}
pub fn getName(module: *Module, key: u32) ?[]const u8 {
return getString(&module.string_table, key);
}
pub fn addName(module: *Module, allocator: Allocator, name: []const u8) !u32 {
return addString(&module.string_table, allocator, name);
}
pub fn getStringLiteral(module: *Module, key: u32) ?[]const u8 {
return getString(&module.string_literals, key);
}
pub fn addStringLiteral(module: *Module, allocator: Allocator, string_literal: []const u8) !u32 {
const result = addString(&module.string_literals, allocator, string_literal);
const len: u32 = @intCast(string_literal.len);
// try analyzer.module.
const string_literal_type_gop = try module.string_literal_types.getOrPut(allocator, len);
if (!string_literal_type_gop.found_existing) {
const array = Array{
.element_type = Type.u8,
.element_count = len,
};
const array_type_gop = try module.array_types.getOrPut(allocator, array);
if (!array_type_gop.found_existing) {
const array_type_allocation = try module.types.append(allocator, .{
.array = array,
});
array_type_gop.value_ptr.* = array_type_allocation.index;
}
const array_type_index = array_type_gop.value_ptr.*;
const pointer_type_allocation = try module.types.append(allocator, .{
.pointer = .{
.@"const" = true,
.many = true,
.element_type = array_type_index,
},
});
string_literal_type_gop.value_ptr.* = pointer_type_allocation.index;
}
return result;
} }
}; };
@ -448,7 +803,18 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) !
// TODO: generate an actual file // TODO: generate an actual file
const builtin_file_name = "builtin.nat"; const builtin_file_name = "builtin.nat";
var cache_dir = try compilation.build_directory.openDir("cache", .{}); var cache_dir = try compilation.build_directory.openDir("cache", .{});
const builtin_file = try cache_dir.createFile(builtin_file_name, .{ .truncate = false }); const builtin_file = try cache_dir.createFile(builtin_file_name, .{});
try builtin_file.writer().print(
\\const builtin = #import("std").builtin;
\\const cpu = builtin.Cpu.{s};
\\const os = builtin.Os.{s};
\\const abi = builtin.Abi.{s};
\\
, .{
@tagName(descriptor.target.cpu.arch),
@tagName(descriptor.target.os.tag),
@tagName(descriptor.target.abi),
});
builtin_file.close(); builtin_file.close();
const module: *Module = try compilation.base_allocator.create(Module); const module: *Module = try compilation.base_allocator.create(Module);
@ -508,14 +874,80 @@ pub fn compileModule(compilation: *Compilation, descriptor: Module.Descriptor) !
_ = try module.importPackage(compilation.base_allocator, module.main_package.dependencies.get("std").?); _ = try module.importPackage(compilation.base_allocator, module.main_package.dependencies.get("std").?);
for (module.import_table.values()) |import| { for (module.import_table.values()) |import| {
try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, import); try module.generateAbstractSyntaxTreeForFile(compilation.base_allocator, module.files.indexOf(import));
} }
const main_declaration = try semantic_analyzer.initialize(compilation, module, packages[0], .{ .block = 0, .index = 0 }); inline for (@typeInfo(FixedTypeKeyword).Enum.fields) |enum_field| {
_ = try module.types.append(compilation.base_allocator, switch (@field(FixedTypeKeyword, enum_field.name)) {
.usize => @unionInit(Type, "integer", .{
.bit_count = 64,
.signedness = .unsigned,
}),
.ssize => @unionInit(Type, "integer", .{
.bit_count = 64,
.signedness = .signed,
}),
else => @unionInit(Type, enum_field.name, {}),
});
}
var ir = try intermediate_representation.initialize(compilation, module, packages[0], main_declaration); inline for (@typeInfo(HardwareUnsignedIntegerType).Enum.fields) |enum_field| {
_ = try module.types.append(compilation.base_allocator, .{
.integer = .{
.signedness = .unsigned,
.bit_count = switch (@field(HardwareUnsignedIntegerType, enum_field.name)) {
.u8 => 8,
.u16 => 16,
.u32 => 32,
.u64 => 64,
},
},
});
}
try emit.get(.x86_64).initialize(compilation.base_allocator, &ir); inline for (@typeInfo(HardwareSignedIntegerType).Enum.fields) |enum_field| {
_ = try module.types.append(compilation.base_allocator, .{
.integer = .{
.signedness = .signed,
.bit_count = switch (@field(HardwareSignedIntegerType, enum_field.name)) {
.s8 => 8,
.s16 => 16,
.s32 => 32,
.s64 => 64,
},
},
});
}
for (extra_common_type_data) |type_data| {
_ = try module.types.append(compilation.base_allocator, type_data);
}
_ = try module.values.append(compilation.base_allocator, .{
.bool = false,
});
_ = try module.values.append(compilation.base_allocator, .{
.bool = true,
});
_ = try module.values.append(compilation.base_allocator, .{
.@"unreachable" = {},
});
const value_allocation = try module.values.append(compilation.base_allocator, .{
.unresolved = .{
.node_index = .{ .value = 0 },
},
});
try semantic_analyzer.initialize(compilation, module, packages[0], value_allocation.ptr);
const ir = try intermediate_representation.initialize(compilation, module);
switch (descriptor.target.cpu.arch) {
inline else => |arch| try emit.get(arch).initialize(compilation.base_allocator, ir, descriptor),
}
} }
fn generateAST() !void {} fn generateAST() !void {}
@ -544,6 +976,7 @@ pub const File = struct {
syntactic_analyzer_result: syntactic_analyzer.Result = undefined, syntactic_analyzer_result: syntactic_analyzer.Result = undefined,
package_references: ArrayList(*Package) = .{}, package_references: ArrayList(*Package) = .{},
file_references: ArrayList(*File) = .{}, file_references: ArrayList(*File) = .{},
type: Type.Index = Type.Index.invalid,
relative_path: []const u8, relative_path: []const u8,
package: *Package, package: *Package,
@ -569,18 +1002,18 @@ pub const File = struct {
try file.file_references.append(allocator, affected); try file.file_references.append(allocator, affected);
} }
fn lex(file: *File, allocator: Allocator) !void { fn lex(file: *File, allocator: Allocator, file_index: File.Index) !void {
assert(file.status == .loaded_into_memory); assert(file.status == .loaded_into_memory);
file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code); file.lexical_analyzer_result = try lexical_analyzer.analyze(allocator, file.source_code, file_index);
// if (!@import("builtin").is_test) { // if (!@import("builtin").is_test) {
// print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time}); // print("[LEXICAL ANALYSIS] {} ns\n", .{file.lexical_analyzer_result.time});
// } // }
file.status = .lexed; file.status = .lexed;
} }
fn parse(file: *File, allocator: Allocator) !void { fn parse(file: *File, allocator: Allocator, file_index: File.Index) !void {
assert(file.status == .lexed); assert(file.status == .lexed);
file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code); file.syntactic_analyzer_result = try syntactic_analyzer.analyze(allocator, file.lexical_analyzer_result.tokens.items, file.source_code, file_index);
// if (!@import("builtin").is_test) { // if (!@import("builtin").is_test) {
// print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time}); // print("[SYNTACTIC ANALYSIS] {} ns\n", .{file.syntactic_analyzer_result.time});
// } // }

src/backend/aarch64.zig Normal file

src/backend/elf.zig Normal file

@ -0,0 +1,281 @@
const std = @import("std");
const assert = std.debug.assert;
const equal = std.mem.eql;
const data_structures = @import("../data_structures.zig");
const Allocator = data_structures.Allocator;
const ArrayList = data_structures.ArrayList;
const emit = @import("emit.zig");
pub const Writer = struct {
bytes: ArrayList(u8),
allocator: Allocator,
pub fn init(allocator: Allocator) !Writer {
return .{
.bytes = try ArrayList(u8).initCapacity(allocator, 0x10000),
.allocator = allocator,
};
}
pub fn getHeader(writer: *Writer) *Header {
return @ptrCast(@alignCast(writer.bytes.items.ptr));
}
pub fn writeToMemory(writer: *Writer, image: *const emit.Result) !void {
const section_fields = @typeInfo(@TypeOf(image.sections)).Struct.fields;
const section_count = blk: {
var result: u16 = 0;
inline for (section_fields) |section_field| {
const section_size = @field(image.sections, section_field.name).index;
result += @intFromBool(section_size > 0);
}
break :blk result;
};
const program_header_count = section_count;
const program_start_offset = @sizeOf(Header) + program_header_count * @sizeOf(ProgramHeader);
var section_offsets: [section_fields.len]u32 = undefined;
const program_end_offset = blk: {
var result: u32 = program_start_offset;
inline for (section_fields, 0..) |section_field, section_index| {
const section = &@field(image.sections, section_field.name);
if (section.index > 0) {
const section_offset = std.mem.alignForward(u32, result, section.alignment);
section_offsets[section_index] = section_offset;
result = std.mem.alignForward(u32, section_offset + @as(u32, @intCast(section.index)), section.alignment);
}
}
break :blk result;
};
const elf_file_end_offset = program_end_offset + @sizeOf(SectionHeader) * section_count;
try writer.bytes.resize(writer.allocator, elf_file_end_offset);
const base_address = 0x200000;
writer.getHeader().* = Header{
.endianness = .little,
.machine = switch (image.target.cpu.arch) {
.x86_64 => .AMD64,
else => unreachable,
},
.os_abi = switch (image.target.os.tag) {
.linux => .systemv,
else => unreachable,
},
.entry = base_address + section_offsets[0] + image.entry_point,
.section_header_offset = program_end_offset,
.program_header_count = program_header_count,
.section_header_count = section_count,
.name_section_header_index = 0,
};
var program_header_offset: usize = @sizeOf(Header);
var section_header_offset = program_end_offset;
inline for (section_fields, section_offsets) |section_field, section_offset| {
const section_name = section_field.name;
const section = &@field(image.sections, section_name);
if (section.index > 0) {
const program_header: *ProgramHeader = @ptrCast(@alignCast(writer.bytes.items[program_header_offset..].ptr));
program_header.* = .{
.type = .load,
.flags = .{
.executable = equal(u8, section_name, "text"),
.writable = equal(u8, section_name, "data"),
.readable = true,
},
.offset = 0,
.virtual_address = base_address,
.physical_address = base_address,
.size_in_file = section.index,
.size_in_memory = section.index,
.alignment = 0,
};
const source = section.content[0..section.index];
const destination = writer.bytes.items[section_offset..][0..source.len];
@memcpy(destination, source);
const section_header: *SectionHeader = @ptrCast(@alignCast(writer.bytes.items[section_header_offset..].ptr));
section_header.* = .{
.name_offset = 0,
.type = .program_data,
.flags = .{
.alloc = equal(u8, section_name, "text"),
.executable = equal(u8, section_name, "text"),
.writable = equal(u8, section_name, "data"),
},
.address = base_address + section_offset,
.offset = section_offset,
.size = section.index,
.link = 0,
.info = 0,
.alignment = 0,
.entry_size = 0,
};
}
}
}
pub fn writeToFile(writer: *const Writer, file_path: []const u8) !void {
std.debug.print("Writing file to {s}\n", .{file_path});
const flags = switch (@import("builtin").os.tag) {
.windows => .{},
else => .{
.mode = 0o777,
},
};
const file_descriptor = try std.fs.cwd().createFile(file_path, flags);
try file_descriptor.writeAll(writer.bytes.items);
file_descriptor.close();
}
pub fn writeToFileAbsolute(writer: *const Writer, absolute_file_path: []const u8) !void {
const file = try std.fs.createFileAbsolute(absolute_file_path, .{});
defer file.close();
try file.writeAll(writer.bytes.items);
}
};
const Header = extern struct {
magic: u8 = 0x7f,
elf_id: [3]u8 = "ELF".*,
bit_count: BitCount = .@"64",
endianness: Endianness = .little,
header_version: u8 = 1,
os_abi: ABI,
abi_version: u8 = 0,
padding: [7]u8 = [_]u8{0} ** 7,
object_type: ObjectFileType = .executable, // e_type
machine: Machine,
version: u32 = 1,
entry: u64,
program_header_offset: u64 = std.mem.alignForward(u16, @sizeOf(Header), @alignOf(ProgramHeader)),
section_header_offset: u64,
flags: u32 = 0,
header_size: u16 = 0x40,
program_header_size: u16 = @sizeOf(ProgramHeader),
program_header_count: u16 = 1,
section_header_size: u16 = @sizeOf(SectionHeader),
section_header_count: u16,
name_section_header_index: u16,
const BitCount = enum(u8) {
@"32" = 1,
@"64" = 2,
};
const ABI = enum(u8) {
systemv = 0,
};
const ObjectFileType = enum(u16) {
none = 0,
relocatable = 1,
executable = 2,
dynamic = 3,
core = 4,
lo_os = 0xfe00,
hi_os = 0xfeff,
lo_proc = 0xff00,
hi_proc = 0xffff,
};
const Machine = enum(u16) {
AMD64 = 0x3e,
};
const Endianness = enum(u8) {
little = 1,
big = 2,
};
};
const ProgramHeader = extern struct {
type: Type = .load,
flags: Flags,
offset: u64,
virtual_address: u64,
physical_address: u64,
size_in_file: u64,
size_in_memory: u64,
alignment: u64 = 0,
const Type = enum(u32) {
null = 0,
load = 1,
dynamic = 2,
interpreter = 3,
note = 4,
shlib = 5, // reserved
program_header = 6,
tls = 7,
lo_os = 0x60000000,
hi_os = 0x6fffffff,
lo_proc = 0x70000000,
hi_proc = 0x7fffffff,
};
const Flags = packed struct(u32) {
executable: bool,
writable: bool,
readable: bool,
reserved: u29 = 0,
};
};
const SectionHeader = extern struct {
name_offset: u32,
type: Type,
flags: Flags,
address: u64,
offset: u64,
size: u64,
// section index
link: u32,
info: u32,
alignment: u64,
entry_size: u64,
// type
const Type = enum(u32) {
null = 0,
program_data = 1,
symbol_table = 2,
string_table = 3,
relocation_entries_addends = 4,
symbol_hash_table = 5,
dynamic_linking_info = 6,
notes = 7,
program_space_no_data = 8,
relocation_entries = 9,
reserved = 10,
dynamic_linker_symbol_table = 11,
array_of_constructors = 14,
array_of_destructors = 15,
array_of_pre_constructors = 16,
section_group = 17,
extended_section_indices = 18,
number_of_defined_types = 19,
start_os_specific = 0x60000000,
};
const Flags = packed struct(u64) {
writable: bool,
alloc: bool,
executable: bool,
reserved: bool = false,
mergeable: bool = false,
contains_null_terminated_strings: bool = false,
info_link: bool = false,
link_order: bool = false,
os_non_conforming: bool = false,
section_group: bool = false,
tls: bool = false,
_reserved: u53 = 0,
};
};
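As a sanity check on the layout `writeToMemory` produces, here is a small illustrative sketch (not part of the commit) that redoes the offset arithmetic for an image with a single non-empty text section, using the sizes of the extern structs above (0x40-byte Header, 56-byte ProgramHeader, 64-byte SectionHeader); the text size chosen is hypothetical:

const std = @import("std");

pub fn main() void {
    const section_count: u32 = 1; // only the "text" section is non-empty
    const header_size: u32 = 0x40; // @sizeOf(Header)
    const program_header_size: u32 = 56; // @sizeOf(ProgramHeader)
    const section_header_size: u32 = 64; // @sizeOf(SectionHeader)
    const text_size: u32 = 0x123; // hypothetical amount of emitted machine code
    const alignment: u32 = 0x10; // Section.alignment default in emit.zig

    const program_start_offset = header_size + section_count * program_header_size; // 0x78
    const text_offset = std.mem.alignForward(u32, program_start_offset, alignment); // 0x80
    const program_end_offset = std.mem.alignForward(u32, text_offset + text_size, alignment);
    const file_size = program_end_offset + section_count * section_header_size;

    const base_address: u32 = 0x200000;
    const entry_point: u32 = 0; // image.entry_point when _start is the first emitted function
    std.debug.print("text at file offset 0x{x}, e_entry = 0x{x}, file size 0x{x}\n", .{
        text_offset,
        base_address + text_offset + entry_point,
        file_size,
    });
}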

src/backend/emit.zig

@@ -6,17 +6,25 @@ const assert = std.debug.assert;
 const expect = std.testing.expect;
 const expectEqual = std.testing.expectEqual;
+const Compilation = @import("../Compilation.zig");
 const ir = @import("intermediate_representation.zig");
 const data_structures = @import("../data_structures.zig");
 const ArrayList = data_structures.ArrayList;
 const AutoHashMap = data_structures.AutoHashMap;
+const mmap = data_structures.mmap;
+const elf = @import("elf.zig");
+const pe = @import("pe.zig");
+const macho = @import("macho.zig");
 const jit_callconv = .SysV;
 const Section = struct {
     content: []align(page_size) u8,
     index: usize = 0,
+    alignment: u32 = 0x10,
 };
pub const Result = struct { pub const Result = struct {
@ -26,63 +34,34 @@ pub const Result = struct {
data: Section, data: Section,
}, },
entry_point: u32 = 0, entry_point: u32 = 0,
target: std.Target,
pub fn create() !Result { pub fn create(target: std.Target) !Result {
return Result{ return Result{
.sections = .{ .sections = .{
.text = .{ .content = try mmap(page_size, .{ .executable = true }) }, .text = .{ .content = try mmap(page_size, .{ .executable = true }) },
.rodata = .{ .content = try mmap(page_size, .{ .executable = false }) }, .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) },
.data = .{ .content = try mmap(page_size, .{ .executable = false }) }, .data = .{ .content = try mmap(page_size, .{ .executable = false }) },
}, },
}; .target = target,
}
fn mmap(size: usize, flags: packed struct {
executable: bool,
}) ![]align(page_size) u8 {
return switch (@import("builtin").os.tag) {
.windows => blk: {
const windows = std.os.windows;
break :blk @as([*]align(0x1000) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size];
},
.linux, .macos => |os_tag| blk: {
const jit = switch (os_tag) {
.macos => 0x800,
.linux => 0,
else => unreachable,
};
const execute_flag: switch (os_tag) {
.linux => u32,
.macos => c_int,
else => unreachable,
} = if (flags.executable) std.os.PROT.EXEC else 0;
const protection_flags: u32 = @intCast(std.os.PROT.READ | std.os.PROT.WRITE | execute_flag);
const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE | jit;
break :blk std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
},
else => @compileError("OS not supported"),
}; };
} }
pub fn appendCode(image: *Result, code: []const u8) void { pub fn appendCode(image: *Result, code: []const u8) void {
std.debug.print("New code: ", .{});
for (code) |byte| {
std.debug.print("0x{x} ", .{byte});
}
std.debug.print("\n", .{});
const destination = image.sections.text.content[image.sections.text.index..][0..code.len]; const destination = image.sections.text.content[image.sections.text.index..][0..code.len];
@memcpy(destination, code); @memcpy(destination, code);
image.sections.text.index += code.len; image.sections.text.index += code.len;
} }
pub fn appendCodeByte(image: *Result, code_byte: u8) void { pub fn appendCodeByte(image: *Result, code_byte: u8) void {
std.debug.print("New code: 0x{x}\n", .{code_byte});
image.sections.text.content[image.sections.text.index] = code_byte; image.sections.text.content[image.sections.text.index] = code_byte;
image.sections.text.index += 1; image.sections.text.index += 1;
} }
fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType { fn getEntryPoint(image: *const Result, comptime FunctionType: type) *const FunctionType {
if (@import("builtin").cpu.arch == .aarch64 and @import("builtin").os.tag == .macos) {
data_structures.pthread_jit_write_protect_np(true);
}
comptime { comptime {
assert(@typeInfo(FunctionType) == .Fn); assert(@typeInfo(FunctionType) == .Fn);
} }
@ -90,6 +69,18 @@ pub const Result = struct {
assert(image.sections.text.content.len > 0); assert(image.sections.text.content.len > 0);
return @as(*const FunctionType, @ptrCast(&image.sections.text.content[image.entry_point])); return @as(*const FunctionType, @ptrCast(&image.sections.text.content[image.entry_point]));
} }
fn writeElf(image: *const Result, allocator: Allocator, executable_relative_path: []const u8) !void {
var writer = try elf.Writer.init(allocator);
try writer.writeToMemory(image);
try writer.writeToFile(executable_relative_path);
}
fn writePe(image: *const Result, allocator: Allocator, executable_relative_path: []const u8) !void {
var writer = try pe.Writer.init(allocator);
try writer.writeToMemory(image);
try writer.writeToFile(executable_relative_path);
}
}; };
pub fn InstructionSelector(comptime Instruction: type) type { pub fn InstructionSelector(comptime Instruction: type) type {
@@ -117,33 +108,45 @@ pub fn get(comptime arch: std.Target.Cpu.Arch) type {
 pub fn get(comptime arch: std.Target.Cpu.Arch) type {
     const backend = switch (arch) {
         .x86_64 => @import("x86_64.zig"),
-        else => @compileError("Architecture not supported"),
+        .aarch64 => @import("aarch64.zig"),
+        else => {},
     };
     return struct {
-        pub fn initialize(allocator: Allocator, intermediate: *ir.Result) !void {
-            std.debug.print("Entry point: {}\n", .{intermediate.entry_point});
-            var mir = try backend.MIR.generate(allocator, intermediate);
-            try mir.allocateRegisters(allocator, intermediate);
-            const result = try mir.encode(intermediate);
-
-            const text_section = result.sections.text.content[0..result.sections.text.index];
-            for (text_section) |byte| {
-                std.debug.print("0x{x}\n", .{byte});
-            }
-
-            switch (@import("builtin").os.tag) {
-                .linux => switch (@import("builtin").cpu.arch == arch) {
-                    true => {
-                        std.debug.print("Executing...\n", .{});
-                        const entryPoint = result.getEntryPoint(fn () callconv(.SysV) noreturn);
-                        entryPoint();
-                        std.debug.print("This should not print...\n", .{});
-                    },
-                    false => {},
-                },
-                else => {},
-            }
+        pub fn initialize(allocator: Allocator, intermediate: *ir.Result, descriptor: Compilation.Module.Descriptor) !void {
+            switch (arch) {
+                .x86_64 => {
+                    var mir = try backend.MIR.selectInstructions(allocator, intermediate, descriptor.target);
+                    try mir.allocateRegisters();
+                    const os = descriptor.target.os.tag;
+                    _ = os;
+                    const image = try mir.encode();
+                    _ = image;
+
+                    // switch (os) {
+                    //     .linux => try image.writeElf(allocator, descriptor.executable_path),
+                    //     .windows => try image.writePe(allocator, descriptor.executable_path),
+                    //     else => unreachable,
+                    // }
+                },
+                else => {
+                    const file = try std.fs.cwd().readFileAlloc(allocator, "main", std.math.maxInt(u64));
+                    try macho.interpretFile(allocator, descriptor, file);
+                },
+            }
+
+            // switch (@import("builtin").os.tag) {
+            //     .linux => switch (@import("builtin").cpu.arch == arch) {
+            //         true => {
+            //             std.debug.print("Executing...\n", .{});
+            //             const entryPoint = result.getEntryPoint(fn () callconv(.SysV) noreturn);
+            //             entryPoint();
+            //             std.debug.print("This should not print...\n", .{});
+            //         },
+            //         false => {},
+            //     },
+            //     else => {},
+            // }
         }
     };
 }
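The `inline else` dispatch that `compileModule` uses to reach this `initialize` function (`switch (descriptor.target.cpu.arch) { inline else => |arch| try emit.get(arch).initialize(...) }`) turns a runtime target architecture into a comptime parameter. A toy sketch of the pattern, separate from the project's code (the `get`/`name` helpers here are illustrative):

const std = @import("std");

// Stand-in for emit.get: returns a distinct backend namespace per comptime arch.
fn get(comptime arch: std.Target.Cpu.Arch) type {
    return struct {
        pub fn name() []const u8 {
            return @tagName(arch);
        }
    };
}

pub fn main() void {
    const arch: std.Target.Cpu.Arch = .x86_64; // in the compiler this comes from descriptor.target
    switch (arch) {
        // inline else instantiates one branch per architecture, so comptime_arch
        // is comptime-known inside each branch and can be passed to get().
        inline else => |comptime_arch| std.debug.print("selected backend: {s}\n", .{get(comptime_arch).name()}),
    }
}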

File diff suppressed because it is too large.

src/backend/macho.zig Normal file

@ -0,0 +1,682 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const equal = std.mem.eql;
const print = std.debug.print;
const Compilation = @import("../Compilation.zig");
const data_structures = @import("../data_structures.zig");
const ArrayList = data_structures.ArrayList;
const mmap = data_structures.mmap;
const Header = extern struct {
magic: u32 = magic,
cpu_type: CpuType,
cpu_subtype: extern union {
arm: ArmSubType,
x86: X86SubType,
},
file_type: FileType,
load_command_count: u32,
load_command_size: u32,
flags: Flags,
reserved: u32 = 0,
const magic = 0xfeedfacf;
const CpuType = enum(u32) {
VAX = 0x00000001,
ROMP = 0x00000002,
NS32032 = 0x00000004,
NS32332 = 0x00000005,
MC680x0 = 0x00000006,
x86 = 0x00000007,
MIPS = 0x00000008,
NS32352 = 0x00000009,
MC98000 = 0x0000000A,
HPPA = 0x0000000B,
ARM = 0x0000000C,
MC88000 = 0x0000000D,
SPARC = 0x0000000E,
i860be = 0x0000000F,
i860_le = 0x00000010,
RS6000 = 0x00000011,
PowerPC = 0x00000012,
arm64 = 0x0000000C | abi64,
x86_64 = 0x00000007 | abi64,
const abi64 = 0x01000000;
};
const ArmSubType = enum(u32) {
all = 0x00000000,
ARM_A500_ARCH = 0x00000001,
ARM_A500 = 0x00000002,
ARM_A440 = 0x00000003,
ARM_M4 = 0x00000004,
ARM_V4T = 0x00000005,
ARM_V6 = 0x00000006,
ARM_V5TEJ = 0x00000007,
ARM_XSCALE = 0x00000008,
ARM_V7 = 0x00000009,
ARM_V7F = 0x0000000A,
ARM_V7S = 0x0000000B,
ARM_V7K = 0x0000000C,
ARM_V8 = 0x0000000D,
ARM_V6M = 0x0000000E,
ARM_V7M = 0x0000000F,
ARM_V7EM = 0x00000010,
_,
};
const X86SubType = enum(u32) {
All = 0x00000003,
@"486" = 0x00000004,
@"486SX" = 0x00000084,
Pentium_M5 = 0x00000056,
Celeron = 0x00000067,
Celeron_Mobile = 0x00000077,
Pentium_3 = 0x00000008,
Pentium_3_M = 0x00000018,
Pentium_3_XEON = 0x00000028,
Pentium_4 = 0x0000000A,
Itanium = 0x0000000B,
Itanium_2 = 0x0000001B,
XEON = 0x0000000C,
XEON_MP = 0x0000001C,
_,
};
const FileType = enum(u32) {
relocatable_object = 0x00000001,
demand_paged_executable = 0x00000002,
fixed_vm_shared_library = 0x00000003,
core = 0x00000004,
preloaded_executable = 0x00000005,
dynamic_shared_library = 0x00000006,
dynamic_link_editor = 0x00000007,
dynamic_bundle = 0x00000008,
shared_library_stub = 0x00000009,
debug_companion = 0x0000000A,
x86_64_kext = 0x0000000B,
archive = 0x0000000C,
};
const Flags = packed struct(u32) {
no_undefined_references: bool = true,
incrementally_linked: bool = false,
dynamic_linker_input: bool = true,
dynamic_linker_bound_undefined_references: bool = false,
prebound_dynamic_undefined_references: bool = false,
split_ro_and_rw_segments: bool = false,
_: bool = false,
two_level_namespace_bindings: bool = true,
no_symbol_multiple_definition_in_subimages: bool = false,
no_dyld_prebinding_agent_notification: bool = false,
can_redo_prebinding: bool = false,
bind_two_level_namespaces_to_libraries: bool = false,
safe_to_split_sections_for_dead_code_stripping: bool = false,
canonicalized_by_unprebinding: bool = false,
final_external_weak_symbols: bool = false,
final_weak_symbols: bool = false,
all_stacks_execute_protected: bool = false,
safe_for_zero_uid: bool = false,
safe_for_ugid: bool = false,
no_check_dependent_dylibs_for_reexport: bool = false,
load_at_random_address: bool = false,
no_load_command_for_unreferenced_dylib: bool = true,
thread_local_variable_section: bool = false,
run_with_non_executable_heap: bool = false,
code_linked_for_application_use: bool = false,
nlist_external_symbols_not_all_dyld_info_symbols: bool = false,
allow_lc_min_version_macos_lc_build_version: bool = false,
reserved: u4 = 0,
dylib_only: bool = false,
};
};
const UniversalHeader = extern struct {
magic: u32 = magic,
binary_count: u32,
const magic = 0xcafebabe;
};
const LoadCommand = extern struct {
type: Type,
size: u32,
const Type = enum(u32) {
segment32 = 0x01,
symbol_table = 0x02,
symbol_table_information = 0x0b,
load_dylib = 0x0c,
id_dylib = 0x0d,
load_dylinker = 0x0e,
id_dylinker = 0x0f,
optional_dynamic_library = 0x18,
segment64 = 0x19,
uuid_number = 0x1b,
code_signature = 0x1d,
compressed_linkedit_table = 0x22,
function_starts = 0x26,
data_in_code = 0x29,
source_version = 0x2a,
minimum_os_version = 0x32,
dyld_exports_trie = 0x80000033,
dyld_chained_fixups = 0x80000034,
dyld_main_entry_point = 0x80000028,
};
const Segment64 = extern struct {
type: Type = .segment64,
size: u32,
name: [16]u8,
address: u64,
address_size: u64,
file_offset: u64,
file_size: u64,
maximum_virtual_memory_protections: VirtualMemoryProtection,
initial_virtual_memory_protections: VirtualMemoryProtection,
section_count: u32,
flags: Flags,
const VirtualMemoryProtection = packed struct(u32) {
read: bool,
write: bool,
execute: bool,
reserved: u29 = 0,
};
const Flags = packed struct(u32) {
vm_space_high_part: bool = false,
vm_fixed_library: bool = false,
no_relocation: bool = false,
protected_segment: bool = false,
read_only_after_relocations: bool = false,
reserved: u27 = 0,
};
const Section = extern struct {
name: [16]u8,
segment_name: [16]u8,
address: u64,
size: u64,
file_offset: u32,
alignment: u32,
relocation_file_offset: u32,
relocation_count: u32,
type: Section.Type,
reserved: u8 = 0,
flags: Section.Flags,
reserved0: u32 = 0,
reserved1: u32 = 0,
reserved2: u32 = 0,
comptime {
assert(@sizeOf(Section) == 80);
}
const Type = enum(u8) {
regular = 0,
only_non_lazy_symbol_pointers = 0b110,
only_lazy_symbol_pointers_only_symbol_stubs = 0b111,
zero_fill_on_demand_section = 0b1100,
only_lazy_pointers_to_lazy_loaded_dylibs = 0b10000,
};
const Flags = packed struct(u16) {
local_relocations: bool = false,
external_relocations: bool = false,
some_machine_instructions: bool = false,
reserved: u5 = 0,
reserved2: u1 = 0,
debug_section: bool = false,
i386_code_stubs: bool = false,
live_blocks_if_reference_live_blocks: bool = false,
no_dead_stripping: bool = false,
strip_static_symbols_dyldlink_flag: bool = false,
coalesced_symbols: bool = false,
only_machine_instructions: bool = false,
};
};
fn getSize(section_count: u32) u32 {
return @sizeOf(LoadCommand.Segment64) + section_count * @sizeOf(LoadCommand.Segment64.Section);
}
};
const LinkeditData = extern struct {
type: Type,
size: u32 = 16,
data_offset: u32,
data_size: u32,
};
const SymbolTable = extern struct {
type: Type,
size: u32 = 24,
symbol_offset: u32,
symbol_count: u32,
string_table_offset: u32,
string_table_size: u32,
};
const SymbolTableInformation = extern struct {
type: Type,
size: u32 = 80,
local_symbol_index: u32,
local_symbol_count: u32,
external_symbol_index: u32,
external_symbol_count: u32,
undefined_symbol_index: u32,
undefined_symbol_count: u32,
content_table_offset: u32,
content_table_entry_count: u32,
module_table_offset: u32,
module_table_entry_count: u32,
referenced_symbol_table_offset: u32,
referenced_symbol_table_entry_count: u32,
indirect_symbol_table_offset: u32,
indirect_symbol_table_entry_count: u32,
external_relocation_offset: u32,
external_relocation_entry_count: u32,
local_relocation_offset: u32,
local_relocation_entry_count: u32,
};
const Dylinker = extern struct {
type: Type,
size: u32,
name_offset: u32 = 12,
};
const Dylib = extern struct {
type: Type,
size: u32,
name_offset: u32,
timestamp: u32,
current_version: u32,
compatibility_version: u32,
};
const Uuid = extern struct {
type: Type,
size: u32,
uuid: [16]u8,
};
const MinimumVersion = extern struct {
type: Type,
size: u32,
version: u32,
sdk: u32,
};
const SourceVersion = extern struct {
type: Type,
size: u32,
version: u64,
};
const EntryPoint = extern struct {
type: Type,
size: u32,
entry_offset: u64,
stack_size: u64,
};
};
const Writer = struct {
items: []u8,
index: usize = 0,
address_offset: usize = 0,
file_offset: usize = 0,
load_command_size: u32,
segment_count: u16,
segment_index: u16 = 0,
segment_offset: u16 = @sizeOf(Header),
linkedit_segment_address_offset: u64 = 0,
linkedit_segment_file_offset: u64 = 0,
linkedit_segment_size: u32 = 0,
fn getWrittenBytes(writer: *const Writer) []const u8 {
return writer.items[0..writer.index];
}
fn append(writer: *Writer, bytes: []const u8) void {
writer.writeBytesAt(bytes, writer.index);
writer.index += bytes.len;
}
fn writeBytesAt(writer: *Writer, bytes: []const u8, offset: usize) void {
@memcpy(writer.items[offset..][0..bytes.len], bytes);
}
const SegmentCreation = struct {
name: []const u8,
sections: []const SectionCreation,
protection: LoadCommand.Segment64.VirtualMemoryProtection,
};
const SectionCreation = struct {
name: []const u8,
bytes: []const u8,
alignment: u32 = 1,
flags: LoadCommand.Segment64.Section.Flags,
};
fn writeSegment(writer: *Writer, descriptor: SegmentCreation) void {
assert(writer.segment_index < writer.segment_count);
defer writer.segment_index += 1;
const segment_name = blk: {
var result = [1]u8{0} ** 16;
@memcpy(result[0..descriptor.name.len], descriptor.name);
break :blk result;
};
if (equal(u8, descriptor.name, "__PAGEZERO")) {
assert(writer.segment_offset == @sizeOf(Header));
const address_size = 4 * 1024 * 1024 * 1024;
writer.writeBytesAt(std.mem.asBytes(&LoadCommand.Segment64{
.size = @sizeOf(LoadCommand.Segment64),
.name = segment_name,
.address = 0,
.address_size = address_size,
.file_offset = 0,
.file_size = 0,
.maximum_virtual_memory_protections = descriptor.protection,
.initial_virtual_memory_protections = descriptor.protection,
.section_count = @intCast(descriptor.sections.len),
.flags = .{},
}), writer.segment_offset);
writer.address_offset += address_size;
writer.segment_offset += @sizeOf(LoadCommand.Segment64);
} else if (equal(u8, descriptor.name, "__TEXT")) {
const original_offset = writer.segment_offset;
assert(original_offset == @sizeOf(Header) + @sizeOf(LoadCommand.Segment64));
writer.segment_offset += @sizeOf(LoadCommand.Segment64);
const text_metadata_offset = @sizeOf(Header) + writer.load_command_size;
var section_address_offset = writer.address_offset + text_metadata_offset;
var section_file_offset = writer.file_offset + text_metadata_offset;
for (descriptor.sections) |section| {
section_address_offset = std.mem.alignForward(usize, section_address_offset, section.alignment);
section_file_offset = std.mem.alignForward(usize, section_file_offset, section.alignment);
writer.writeBytesAt(std.mem.asBytes(&LoadCommand.Segment64.Section{
.name = blk: {
var result = [1]u8{0} ** 16;
@memcpy(result[0..section.name.len], section.name);
break :blk result;
},
.segment_name = segment_name,
.address = section_address_offset,
.size = section.bytes.len,
.file_offset = @intCast(section_file_offset),
.alignment = std.math.log2(section.alignment),
.relocation_file_offset = 0,
.relocation_count = 0,
.type = .regular,
.flags = section.flags,
}), writer.segment_offset);
@memcpy(writer.items[section_file_offset..][0..section.bytes.len], section.bytes);
section_address_offset += section.bytes.len;
section_file_offset += section.bytes.len;
writer.segment_offset += @sizeOf(LoadCommand.Segment64.Section);
}
const end_segment_offset = writer.segment_offset;
writer.segment_offset = original_offset;
const size = end_segment_offset - writer.file_offset;
const aligned_size = std.mem.alignForward(usize, size, 16 * 1024);
writer.append(std.mem.asBytes(&LoadCommand.Segment64{
.size = @sizeOf(LoadCommand.Segment64),
.name = segment_name,
.address = writer.address_offset,
.address_size = aligned_size,
.file_offset = writer.file_offset,
.file_size = aligned_size,
.maximum_virtual_memory_protections = descriptor.protection,
.initial_virtual_memory_protections = descriptor.protection,
.section_count = @intCast(descriptor.sections.len),
.flags = .{},
}));
writer.segment_offset = end_segment_offset;
writer.address_offset += aligned_size;
writer.file_offset += aligned_size;
} else {
unreachable;
}
}
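// Copies `bytes` to the current end of the file (the growing __LINKEDIT area) and appends a linkedit_data_command of the given type; the first call records where the __LINKEDIT region begins.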
fn writeLinkeditData(writer: *Writer, bytes: []const u8, load_command_type: LoadCommand.Type) void {
if (writer.linkedit_segment_size == 0) {
writer.linkedit_segment_address_offset = writer.address_offset;
writer.linkedit_segment_file_offset = writer.file_offset;
}
const data_size: u32 = @intCast(bytes.len);
@memcpy(writer.items[writer.file_offset..][0..data_size], bytes);
writer.append(std.mem.asBytes(&LoadCommand.LinkeditData{
.type = load_command_type,
.data_offset = @intCast(writer.linkedit_segment_file_offset),
.data_size = data_size,
}));
writer.address_offset += data_size;
writer.file_offset += data_size;
writer.linkedit_segment_size += data_size;
}
};
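// Dumps every load command of a reference Mach-O executable, then starts emitting an equivalent image with Writer; still unfinished and currently ends in `unreachable`.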
pub fn interpretFile(allocator: Allocator, descriptor: Compilation.Module.Descriptor, file: []const u8) !void {
_ = allocator;
_ = descriptor;
const header: *const Header = @ptrCast(@alignCast(file.ptr));
print("Header : {}\n", .{header});
assert(header.magic == Header.magic);
var text_segment: LoadCommand.Segment64 = undefined;
const load_command_start: *const LoadCommand = @ptrCast(@alignCast(file[@sizeOf(Header)..].ptr));
var load_command_ptr = load_command_start;
for (0..header.load_command_count) |_| {
const load_command = load_command_ptr.*;
switch (load_command.type) {
.segment64 => {
const segment_load_command: *const LoadCommand.Segment64 = @ptrCast(@alignCast(load_command_ptr));
const text_segment_name = "__TEXT";
if (equal(u8, segment_load_command.name[0..text_segment_name.len], text_segment_name)) {
text_segment = segment_load_command.*;
}
print("SLC: {}\n", .{segment_load_command});
print("segment name: {s}\n", .{segment_load_command.name});
const section_ptr: [*]const LoadCommand.Segment64.Section = @ptrFromInt(@intFromPtr(segment_load_command) + @sizeOf(LoadCommand.Segment64));
const sections = section_ptr[0..segment_load_command.section_count];
for (sections) |section| {
print("{}\n", .{section});
print("Section name: {s}. Segment name: {s}\n", .{ section.name, section.segment_name });
}
},
.dyld_chained_fixups => {
const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.dyld_exports_trie => {
const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.symbol_table => {
const command: *const LoadCommand.SymbolTable = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.symbol_table_information => {
const command: *const LoadCommand.SymbolTableInformation = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.load_dylinker => {
const command: *const LoadCommand.Dylinker = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
const name: [*:0]const u8 = @ptrFromInt(@intFromPtr(command) + command.name_offset);
print("Name: {s}\n", .{name});
},
.uuid_number => {
const command: *const LoadCommand.Uuid = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.minimum_os_version => {
const command: *const LoadCommand.MinimumVersion = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.source_version => {
const command: *const LoadCommand.SourceVersion = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.dyld_main_entry_point => {
const command: *const LoadCommand.EntryPoint = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.load_dylib => {
const command: *const LoadCommand.Dylib = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
print("Dylib: {s}\n", .{@as([*:0]const u8, @ptrFromInt(@intFromPtr(load_command_ptr) + @sizeOf(LoadCommand.Dylib)))});
},
.function_starts => {
const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.data_in_code => {
const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
.code_signature => {
const command: *const LoadCommand.LinkeditData = @ptrCast(@alignCast(load_command_ptr));
print("command: {}\n", .{command});
},
else => |t| @panic(@tagName(t)),
}
load_command_ptr = @ptrFromInt(@intFromPtr(load_command_ptr) + load_command.size);
}
// const load_command_end = load_command_ptr;
// const load_command_size = @intFromPtr(load_command_end) - @intFromPtr(load_command_start);
// assert(load_command_size == header.load_command_size);
const segment_count = 3;
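// Total size of every load command emitted below: three segment_64 commands (plus the two __TEXT sections),
// chained-fixups and exports-trie blobs, LC_SYMTAB, LC_DYSYMTAB, the dylinker path, LC_UUID, the minimum OS
// version, the entry point, the libSystem dylib, and three more linkedit blobs; path strings are padded to 8-byte multiples.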
var writer = Writer{
.items = try mmap(0x100000, .{}),
.load_command_size = segment_count * @sizeOf(LoadCommand.Segment64) +
2 * @sizeOf(LoadCommand.Segment64.Section) +
@sizeOf(LoadCommand.LinkeditData) +
@sizeOf(LoadCommand.LinkeditData) +
@sizeOf(LoadCommand.SymbolTable) +
@sizeOf(LoadCommand.SymbolTableInformation) +
@sizeOf(LoadCommand.Dylinker) + std.mem.alignForward(u32, "/usr/lib/dyld".len, 8) +
@sizeOf(LoadCommand.Uuid) +
@sizeOf(LoadCommand.MinimumVersion) +
@sizeOf(LoadCommand.EntryPoint) +
@sizeOf(LoadCommand.Dylib) + std.mem.alignForward(u32, "/usr/lib/libSystem.B.dylib".len, 8) +
3 * @sizeOf(LoadCommand.LinkeditData),
.segment_count = segment_count,
};
writer.index = @sizeOf(Header);
writer.writeSegment(.{
.name = "__PAGEZERO",
.sections = &.{},
.protection = .{
.read = false,
.write = false,
.execute = false,
},
});
writer.writeSegment(.{
.name = "__TEXT",
.sections = &.{
.{
.name = "__text",
.bytes = &.{
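// aarch64 code for the test program's `main`: mov w0, #0 ; ret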
0x00, 0x00, 0x80, 0x52,
0xc0, 0x03, 0x5f, 0xd6,
},
.alignment = 4,
.flags = .{
.only_machine_instructions = true,
},
},
.{
.name = "__unwind_info",
.bytes = &.{
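// Compact unwind info blob (version 1 header) for the single function, presumably copied verbatim from a reference executable.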
0x01, 0x00, 0x00, 0x00,
0x1c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x1c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x1c, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00,
0xb0, 0x3f, 0x00, 0x00,
0x34, 0x00, 0x00, 0x00,
0x34, 0x00, 0x00, 0x00,
0xb9, 0x3f, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x34, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00,
0x0c, 0x00, 0x01, 0x00,
0x10, 0x00, 0x01, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x02,
},
.alignment = 4,
.flags = .{},
},
},
.protection = .{
.read = true,
.write = false,
.execute = true,
},
});
// TODO: write this later
// writer.writeSegment(.{
// .name = "__LINKEDIT",
// .sections = &.{},
// .protection = .{
// .read = true,
// .write = false,
// .execute = false,
// },
// });
assert(writer.segment_index == writer.segment_count - 1);
writer.index = writer.segment_offset + @sizeOf(LoadCommand.Segment64);
for (file[16384 + 56 ..][0..48]) |b| {
print("0x{x}, ", .{b});
}
const chained_fixup_bytes = &.{ 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
writer.writeLinkeditData(chained_fixup_bytes, .dyld_chained_fixups);
const export_trie_bytes = &.{ 0x0, 0x1, 0x5f, 0x0, 0x9, 0x2, 0x0, 0x0, 0x0, 0x0, 0x2, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x5, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, 0x3, 0x0, 0xb0, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
writer.writeLinkeditData(export_trie_bytes, .dyld_exports_trie);
unreachable;
// writer.writeSymbolTable(
}
// .bytes = &.{
// 0x0, 0x0, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x5f, 0x0, 0x9, 0x2, 0x0, 0x0, 0x0, 0x0, 0x2, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x5, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, 0x3, 0x0, 0xb0, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xb0, 0x7f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0xf, 0x1, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x16, 0x0, 0x0, 0x0, 0xf, 0x1, 0x0, 0x0, 0xb0, 0x3f, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x20, 0x0, 0x5f, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x5f, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0xfa, 0xde, 0xc, 0xc0, 0x0, 0x0, 0x1, 0x11, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x14, 0xfa, 0xde, 0xc, 0x2, 0x0, 0x0, 0x0, 0xfd, 0x0, 0x2, 0x4, 0x0, 0x0, 0x2, 0x0, 0x2, 0x0, 0x0, 0x0, 0x5d, 0x0, 0x0, 0x0, 0x58, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x40, 0xb0, 0x20, 0x2, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x6d, 0x61, 0x69, 0x6e, 0x0, 0xb2, 0x2a, 0x3, 0x79, 0x1b, 0x82, 0xf4, 0x71, 0xf1, 0xae, 0xfa, 0x44, 0x53, 0xe0, 0xc2, 0x78, 0x1e, 0x56, 0xd1, 0x9b, 0x36, 0x37, 0x7b, 0x7e, 0x61, 0xf5, 0x8a, 0x59, 0xc4, 0xf0, 0x64, 0x56, 0xad, 0x7f, 0xac, 0xb2, 0x58, 0x6f, 0xc6, 0xe9, 0x66, 0xc0, 0x4, 0xd7, 0xd1, 0xd1, 0x6b, 0x2, 0x4f, 0x58, 0x5, 0xff, 0x7c, 0xb4, 0x7c, 0x7a, 0x85, 0xda, 0xbd, 0x8b, 0x48, 0x89, 0x2c, 0xa7, 0xad, 0x7f, 0xac, 0xb2, 0x58, 0x6f, 0xc6, 0xe9, 0x66, 0xc0, 0x4, 0xd7, 0xd1, 0xd1, 0x6b, 0x2, 0x4f, 0x58, 0x5, 0xff, 0x7c, 0xb4, 0x7c, 0x7a, 0x85, 0xda, 0xbd, 0x8b, 0x48, 0x89, 0x2c, 0xa7, 0x8, 0xdb, 0xee, 0xf5, 0x95, 0x71, 0x3e, 0xcb, 0x29, 0xff, 0x3f, 0x28, 0x46, 0xf0, 0xdc, 0x97, 0xbf, 0x2d, 0x3, 0xf2, 0xec, 0xc, 0x84, 0xa, 0x44, 0x90, 0xf, 0xe0, 0xf4, 0xea, 0x67, 0x97, 0x6b, 0xb0, 0x22, 0x2, 0x0, 0xa7, 0xed, 0x94, 0xb2, 0x3d, 0x86, 0x4d, 0x13, 0xd6, 0xa4, 0xe, 0x1c, 0x1a, 0x6b, 0x9b, 0x82, 0xa0, 0xeb, 0x28, 0x23, 0xfe, 0x8a, 0x51, 0x2a, 0xe5, 0xf9, 0x39,
// },

266
src/backend/pe.zig Normal file

@ -0,0 +1,266 @@
const std = @import("std");
const assert = std.debug.assert;
const print = std.debug.print;
const Allocator = std.mem.Allocator;
const data_structures = @import("../data_structures.zig");
const ArrayList = data_structures.ArrayList;
const emit = @import("emit.zig");
pub const Writer = struct {
in_file: []const u8,
items: []u8,
index: usize = 0,
allocator: Allocator,
pub fn init(allocator: Allocator) !Writer {
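// Reads an existing main.exe as a reference image and maps a page-aligned output buffer at least as large.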
const file = try std.fs.cwd().readFileAlloc(allocator, "main.exe", 0xfffffffffffff);
const len = std.mem.alignForward(usize, file.len, 0x1000);
return Writer{
.in_file = file,
.items = try data_structures.mmap(len, .{}),
.allocator = allocator,
};
}
pub fn writeToMemory(writer: *Writer, image: *const emit.Result) !void {
print("File len: {}\n", .{writer.in_file.len});
const dos_header: *const ImageDosHeader = @ptrCast(@alignCast(writer.in_file.ptr));
print("File address: {}\n", .{dos_header.file_address_of_new_exe_header});
print("File: {s}\n", .{writer.in_file[0x40..]});
for (writer.in_file[0x40..], 0..) |byte, index| {
if (byte == 'T') {
print("Index: {}\n", .{index});
break;
}
}
assert(dos_header.magic_number == ImageDosHeader.magic);
// assert(dos_header.file_address_of_new_exe_header == @sizeOf(ImageDosHeader));
print("{}\n", .{dos_header});
const file_header: *const ImageFileHeader = @ptrCast(@alignCast(writer.in_file[dos_header.file_address_of_new_exe_header + 4 ..].ptr));
print("File header: {}\n", .{file_header});
writer.append(std.mem.asBytes(&ImageDosHeader{
.file_address_of_new_exe_header = 208,
}));
// Zero-pad the DOS stub area up to file_address_of_new_exe_header (208); append() already advances writer.index.
while (writer.index < 208) {
writer.append(&.{0});
}
writer.append(std.mem.asBytes(&image_NT_signature));
writer.append(std.mem.asBytes(&ImageFileHeader{
.machine = switch (image.target.cpu.arch) {
.x86_64 => .amd64,
.aarch64 => .arm64,
else => @panic("Architecture"),
},
.section_count = 3,
.time_date_stamp = @intCast(std.time.timestamp()),
}));
const kernel32 = blk: {
var library = Library{
.name = "KERNEL32.DLL",
};
try library.symbols.append(writer.allocator, Symbol{
.name = "ExitProcess",
});
break :blk library;
};
const libraries = &[_]Library{kernel32};
_ = libraries;
const code = &.{
0x48, 0x83, 0xec, 0x28, //subq $40, %rsp
0xb9, 0x2a, 0x00, 0x00, 0x00, //movl $42, %ecx
0xff, 0x15, 0xf1, 0x0f, 0x00, 0x00, //callq *4081(%rip) # 0x140002000
0xcc,
};
_ = code;
const pdata = &.{
0x00, 0x10,
0x00, 0x00,
0x10, 0x10,
0x00, 0x00,
0x28, 0x21,
0x00, 0x00,
};
_ = pdata;
// TODO
// writer.append(std.mem.asBytes(ImageOptionalHeader{
// .magic = ImageOptionalHeader.magic,
// .size_of_code = code.len,
// }));
unreachable;
}
fn append(writer: *Writer, bytes: []const u8) void {
const destination = writer.items[writer.index..][0..bytes.len];
const source = bytes;
@memcpy(destination, source);
writer.index += bytes.len;
}
pub fn writeToFile(writer: *Writer, executable_relative_path: []const u8) !void {
_ = writer;
_ = executable_relative_path;
unreachable;
}
};
const ImageDosHeader = extern struct {
magic_number: u16 = magic,
bytes_last_page_of_file: u16 = 0,
pages_in_file: u16 = 0,
relocations: u16 = 0,
size_of_header_in_paragraphs: u16 = 0,
minimum_extra_paragraphs: u16 = 0,
maximum_extra_paragraphs: u16 = 0,
initial_ss_value: u16 = 0,
initial_sp_value: u16 = 0,
checksum: u16 = 0,
initial_ip_value: u16 = 0,
initial_cs_value: u16 = 0,
file_address_of_relocation_table: u16 = 0,
overlay_number: u16 = 0,
reserved_words: [4]u16 = .{0} ** 4,
oem_id: u16 = 0,
oem_info: u16 = 0,
reserved_words2: [10]u16 = .{0} ** 10,
file_address_of_new_exe_header: u32 = @sizeOf(ImageDosHeader),
const magic = 0x5a4d;
comptime {
assert(@sizeOf(ImageDosHeader) == 64);
}
};
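// "PE\0\0" signature that sits between the DOS stub and the COFF file header.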
const image_NT_signature: u32 = 0x00004550;
/// COFF header format
const ImageFileHeader = extern struct {
machine: ImageFileMachine,
section_count: u16,
time_date_stamp: u32,
symbol_table_offset: u32 = 0,
symbol_count: u32 = 0,
size_of_optional_header: u16 = @sizeOf(ImageOptionalHeader),
characteristics: Characteristics = .{},
const Characteristics = packed struct(u16) {
relocations_stripped: bool = false,
executable_image: bool = true,
stripped_line_count: bool = false,
stripped_local_symbols: bool = false,
aggressive_ws_trim: bool = false,
large_address_aware: bool = true,
reserved: u1 = 0,
bytes_reversed_lo: bool = false,
machine_32bit: bool = false,
stripped_debug: bool = false,
removable_run_from_swap: bool = false,
net_run_from_swap: bool = false,
system: bool = false,
dll: bool = false,
up_systems_only: bool = false,
bytes_reversed_hi: bool = false,
};
};
const ImageFileMachine = enum(u16) {
unknown = 0,
target_host = 0x0001, // Useful for indicating we want to interact with the host and not a WoW guest.
i386 = 0x014c, // Intel 386.
r3000 = 0x0162, // MIPS little-endian, 0x160 big-endian
r4000 = 0x0166, // MIPS little-endian
r10000 = 0x0168, // MIPS little-endian
wcemipsv2 = 0x0169, // MIPS little-endian WCE v2
alpha = 0x0184, // Alpha_AXP
sh3 = 0x01a2, // SH3 little-endian
sh3dsp = 0x01a3,
sh3e = 0x01a4, // SH3E little-endian
sh4 = 0x01a6, // SH4 little-endian
sh5 = 0x01a8, // SH5
arm = 0x01c0, // ARM Little-Endian
thumb = 0x01c2, // ARM Thumb/Thumb-2 Little-Endian
armnt = 0x01c4, // ARM Thumb-2 Little-Endian
am33 = 0x01d3,
powerpc = 0x01F0, // IBM PowerPC Little-Endian
powerpcfp = 0x01f1,
ia64 = 0x0200, // Intel 64
mips16 = 0x0266, // MIPS
alpha64 = 0x0284, // ALPHA64
mipsfpu = 0x0366, // MIPS
mipsfpu16 = 0x0466, // MIPS
tricore = 0x0520, // Infineon
cef = 0x0CEF,
ebc = 0x0EBC, // EFI Byte Code
amd64 = 0x8664, // AMD64 (K8)
m32r = 0x9041, // M32R little-endian
arm64 = 0xAA64, // ARM64 Little-Endian
cee = 0xC0EE,
const axp64 = ImageFileMachine.alpha64;
};
const ImageOptionalHeader = extern struct {
magic: u16 = magic,
major_linker_version: u8 = 0,
minor_linker_version: u8 = 0,
size_of_code: u32,
size_of_initialized_data: u32,
size_of_uninitialized_data: u32,
address_of_entry_point: u32,
base_of_code: u32,
image_base: u64,
section_alignment: u32,
file_alignment: u32,
major_os_version: u16,
minor_os_version: u16,
major_image_version: u16,
minor_image_version: u16,
major_subsystem_version: u16,
minor_subsystem_version: u16,
win32_version_value: u32,
size_of_image: u32,
size_of_headers: u32,
checksum: u32,
subsystem: u16,
dll_characteristics: u16,
size_of_stack_reserve: u64,
size_of_stack_commit: u64,
size_of_heap_reserve: u64,
size_of_heap_commit: u64,
loader_flags: u32,
number_of_RVA_and_sizes: u32,
data_directory: [image_number_of_directory_entries]ImageDataDirectory,
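// 0x20b identifies a PE32+ (64-bit) optional header.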
const magic = 0x20b;
comptime {
assert(@sizeOf(ImageOptionalHeader) == 0xf0);
}
};
const ImageDataDirectory = extern struct {
virtual_address: u32,
size: u32,
};
const image_number_of_directory_entries = 0x10;
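// Import bookkeeping: a Library names a DLL and the symbols to import from it; the virtual-address fields are placeholders to be filled in once the import tables are laid out.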
const Library = struct {
symbols: ArrayList(Symbol) = .{},
name: []const u8,
name_virtual_address: u32 = 0,
virtual_address: u32 = 0,
image_thunk_virtual_address: u32 = 0,
};
const Symbol = struct {
name: []const u8,
name_virtual_address: u32 = 0,
offset_in_data: u32 = 0,
};

File diff suppressed because it is too large


@ -56,29 +56,42 @@ pub fn BlockList(comptime T: type) type {
assert(index.valid); assert(index.valid);
return @as(u30, @truncate(@as(u32, @bitCast(index)))); return @as(u30, @truncate(@as(u32, @bitCast(index))));
} }
pub fn fromInteger(usize_index: usize) Index {
const index: u32 = @intCast(usize_index);
const block: u24 = @intCast(index / item_count);
const i: u6 = @intCast(index % item_count);
return .{
.index = i,
.block = block,
};
}
}; };
pub const Iterator = struct { pub const Iterator = struct {
block_index: u26, block_index: u24,
element_index: u7, element_index: u6,
list: *const List, list: *const List,
pub fn getCurrentIndex(i: *const Iterator) Index {
return .{
.block = i.block_index,
.index = @intCast(i.element_index),
};
}
pub fn next(i: *Iterator) ?T { pub fn next(i: *Iterator) ?T {
return if (i.nextPointer()) |ptr| ptr.* else null; return if (i.nextPointer()) |ptr| ptr.* else null;
} }
pub fn nextPointer(i: *Iterator) ?*T { pub fn nextPointer(i: *Iterator) ?*T {
if (i.element_index >= item_count) { for (i.block_index..i.list.blocks.items.len) |block_index| {
i.block_index += 1; for (@as(u8, i.element_index)..item_count) |element_index| {
i.element_index = 0; if (i.list.blocks.items[i.block_index].bitset.isSet(element_index)) {
} i.element_index = @intCast(element_index);
i.element_index +%= 1;
while (i.block_index < i.list.blocks.items.len) : (i.block_index += 1) { i.block_index = @as(u24, @intCast(block_index)) + @intFromBool(i.element_index < element_index);
while (i.element_index < item_count) : (i.element_index += 1) { return &i.list.blocks.items[block_index].items[element_index];
if (i.list.blocks.items[i.block_index].bitset.isSet(i.element_index)) {
const index = i.element_index;
i.element_index += 1;
return &i.list.blocks.items[i.block_index].items[index];
} }
} }
} }
@ -136,6 +149,7 @@ pub fn BlockList(comptime T: type) type {
new_block.* = .{}; new_block.* = .{};
const index = new_block.allocateIndex() catch unreachable; const index = new_block.allocateIndex() catch unreachable;
const ptr = &new_block.items[index]; const ptr = &new_block.items[index];
list.first_block += @intFromBool(block_index != 0);
break :blk Allocation{ break :blk Allocation{
.ptr = ptr, .ptr = ptr,
.index = .{ .index = .{
@ -159,13 +173,11 @@ pub fn BlockList(comptime T: type) type {
} }
} }
pub fn indexOf(list: *List, elem: *T) Index { pub fn indexOf(list: *const List, elem: *const T) Index {
const address = @intFromPtr(elem); const address = @intFromPtr(elem);
std.debug.print("Items: {}. Block count: {}\n", .{ list.len, list.blocks.items.len });
for (list.blocks.items, 0..) |*block, block_index| { for (list.blocks.items, 0..) |*block, block_index| {
const base = @intFromPtr(&block.items[0]); const base = @intFromPtr(&block.items[0]);
const top = base + @sizeOf(T) * item_count; const top = base + @sizeOf(T) * item_count;
std.debug.print("Bitset: {}. address: 0x{x}. Base: 0x{x}. Top: 0x{x}\n", .{ block.bitset, address, base, top });
if (address >= base and address < top) { if (address >= base and address < top) {
return .{ return .{
.block = @intCast(block_index), .block = @intCast(block_index),
@ -199,3 +211,113 @@ pub fn enumFromString(comptime E: type, string: []const u8) ?E {
} }
} else null; } else null;
} }
pub fn StringKeyMap(comptime Value: type) type {
return struct {
list: std.MultiArrayList(Data) = .{},
const Key = u32;
const Data = struct {
key: Key,
value: Value,
};
pub fn length(string_map: *@This()) usize {
return string_map.list.len;
}
fn hash(string: []const u8) Key {
const string_key: Key = @truncate(std.hash.Wyhash.hash(0, string));
return string_key;
}
pub fn getKey(string_map: *const @This(), string: []const u8) ?Key {
return if (string_map.getKeyPtr(string)) |key_ptr| key_ptr.* else null;
}
pub fn getKeyPtr(string_map: *const @This(), string_key: Key) ?*const Key {
for (string_map.list.items(.key)) |*key_ptr| {
if (key_ptr.* == string_key) {
return key_ptr;
}
} else {
return null;
}
}
pub fn getValue(string_map: *const @This(), key: Key) ?Value {
if (string_map.getKeyPtr(key)) |key_ptr| {
const index = string_map.indexOfKey(key_ptr);
return string_map.list.items(.value)[index];
} else {
return null;
}
}
pub fn indexOfKey(string_map: *const @This(), key_ptr: *const Key) usize {
return @divExact(@intFromPtr(key_ptr) - @intFromPtr(string_map.list.items(.key).ptr), @sizeOf(Key));
}
const GOP = struct {
key: Key,
found_existing: bool,
};
pub fn getOrPut(string_map: *@This(), allocator: Allocator, string: []const u8, value: Value) !GOP {
const string_key: Key = @truncate(std.hash.Wyhash.hash(0, string));
for (string_map.list.items(.key)) |key| {
if (key == string_key) return .{
.key = string_key,
.found_existing = true,
};
} else {
try string_map.list.append(allocator, .{
.key = string_key,
.value = value,
});
return .{
.key = string_key,
.found_existing = false,
};
}
}
};
}
const page_size = std.mem.page_size;
extern fn pthread_jit_write_protect_np(enabled: bool) void;
pub fn mmap(size: usize, flags: packed struct {
executable: bool = false,
}) ![]align(page_size) u8 {
return switch (@import("builtin").os.tag) {
.windows => blk: {
const windows = std.os.windows;
break :blk @as([*]align(page_size) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size];
},
.linux, .macos => |os_tag| blk: {
const jit = switch (os_tag) {
.macos => 0x800,
.linux => 0,
else => unreachable,
};
const execute_flag: switch (os_tag) {
.linux => u32,
.macos => c_int,
else => unreachable,
} = if (flags.executable) std.os.PROT.EXEC else 0;
const protection_flags: u32 = @intCast(std.os.PROT.READ | std.os.PROT.WRITE | execute_flag);
const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE | jit;
const result = try std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
if (@import("builtin").cpu.arch == .aarch64 and @import("builtin").os.tag == .macos) {
if (flags.executable) {
pthread_jit_write_protect_np(false);
}
}
break :blk result;
},
else => @compileError("OS not supported"),
};
}


@ -10,6 +10,7 @@ const ArrayList = data_structures.ArrayList;
const enumFromString = data_structures.enumFromString; const enumFromString = data_structures.enumFromString;
const Compilation = @import("../Compilation.zig"); const Compilation = @import("../Compilation.zig");
const File = Compilation.File;
const fs = @import("../fs.zig"); const fs = @import("../fs.zig");
pub const Token = packed struct(u64) { pub const Token = packed struct(u64) {
@ -35,6 +36,15 @@ pub const Token = packed struct(u64) {
fixed_keyword_fn = 0x0e, fixed_keyword_fn = 0x0e,
fixed_keyword_unreachable = 0x0f, fixed_keyword_unreachable = 0x0f,
fixed_keyword_return = 0x10, fixed_keyword_return = 0x10,
fixed_keyword_ssize = 0x11,
fixed_keyword_usize = 0x12,
fixed_keyword_switch = 0x13,
fixed_keyword_if = 0x14,
fixed_keyword_else = 0x15,
fixed_keyword_struct = 0x16,
fixed_keyword_enum = 0x17,
fixed_keyword_union = 0x18,
fixed_keyword_extern = 0x19,
keyword_unsigned_integer = 0x1f, keyword_unsigned_integer = 0x1f,
keyword_signed_integer = 0x20, keyword_signed_integer = 0x20,
bang = '!', // 0x21 bang = '!', // 0x21
@ -86,6 +96,15 @@ pub const FixedKeyword = enum {
@"fn", @"fn",
@"unreachable", @"unreachable",
@"return", @"return",
ssize,
usize,
@"switch",
@"if",
@"else",
@"struct",
@"enum",
@"union",
@"extern",
}; };
pub const Result = struct { pub const Result = struct {
@ -93,7 +112,8 @@ pub const Result = struct {
time: u64, time: u64,
}; };
pub fn analyze(allocator: Allocator, text: []const u8) !Result { pub fn analyze(allocator: Allocator, text: []const u8, file_index: File.Index) !Result {
_ = file_index;
const time_start = std.time.Instant.now() catch unreachable; const time_start = std.time.Instant.now() catch unreachable;
var tokens = try ArrayList(Token).initCapacity(allocator, text.len / 8); var tokens = try ArrayList(Token).initCapacity(allocator, text.len / 8);
var index: usize = 0; var index: usize = 0;
@ -138,7 +158,7 @@ pub fn analyze(allocator: Allocator, text: []const u8) !Result {
inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)), inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)),
} else .identifier; } else .identifier;
}, },
'(', ')', '{', '}', '-', '=', ';', '#', '@', ',', '.' => |operator| blk: { '(', ')', '{', '}', '[', ']', '-', '=', ';', '#', '@', ',', '.', ':', '>', '<', '*', '!' => |operator| blk: {
index += 1; index += 1;
break :blk @enumFromInt(operator); break :blk @enumFromInt(operator);
}, },

File diff suppressed because it is too large


@ -12,6 +12,9 @@ const HashMap = data_structures.HashMap;
const lexical_analyzer = @import("lexical_analyzer.zig"); const lexical_analyzer = @import("lexical_analyzer.zig");
const Token = lexical_analyzer.Token; const Token = lexical_analyzer.Token;
const Compilation = @import("../Compilation.zig");
const File = Compilation.File;
pub const Result = struct { pub const Result = struct {
nodes: ArrayList(Node), nodes: ArrayList(Node),
node_lists: ArrayList(Node.List), node_lists: ArrayList(Node.List),
@ -70,7 +73,7 @@ pub const Node = packed struct(u128) {
container_declaration = 6, container_declaration = 6,
string_literal = 7, string_literal = 7,
compiler_intrinsic_one = 8, compiler_intrinsic_one = 8,
simple_variable_declaration = 9, simple_symbol_declaration = 9,
assign = 10, assign = 10,
@"comptime" = 11, @"comptime" = 11,
node_list = 12, node_list = 12,
@ -96,6 +99,32 @@ pub const Node = packed struct(u128) {
main_one = 32, main_one = 32,
main_two = 33, main_two = 33,
main_zero = 34, main_zero = 34,
call_two = 35,
slice_type = 36,
argument_declaration = 37,
compiler_intrinsic = 38,
ssize_type = 39,
usize_type = 40,
void_type = 41,
call = 42,
many_pointer_type = 43,
enum_literal = 44,
address_of = 45,
keyword_false = 46,
compare_equal = 47,
compare_not_equal = 48,
compare_less_than = 49,
compare_greater_than = 50,
compare_less_or_equal = 51,
compare_greater_or_equal = 52,
@"if" = 53,
if_else = 54,
@"switch" = 55,
switch_case = 56,
enum_type = 57,
enum_field = 58,
extern_qualifier = 59,
function_prototype = 60,
}; };
}; };
@ -109,52 +138,105 @@ const Analyzer = struct {
tokens: []const Token, tokens: []const Token,
token_i: u32 = 0, token_i: u32 = 0,
nodes: ArrayList(Node) = .{}, nodes: ArrayList(Node) = .{},
file: []const u8, source_file: []const u8,
file_index: File.Index,
allocator: Allocator, allocator: Allocator,
temporal_node_heap: ArrayList(Node.Index) = .{}, temporal_node_heap: ArrayList(Node.Index) = .{},
node_lists: ArrayList(Node.List) = .{}, node_lists: ArrayList(Node.List) = .{},
fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 { fn expectToken(analyzer: *Analyzer, token_id: Token.Id) !u32 {
if (analyzer.tokens[analyzer.token_i].id == token_id) { const token_i = analyzer.token_i;
const result = analyzer.token_i; const token = analyzer.tokens[token_i];
const is_expected_token = token.id == token_id;
if (is_expected_token) {
analyzer.token_i += 1; analyzer.token_i += 1;
const result = token_i;
return result; return result;
} else { } else {
std.debug.print("Unexpected token {s} when expected {s}\n", .{ @tagName(token.id), @tagName(token_id) });
return error.unexpected_token; return error.unexpected_token;
} }
} }
fn bytes(analyzer: *const Analyzer, token_index: Token.Index) []const u8 { fn bytes(analyzer: *const Analyzer, token_index: Token.Index) []const u8 {
const token = analyzer.tokens[token_index]; const token = analyzer.tokens[token_index];
return analyzer.file[token.start..][0..token.len]; return analyzer.source_file[token.start..][0..token.len];
} }
fn symbolDeclaration(analyzer: *Analyzer) !Node.Index { fn symbolDeclaration(analyzer: *Analyzer) anyerror!Node.Index {
const first = analyzer.token_i; const first = analyzer.token_i;
assert(analyzer.tokens[first].id == .fixed_keyword_var or analyzer.tokens[first].id == .fixed_keyword_const); assert(analyzer.tokens[first].id == .fixed_keyword_var or analyzer.tokens[first].id == .fixed_keyword_const);
analyzer.token_i += 1; analyzer.token_i += 1;
_ = try analyzer.expectToken(.identifier); const declaration_name_token = try analyzer.expectToken(.identifier);
const declaration_name = analyzer.bytes(declaration_name_token);
std.debug.print("Starting parsing declaration \"{s}\"\n", .{declaration_name});
// TODO: type std.debug.print("Current token: {}\n", .{analyzer.tokens[analyzer.token_i].id});
_ = try analyzer.expectToken(.equal);
const init_node = try analyzer.expression(); const type_node_index = switch (analyzer.tokens[analyzer.token_i].id) {
.colon => blk: {
_ = try analyzer.expectToken(.semicolon); analyzer.token_i += 1;
break :blk try analyzer.typeExpression();
// TODO: },
const type_node = Node.Index.invalid; else => Node.Index.invalid,
const declaration = Node{
.id = .simple_variable_declaration,
.token = first,
.left = type_node,
.right = init_node,
}; };
const declaration_init_node = analyzer.nodes.items[init_node.unwrap()]; _ = try analyzer.expectToken(.equal);
std.debug.print("Declaration init node: {}\n", .{declaration_init_node});
return analyzer.addNode(declaration); const init_node_index = try analyzer.expression();
const init_node = analyzer.nodes.items[init_node_index.unwrap()];
switch (init_node.id) {
.function_definition => {},
else => _ = try analyzer.expectToken(.semicolon),
}
// TODO:
const declaration = Node{
.id = .simple_symbol_declaration,
.token = first,
.left = type_node_index,
.right = init_node_index,
};
std.debug.print("Adding declaration \"{s}\" with init node of type: {s}\n", .{ declaration_name, @tagName(init_node.id) });
// if (analyzer.token_i < analyzer.tokens.len) {
// const first_token = analyzer.tokens[first];
// const last_token = analyzer.tokens[analyzer.token_i];
// const declaration_source_start = first_token.start;
// const declaration_source_end = last_token.start;
//
// std.debug.print("[ALL]\n", .{});
// std.debug.print("Source file ({} bytes) :\n```\n{s}\n```\n", .{ analyzer.source_file.len, analyzer.source_file });
//
// std.debug.print("[BEFORE]\n", .{});
//
// std.debug.print("Tokens before the declaration: ", .{});
// for (analyzer.tokens[0..first]) |t| {
// std.debug.print("{s} ", .{@tagName(t.id)});
// }
// std.debug.print("\n", .{});
// std.debug.print("Source before the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[0..analyzer.tokens[first].start]});
// std.debug.print("[DECLARATION]\n", .{});
//
// std.debug.print("First token: {}\n", .{first_token});
// std.debug.print("Last token: {}\n", .{last_token});
//
// std.debug.print("Tokens including declaration ([{}-{}])", .{ first, analyzer.token_i });
// for (analyzer.tokens[first..][0 .. analyzer.token_i - first]) |t| {
// std.debug.print("{s} ", .{@tagName(t.id)});
// }
// std.debug.print("\n", .{});
//
// std.debug.print("Source for the declaration:\n```\n{s}\n```\n", .{analyzer.source_file[declaration_source_start..declaration_source_end]});
// std.debug.print("[AFTER]\n", .{});
//
// // TODO
// // print("Tokens for file #{}\n", .{analyzer.
// // for (analyzer.tokens[
// }
return try analyzer.addNode(declaration);
} }
fn containerMembers(analyzer: *Analyzer) !Members { fn containerMembers(analyzer: *Analyzer) !Members {
@ -163,6 +245,7 @@ const Analyzer = struct {
while (analyzer.token_i < analyzer.tokens.len) { while (analyzer.token_i < analyzer.tokens.len) {
const first = analyzer.token_i; const first = analyzer.token_i;
std.debug.print("First token for container member: {s}\n", .{@tagName(analyzer.tokens[first].id)});
const member_node_index: Node.Index = switch (analyzer.tokens[first].id) { const member_node_index: Node.Index = switch (analyzer.tokens[first].id) {
.fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) { .fixed_keyword_comptime => switch (analyzer.tokens[analyzer.token_i + 1].id) {
.left_brace => blk: { .left_brace => blk: {
@ -182,6 +265,8 @@ const Analyzer = struct {
else => |t| @panic(@tagName(t)), else => |t| @panic(@tagName(t)),
}; };
std.debug.print("Container member {s}\n", .{@tagName(analyzer.nodes.items[member_node_index.unwrap()].id)});
try analyzer.temporal_node_heap.append(analyzer.allocator, member_node_index); try analyzer.temporal_node_heap.append(analyzer.allocator, member_node_index);
} }
@ -197,7 +282,11 @@ const Analyzer = struct {
.left = members_array[0], .left = members_array[0],
.right = members_array[1], .right = members_array[1],
}, },
else => |len| std.debug.panic("Len: {}", .{len}), else => |len| .{
.len = len,
.left = try analyzer.nodeList(members_array),
.right = Node.Index.invalid,
},
}; };
return members; return members;
@ -210,13 +299,16 @@ const Analyzer = struct {
const function_prototype = try analyzer.functionPrototype(); const function_prototype = try analyzer.functionPrototype();
const is_comptime = false; const is_comptime = false;
_ = is_comptime; _ = is_comptime;
const function_body = try analyzer.block(.{ .is_comptime = false }); return switch (analyzer.tokens[analyzer.token_i].id) {
return analyzer.addNode(.{ .left_brace => try analyzer.addNode(.{
.id = .function_definition, .id = .function_definition,
.token = token, .token = token,
.left = function_prototype, .left = function_prototype,
.right = function_body, .right = try analyzer.block(.{ .is_comptime = false }),
}); }),
.semicolon => function_prototype,
else => |t| @panic(@tagName(t)),
};
} }
fn functionPrototype(analyzer: *Analyzer) !Node.Index { fn functionPrototype(analyzer: *Analyzer) !Node.Index {
@ -225,12 +317,46 @@ const Analyzer = struct {
const arguments = try analyzer.argumentList(.left_parenthesis, .right_parenthesis); const arguments = try analyzer.argumentList(.left_parenthesis, .right_parenthesis);
const return_type = try analyzer.typeExpression(); const return_type = try analyzer.typeExpression();
return analyzer.addNode(.{ const simple_function_prototype = try analyzer.addNode(.{
.id = .simple_function_prototype, .id = .simple_function_prototype,
.token = token, .token = token,
.left = arguments, .left = arguments,
.right = return_type, .right = return_type,
}); });
return switch (analyzer.tokens[analyzer.token_i].id) {
.semicolon, .left_brace => simple_function_prototype,
else => blk: {
var list = Node.List{};
while (true) {
const attribute = switch (analyzer.tokens[analyzer.token_i].id) {
.semicolon, .left_brace => break,
.fixed_keyword_extern => b: {
const result = try analyzer.addNode(.{
.id = .extern_qualifier,
.token = analyzer.token_i,
.left = Node.Index.invalid,
.right = Node.Index.invalid,
});
analyzer.token_i += 1;
break :b result;
},
else => b: {
if (true) unreachable;
break :b undefined;
},
};
try list.append(analyzer.allocator, attribute);
}
break :blk try analyzer.addNode(.{
.id = .function_prototype,
.token = token,
.left = simple_function_prototype,
.right = try analyzer.nodeList(list.items),
});
},
};
} }
fn argumentList(analyzer: *Analyzer, maybe_start_token: ?Token.Id, end_token: Token.Id) !Node.Index { fn argumentList(analyzer: *Analyzer, maybe_start_token: ?Token.Id, end_token: Token.Id) !Node.Index {
@ -240,14 +366,32 @@ const Analyzer = struct {
var list = ArrayList(Node.Index){}; var list = ArrayList(Node.Index){};
var foo = false;
while (analyzer.tokens[analyzer.token_i].id != end_token) { while (analyzer.tokens[analyzer.token_i].id != end_token) {
@panic("TODO: argument list"); const identifier = try analyzer.expectToken(.identifier);
_ = try analyzer.expectToken(.colon);
const type_expression = try analyzer.typeExpression();
// const type_expression_node = analyzer.nodes.items[type_expression.unwrap()];
// _ = type_expression_node;
// std.debug.print("Type expression node: {}\n", .{type_expression_node});
foo = true;
if (analyzer.tokens[analyzer.token_i].id == .comma) {
analyzer.token_i += 1;
}
try list.append(analyzer.allocator, try analyzer.addNode(.{
.id = .argument_declaration,
.token = identifier,
.left = type_expression,
.right = Node.Index.invalid,
}));
} }
_ = try analyzer.expectToken(end_token); _ = try analyzer.expectToken(end_token);
if (list.items.len != 0) { if (list.items.len != 0) {
@panic("TODO: arguments"); return try analyzer.nodeList(list.items);
} else { } else {
return Node.Index.invalid; return Node.Index.invalid;
} }
@ -266,6 +410,7 @@ const Analyzer = struct {
while (analyzer.tokens[analyzer.token_i].id != .right_brace) { while (analyzer.tokens[analyzer.token_i].id != .right_brace) {
const first_statement_token = analyzer.tokens[analyzer.token_i]; const first_statement_token = analyzer.tokens[analyzer.token_i];
std.debug.print("First statement token: {s}\n", .{@tagName(first_statement_token.id)});
const statement_index = switch (first_statement_token.id) { const statement_index = switch (first_statement_token.id) {
.identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) { .identifier => switch (analyzer.tokens[analyzer.token_i + 1].id) {
.colon => { .colon => {
@ -274,11 +419,17 @@ const Analyzer = struct {
else => try analyzer.assignExpressionStatement(), else => try analyzer.assignExpressionStatement(),
}, },
.fixed_keyword_unreachable, .fixed_keyword_return => try analyzer.assignExpressionStatement(), .fixed_keyword_unreachable, .fixed_keyword_return => try analyzer.assignExpressionStatement(),
.fixed_keyword_while => try analyzer.whileStatement(options),
.fixed_keyword_while => try analyzer.whileExpression(options),
.fixed_keyword_switch => try analyzer.switchExpression(),
.fixed_keyword_if => try analyzer.ifExpression(),
.fixed_keyword_const, .fixed_keyword_var => try analyzer.symbolDeclaration(), .fixed_keyword_const, .fixed_keyword_var => try analyzer.symbolDeclaration(),
else => |t| @panic(@tagName(t)), else => |t| @panic(@tagName(t)),
}; };
const node = analyzer.nodes.items[statement_index.unwrap()];
std.debug.print("Adding statement: {s}\n", .{@tagName(node.id)});
try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index); try analyzer.temporal_node_heap.append(analyzer.allocator, statement_index);
} }
@ -323,10 +474,11 @@ const Analyzer = struct {
.right = Node.Index.invalid, .right = Node.Index.invalid,
}, },
}; };
return analyzer.addNode(node); return analyzer.addNode(node);
} }
fn whileStatement(analyzer: *Analyzer, options: Options) error{ OutOfMemory, unexpected_token, not_implemented }!Node.Index { fn whileExpression(analyzer: *Analyzer, options: Options) anyerror!Node.Index {
const while_identifier_index = try analyzer.expectToken(.fixed_keyword_while); const while_identifier_index = try analyzer.expectToken(.fixed_keyword_while);
_ = try analyzer.expectToken(.left_parenthesis); _ = try analyzer.expectToken(.left_parenthesis);
@ -344,10 +496,117 @@ const Analyzer = struct {
}); });
} }
fn switchExpression(analyzer: *Analyzer) anyerror!Node.Index {
std.debug.print("Parsing switch...\n", .{});
const switch_token = analyzer.token_i;
analyzer.token_i += 1;
_ = try analyzer.expectToken(.left_parenthesis);
const switch_expression = try analyzer.expression();
_ = try analyzer.expectToken(.right_parenthesis);
std.debug.print("Parsed switch expression...\n", .{});
_ = try analyzer.expectToken(.left_brace);
var list = Node.List{};
while (analyzer.tokens[analyzer.token_i].id != .right_brace) {
const case_token = analyzer.token_i;
std.debug.print("Parsing switch case...\n", .{});
const case_node = switch (analyzer.tokens[case_token].id) {
.fixed_keyword_else => blk: {
analyzer.token_i += 1;
break :blk Node.Index.invalid;
},
else => blk: {
var array_list = Node.List{};
while (true) {
try array_list.append(analyzer.allocator, try analyzer.expression());
switch (analyzer.tokens[analyzer.token_i].id) {
.comma => analyzer.token_i += 1,
.equal => switch (analyzer.tokens[analyzer.token_i + 1].id) {
.greater => break,
else => {},
},
else => {},
}
}
break :blk switch (array_list.items.len) {
0 => unreachable,
1 => array_list.items[0],
else => try analyzer.nodeList(array_list.items),
};
},
};
_ = try analyzer.expectToken(.equal);
_ = try analyzer.expectToken(.greater);
const is_left_brace = analyzer.tokens[analyzer.token_i].id == .left_brace;
const expr = switch (is_left_brace) {
true => try analyzer.block(.{
.is_comptime = false,
}),
false => try analyzer.assignExpression(),
};
_ = try analyzer.expectToken(.comma);
const node = try analyzer.addNode(.{
.id = .switch_case,
.token = case_token,
.left = case_node,
.right = expr,
});
try list.append(analyzer.allocator, node);
}
_ = try analyzer.expectToken(.right_brace);
return try analyzer.addNode(.{
.id = .@"switch",
.token = switch_token,
.left = switch_expression,
.right = try analyzer.nodeList(list.items),
});
}
fn ifExpression(analyzer: *Analyzer) anyerror!Node.Index {
const if_token = analyzer.token_i;
analyzer.token_i += 1;
_ = try analyzer.expectToken(.left_parenthesis);
const if_expression = try analyzer.expression();
_ = try analyzer.expectToken(.right_parenthesis);
const if_block = try analyzer.block(.{ .is_comptime = false });
const if_node = try analyzer.addNode(.{
.id = .@"if",
.token = if_token,
.left = if_expression,
.right = if_block,
});
const result = switch (analyzer.tokens[analyzer.token_i].id) {
.fixed_keyword_else => blk: {
analyzer.token_i += 1;
break :blk try analyzer.addNode(.{
.id = .if_else,
.token = if_token,
.left = if_node,
.right = try analyzer.expression(),
});
},
else => if_node,
};
return result;
}
fn assignExpression(analyzer: *Analyzer) !Node.Index { fn assignExpression(analyzer: *Analyzer) !Node.Index {
const expr = try analyzer.expression(); const expr = try analyzer.expression();
const expression_id: Node.Id = switch (analyzer.tokens[analyzer.token_i].id) { const expression_id: Node.Id = switch (analyzer.tokens[analyzer.token_i].id) {
.semicolon => return expr, .semicolon, .comma => return expr,
.equal => .assign, .equal => .assign,
else => |t| @panic(@tagName(t)), else => |t| @panic(@tagName(t)),
}; };
@ -363,7 +622,7 @@ const Analyzer = struct {
.right = try analyzer.expression(), .right = try analyzer.expression(),
}; };
std.debug.print("assign:\nleft: {}.\nright: {}\n", .{ node.left, node.right }); std.debug.print("assign:\nleft: {}.\nright: {}\n", .{ node.left, node.right });
return analyzer.addNode(node); return try analyzer.addNode(node);
} }
fn compilerIntrinsic(analyzer: *Analyzer) !Node.Index { fn compilerIntrinsic(analyzer: *Analyzer) !Node.Index {
@ -390,7 +649,7 @@ const Analyzer = struct {
const parameters = analyzer.temporal_node_heap.items[temporal_heap_top..]; const parameters = analyzer.temporal_node_heap.items[temporal_heap_top..];
return switch (parameters.len) { return try switch (parameters.len) {
1 => analyzer.addNode(.{ 1 => analyzer.addNode(.{
.id = .compiler_intrinsic_one, .id = .compiler_intrinsic_one,
.token = hash, .token = hash,
@ -403,59 +662,114 @@ const Analyzer = struct {
.left = parameters[0], .left = parameters[0],
.right = parameters[1], .right = parameters[1],
}), }),
else => unreachable, else => analyzer.addNode(.{
.id = .compiler_intrinsic,
.token = hash,
.left = try analyzer.nodeList(parameters),
.right = Node.Index.invalid,
}),
}; };
} }
fn expression(analyzer: *Analyzer) error{ OutOfMemory, not_implemented, unexpected_token }!Node.Index { fn expression(analyzer: *Analyzer) anyerror!Node.Index {
return analyzer.expressionPrecedence(0); return try analyzer.expressionPrecedence(0);
} }
fn expressionPrecedence(analyzer: *Analyzer, minimum_precedence: i32) !Node.Index { fn expressionPrecedence(analyzer: *Analyzer, minimum_precedence: i32) !Node.Index {
var result = try analyzer.prefixExpression(); var result = try analyzer.prefixExpression();
if (result.valid) {
const prefix_node = analyzer.nodes.items[result.unwrap()];
std.debug.print("Prefix: {}\n", .{prefix_node.id});
}
var banned_precedence: i32 = -1; var banned_precedence: i32 = -1;
while (analyzer.token_i < analyzer.tokens.len) { while (analyzer.token_i < analyzer.tokens.len) {
const precedence: i32 = switch (analyzer.tokens[analyzer.token_i].id) { const token = analyzer.tokens[analyzer.token_i];
.equal, .semicolon, .right_parenthesis, .right_brace, .comma, .period => -1, // std.debug.print("Looping in expression precedence with token {}\n", .{token});
else => |t| @panic(@tagName(t)), const precedence: i32 = switch (token.id) {
.equal, .semicolon, .right_parenthesis, .right_brace, .comma, .period, .fixed_keyword_const, .fixed_keyword_var => -1,
.bang => switch (analyzer.tokens[analyzer.token_i + 1].id) {
.equal => 30,
else => unreachable,
},
else => |t| {
const start = token.start;
std.debug.print("Source file:\n```\n{s}\n```\n", .{analyzer.source_file[start..]});
@panic(@tagName(t));
},
}; };
std.debug.print("Precedence: {} ({s}) (file #{})\n", .{ precedence, @tagName(token.id), analyzer.file_index.uniqueInteger() });
if (precedence < minimum_precedence) { if (precedence < minimum_precedence) {
std.debug.print("Breaking for minimum_precedence\n", .{});
break; break;
} }
if (precedence == banned_precedence) { if (precedence == banned_precedence) {
std.debug.print("Breaking for banned precedence\n", .{});
break; break;
} }
const operator_token = analyzer.token_i;
const is_bang_equal = analyzer.tokens[operator_token].id == .bang and analyzer.tokens[operator_token + 1].id == .equal;
analyzer.token_i += @as(u32, 1) + @intFromBool(is_bang_equal);
// TODO: fix this // TODO: fix this
const node_index = try analyzer.expressionPrecedence(1); const right = try analyzer.expressionPrecedence(precedence + 1);
_ = node_index;
unreachable; const operation_id: Node.Id = switch (is_bang_equal) {
true => .compare_not_equal,
false => switch (analyzer.tokens[operator_token].id) {
else => |t| @panic(@tagName(t)),
},
};
result = try analyzer.addNode(.{
.id = operation_id,
.token = operator_token,
.left = result,
.right = right,
});
const associativity: Associativity = switch (operation_id) {
.compare_equal, .compare_not_equal, .compare_less_than, .compare_greater_than, .compare_less_or_equal, .compare_greater_or_equal => .none,
else => .left,
};
if (associativity == .none) {
banned_precedence = precedence;
}
} }
return result; return result;
} }
fn prefixExpression(analyzer: *Analyzer) !Node.Index { fn prefixExpression(analyzer: *Analyzer) !Node.Index {
switch (analyzer.tokens[analyzer.token_i].id) { const token = analyzer.token_i;
// .bang => .bool_not, // std.debug.print("Prefix...\n", .{});
// .minus => .negation, const node_id: Node.Id = switch (analyzer.tokens[token].id) {
// .tilde => .bit_not,
// .minus_percent => .negation_wrap,
// .ampersand => .address_of,
// .keyword_try => .@"try",
// .keyword_await => .@"await",
else => |pref| { else => |pref| {
_ = pref; _ = pref;
return analyzer.primaryExpression(); return try analyzer.primaryExpression();
}, },
} .at => .address_of,
.bang => switch (analyzer.tokens[token + 1].id) {
.equal => return try analyzer.primaryExpression(),
else => unreachable,
},
.minus, .tilde => |t| @panic(@tagName(t)),
};
return error.not_implemented; return try analyzer.addNode(.{
.id = node_id,
.token = blk: {
analyzer.token_i += 1;
break :blk token;
},
.left = try analyzer.prefixExpression(),
.right = Node.Index.invalid,
});
} }
fn primaryExpression(analyzer: *Analyzer) !Node.Index { fn primaryExpression(analyzer: *Analyzer) !Node.Index {
@ -464,8 +778,8 @@ const Analyzer = struct {
.colon => unreachable, .colon => unreachable,
else => try analyzer.curlySuffixExpression(), else => try analyzer.curlySuffixExpression(),
}, },
.string_literal, .number_literal, .fixed_keyword_true, .fixed_keyword_false, .hash, .fixed_keyword_unreachable => try analyzer.curlySuffixExpression(), .string_literal, .number_literal, .fixed_keyword_true, .fixed_keyword_false, .hash, .fixed_keyword_unreachable, .fixed_keyword_switch, .period, .fixed_keyword_enum, .keyword_signed_integer, .keyword_unsigned_integer => try analyzer.curlySuffixExpression(),
.fixed_keyword_fn => analyzer.function(), .fixed_keyword_fn => try analyzer.function(),
.fixed_keyword_return => try analyzer.addNode(.{ .fixed_keyword_return => try analyzer.addNode(.{
.id = .@"return", .id = .@"return",
.token = blk: { .token = blk: {
@ -477,7 +791,7 @@ const Analyzer = struct {
.right = Node.Index.invalid, .right = Node.Index.invalid,
}), }),
// todo:? // todo:?
// .left_brace => try analyzer.block(), .left_brace => try analyzer.block(.{ .is_comptime = false }),
else => |id| { else => |id| {
log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)}); log.warn("By default, calling curlySuffixExpression with {s}", .{@tagName(id)});
unreachable; unreachable;
@ -500,7 +814,7 @@ const Analyzer = struct {
const token_i = analyzer.token_i; const token_i = analyzer.token_i;
assert(analyzer.tokens[token_i].id == .fixed_keyword_noreturn); assert(analyzer.tokens[token_i].id == .fixed_keyword_noreturn);
analyzer.token_i += 1; analyzer.token_i += 1;
return analyzer.addNode(.{ return try analyzer.addNode(.{
.id = .keyword_noreturn, .id = .keyword_noreturn,
.token = token_i, .token = token_i,
.left = Node.Index.invalid, .left = Node.Index.invalid,
@ -508,12 +822,15 @@ const Analyzer = struct {
}); });
} }
fn boolTrue(analyzer: *Analyzer) !Node.Index { fn boolLiteral(analyzer: *Analyzer) !Node.Index {
const token_i = analyzer.token_i; const token_i = analyzer.token_i;
assert(analyzer.tokens[token_i].id == .fixed_keyword_true);
analyzer.token_i += 1; analyzer.token_i += 1;
return analyzer.addNode(.{ return try analyzer.addNode(.{
.id = .keyword_true, .id = switch (analyzer.tokens[token_i].id) {
.fixed_keyword_true => .keyword_true,
.fixed_keyword_false => .keyword_false,
else => unreachable,
},
.token = token_i, .token = token_i,
.left = Node.Index.invalid, .left = Node.Index.invalid,
.right = Node.Index.invalid, .right = Node.Index.invalid,
@ -521,9 +838,54 @@ const Analyzer = struct {
} }
fn typeExpression(analyzer: *Analyzer) !Node.Index { fn typeExpression(analyzer: *Analyzer) !Node.Index {
return switch (analyzer.tokens[analyzer.token_i].id) { const first = analyzer.token_i;
.identifier, .fixed_keyword_noreturn, .fixed_keyword_true, .fixed_keyword_false, .hash, .string_literal, .number_literal, .fixed_keyword_unreachable, .keyword_unsigned_integer, .keyword_signed_integer => try analyzer.errorUnionExpression(), return switch (analyzer.tokens[first].id) {
else => |id| @panic(@tagName(id)), else => try analyzer.errorUnionExpression(),
.at => unreachable, // pointer
.bang => unreachable, // error
.left_bracket => switch (analyzer.tokens[analyzer.token_i + 1].id) {
.at => {
// many item pointer
analyzer.token_i += 2;
_ = try analyzer.expectToken(.right_bracket);
const is_const = analyzer.tokens[analyzer.token_i].id == .fixed_keyword_const;
analyzer.token_i += @intFromBool(is_const);
const pointer_element_type = try analyzer.typeExpression();
return try analyzer.addNode(.{
.id = .many_pointer_type,
.token = first,
.left = pointer_element_type,
.right = Node.Index.invalid,
});
},
else => {
const left_bracket = analyzer.token_i;
analyzer.token_i += 1;
// TODO: compute length
const length_expression = false;
_ = try analyzer.expectToken(.right_bracket);
// Slice
if (!length_expression) {
// TODO: modifiers
const is_const = analyzer.tokens[analyzer.token_i].id == .fixed_keyword_const;
analyzer.token_i += @intFromBool(is_const);
const slice_type = try analyzer.typeExpression();
return try analyzer.addNode(.{
.id = .slice_type,
.token = left_bracket,
.left = Node.Index.invalid,
.right = slice_type,
});
} else {
unreachable;
}
},
},
}; };
} }
@ -531,7 +893,10 @@ const Analyzer = struct {
const suffix_expression = try analyzer.suffixExpression(); const suffix_expression = try analyzer.suffixExpression();
return switch (analyzer.tokens[analyzer.token_i].id) { return switch (analyzer.tokens[analyzer.token_i].id) {
.bang => unreachable, .bang => switch (analyzer.tokens[analyzer.token_i + 1].id) {
.equal => suffix_expression,
else => unreachable,
},
else => suffix_expression, else => suffix_expression,
}; };
} }
@ -550,26 +915,43 @@ const Analyzer = struct {
var expression_list = ArrayList(Node.Index){}; var expression_list = ArrayList(Node.Index){};
while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) { while (analyzer.tokens[analyzer.token_i].id != .right_parenthesis) {
std.debug.print("Loop\n", .{}); const current_token = analyzer.tokens[analyzer.token_i];
std.debug.print("Current token: {s}\n", .{@tagName(current_token.id)});
const parameter = try analyzer.expression(); const parameter = try analyzer.expression();
try expression_list.append(analyzer.allocator, parameter); try expression_list.append(analyzer.allocator, parameter);
analyzer.token_i += @intFromBool(switch (analyzer.tokens[analyzer.token_i].id) { const parameter_node = analyzer.nodes.items[parameter.unwrap()];
.comma, .right_parenthesis => true, std.debug.print("Paremeter node: {s}\n", .{@tagName(parameter_node.id)});
const next_token = analyzer.tokens[analyzer.token_i];
std.debug.print("next token: {s}\n", .{@tagName(next_token.id)});
analyzer.token_i += @intFromBool(switch (next_token.id) {
.comma => true,
.colon, .right_brace, .right_bracket => unreachable, .colon, .right_brace, .right_bracket => unreachable,
else => unreachable, .right_parenthesis => false,
else => |t| @panic(@tagName(t)),
}); });
} }
_ = try analyzer.expectToken(.right_parenthesis); _ = try analyzer.expectToken(.right_parenthesis);
// const is_comma = analyzer.tokens[analyzer.token_i].id == .comma; // const is_comma = analyzer.tokens[analyzer.token_i].id == .comma;
return analyzer.addNode(switch (expression_list.items.len) { return try analyzer.addNode(switch (expression_list.items.len) {
0 => .{ 0 => .{
.id = .call_one, .id = .call_one,
.token = left_parenthesis, .token = left_parenthesis,
.left = result, .left = result,
.right = Node.Index.invalid, .right = Node.Index.invalid,
}, },
else => |len| std.debug.panic("len: {}", .{len}), 1 => .{
.id = .call_two,
.token = left_parenthesis,
.left = result,
.right = expression_list.items[0],
},
else => .{
.id = .call,
.token = left_parenthesis,
.left = result,
.right = try analyzer.nodeList(expression_list.items),
},
}); });
} else { } else {
return result; return result;
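The call-site handling above encodes a zero-argument call as call_one, a single-argument call as call_two with the argument in right, and anything larger as call with right pointing at a separate node list. A minimal sketch of that encoding with simplified stand-in types (not the compiler's Node/Node.Index):

    const std = @import("std");

    const NodeId = enum { call_one, call_two, call };

    const Node = struct {
        id: NodeId,
        left: u32, // callee node index
        right: u32, // argument node index, node-list index, or invalid
    };

    // Recover the argument count from the node kind; node_list_lengths stands in
    // for the lengths of the analyzer's node_lists entries.
    fn argumentCount(node: Node, node_list_lengths: []const u32) u32 {
        return switch (node.id) {
            .call_one => 0,
            .call_two => 1,
            .call => node_list_lengths[node.right],
        };
    }

    pub fn main() void {
        const lists = [_]u32{3};
        std.debug.print("{}\n", .{argumentCount(.{ .id = .call, .left = 5, .right = 0 }, &lists)}); // 3
        std.debug.print("{}\n", .{argumentCount(.{ .id = .call_two, .left = 5, .right = 9 }, &lists)}); // 1
    }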
@ -583,7 +965,7 @@ const Analyzer = struct {
fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index { fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index {
const token_i = analyzer.token_i; const token_i = analyzer.token_i;
const token = analyzer.tokens[token_i]; const token = analyzer.tokens[token_i];
return switch (token.id) { return try switch (token.id) {
.string_literal => blk: { .string_literal => blk: {
analyzer.token_i += 1; analyzer.token_i += 1;
break :blk analyzer.addNode(.{ break :blk analyzer.addNode(.{
@ -618,9 +1000,9 @@ const Analyzer = struct {
}); });
}, },
}, },
.fixed_keyword_noreturn => try analyzer.noReturn(), .fixed_keyword_noreturn => analyzer.noReturn(),
.fixed_keyword_true => try analyzer.boolTrue(), .fixed_keyword_true, .fixed_keyword_false => analyzer.boolLiteral(),
.fixed_keyword_unreachable => try analyzer.addNode(.{ .fixed_keyword_unreachable => analyzer.addNode(.{
.id = .@"unreachable", .id = .@"unreachable",
.token = blk: { .token = blk: {
analyzer.token_i += 1; analyzer.token_i += 1;
@ -630,7 +1012,7 @@ const Analyzer = struct {
.right = Node.Index.invalid, .right = Node.Index.invalid,
}), }),
.hash => analyzer.compilerIntrinsic(), .hash => analyzer.compilerIntrinsic(),
.keyword_unsigned_integer, .keyword_signed_integer => |signedness| try analyzer.addNode(.{ .keyword_unsigned_integer, .keyword_signed_integer => |signedness| analyzer.addNode(.{
.id = switch (signedness) { .id = switch (signedness) {
.keyword_unsigned_integer => .unsigned_integer_type, .keyword_unsigned_integer => .unsigned_integer_type,
.keyword_signed_integer => .signed_integer_type, .keyword_signed_integer => .signed_integer_type,
@ -640,9 +1022,78 @@ const Analyzer = struct {
analyzer.token_i += 1; analyzer.token_i += 1;
break :blk token_i; break :blk token_i;
}, },
.left = @bitCast(@as(u32, std.fmt.parseInt(u16, analyzer.bytes(token_i)[1..], 10) catch unreachable)), .left = @bitCast(@as(u32, try std.fmt.parseInt(u16, analyzer.bytes(token_i)[1..], 10))),
.right = Node.Index.invalid, .right = Node.Index.invalid,
}), }),
.fixed_keyword_usize, .fixed_keyword_ssize => |size_type| analyzer.addNode(.{
.id = switch (size_type) {
.fixed_keyword_usize => .usize_type,
.fixed_keyword_ssize => .ssize_type,
else => unreachable,
},
.token = blk: {
analyzer.token_i += 1;
break :blk token_i;
},
.left = Node.Index.invalid,
.right = Node.Index.invalid,
}),
.fixed_keyword_void => analyzer.addNode(.{
.id = .void_type,
.token = blk: {
analyzer.token_i += 1;
break :blk token_i;
},
.left = Node.Index.invalid,
.right = Node.Index.invalid,
}),
.fixed_keyword_switch => try analyzer.switchExpression(),
.period => switch (analyzer.tokens[token_i + 1].id) {
.identifier => try analyzer.addNode(.{
.id = .enum_literal,
.token = blk: {
analyzer.token_i += 2;
break :blk token_i;
},
.left = Node.Index.invalid,
.right = Node.Index.invalid,
}),
else => |t| @panic(@tagName(t)),
},
.fixed_keyword_enum => blk: {
analyzer.token_i += 1;
_ = try analyzer.expectToken(.left_brace);
var enum_field_list = Node.List{};
while (analyzer.tokens[analyzer.token_i].id != .right_brace) {
const enum_name = try analyzer.expectToken(.identifier);
const value_associated = switch (analyzer.tokens[analyzer.token_i].id) {
.comma => comma: {
analyzer.token_i += 1;
break :comma Node.Index.invalid;
},
else => |t| @panic(@tagName(t)),
};
const enum_field_node = try analyzer.addNode(.{
.id = .enum_field,
.token = enum_name,
.left = value_associated,
.right = Node.Index.invalid,
});
try enum_field_list.append(analyzer.allocator, enum_field_node);
}
analyzer.token_i += 1;
break :blk try analyzer.addNode(.{
.id = .enum_type,
.token = token_i,
.left = try analyzer.nodeList(enum_field_list.items),
.right = Node.Index.invalid,
});
},
else => |foo| { else => |foo| {
switch (foo) { switch (foo) {
.identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.bytes(token_i) }), .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.bytes(token_i) }),
@ -658,7 +1109,7 @@ const Analyzer = struct {
return switch (token.id) { return switch (token.id) {
.left_bracket => unreachable, .left_bracket => unreachable,
.period => switch (analyzer.tokens[analyzer.token_i + 1].id) { .period => switch (analyzer.tokens[analyzer.token_i + 1].id) {
.identifier => analyzer.addNode(.{ .identifier => try analyzer.addNode(.{
.id = .field_access, .id = .field_access,
.token = blk: { .token = blk: {
const main_token = analyzer.token_i; const main_token = analyzer.token_i;
@ -684,7 +1135,13 @@ const Analyzer = struct {
fn addNode(analyzer: *Analyzer, node: Node) !Node.Index { fn addNode(analyzer: *Analyzer, node: Node) !Node.Index {
const index = analyzer.nodes.items.len; const index = analyzer.nodes.items.len;
try analyzer.nodes.append(analyzer.allocator, node); try analyzer.nodes.append(analyzer.allocator, node);
std.debug.print("Adding node #{} {s}\n", .{ index, @tagName(node.id) }); std.debug.print("Adding node #{} (0x{x}) {s} to file #{}\n", .{ index, @intFromPtr(&analyzer.nodes.items[index]), @tagName(node.id), analyzer.file_index.uniqueInteger() });
// if (node.id == .identifier) {
// std.debug.print("Node identifier: {s}\n", .{analyzer.bytes(node.token)});
// }
if (node.id == .call) {
std.debug.print("Call two: {}\n", .{node});
}
return Node.Index{ return Node.Index{
.value = @intCast(index), .value = @intCast(index),
}; };
@ -695,10 +1152,12 @@ const Analyzer = struct {
var new_node_list = try ArrayList(Node.Index).initCapacity(analyzer.allocator, input.len); var new_node_list = try ArrayList(Node.Index).initCapacity(analyzer.allocator, input.len);
try new_node_list.appendSlice(analyzer.allocator, input); try new_node_list.appendSlice(analyzer.allocator, input);
try analyzer.node_lists.append(analyzer.allocator, new_node_list); try analyzer.node_lists.append(analyzer.allocator, new_node_list);
return try analyzer.addNode(.{
return .{ .id = .node_list,
.value = @intCast(index), .token = 0,
}; .left = .{ .value = @intCast(index) },
.right = Node.Index.invalid,
});
} }
}; };
@ -708,11 +1167,12 @@ const Members = struct {
right: Node.Index, right: Node.Index,
}; };
pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !Result { pub fn analyze(allocator: Allocator, tokens: []const Token, source_file: []const u8, file_index: File.Index) !Result {
const start = std.time.Instant.now() catch unreachable; const start = std.time.Instant.now() catch unreachable;
var analyzer = Analyzer{ var analyzer = Analyzer{
.tokens = tokens, .tokens = tokens,
.file = file, .source_file = source_file,
.file_index = file_index,
.allocator = allocator, .allocator = allocator,
}; };
const node_index = try analyzer.addNode(.{ const node_index = try analyzer.addNode(.{
@ -725,10 +1185,12 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R
assert(node_index.value == 0); assert(node_index.value == 0);
assert(node_index.valid); assert(node_index.valid);
std.debug.print("Start Parsing file root members\n", .{});
const members = try analyzer.containerMembers(); const members = try analyzer.containerMembers();
std.debug.print("End Parsing file root members\n", .{});
switch (members.len) { switch (members.len) {
0 => unreachable, 0 => analyzer.nodes.items[0].id = .main_zero,
1 => { 1 => {
analyzer.nodes.items[0].id = .main_one; analyzer.nodes.items[0].id = .main_one;
analyzer.nodes.items[0].left = members.left; analyzer.nodes.items[0].left = members.left;
@ -738,7 +1200,10 @@ pub fn analyze(allocator: Allocator, tokens: []const Token, file: []const u8) !R
analyzer.nodes.items[0].left = members.left; analyzer.nodes.items[0].left = members.left;
analyzer.nodes.items[0].right = members.right; analyzer.nodes.items[0].right = members.right;
}, },
else => unreachable, else => {
analyzer.nodes.items[0].id = .main;
analyzer.nodes.items[0].left = members.left;
},
} }
const end = std.time.Instant.now() catch unreachable; const end = std.time.Instant.now() catch unreachable;
@ -775,3 +1240,8 @@ pub const SymbolDeclaration = struct {
initialization_node: Node.Index, initialization_node: Node.Index,
mutability_token: Token.Index, mutability_token: Token.Index,
}; };
const Associativity = enum {
none,
left,
};

View File

@ -1,6 +1,7 @@
const std = @import("std"); const std = @import("std");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const assert = std.debug.assert; const assert = std.debug.assert;
const equal = std.mem.eql;
const Compilation = @import("Compilation.zig"); const Compilation = @import("Compilation.zig");
@ -9,20 +10,64 @@ const default_src_file = "src/test/main.nat";
pub fn main() !void { pub fn main() !void {
const allocator = std.heap.page_allocator; const allocator = std.heap.page_allocator;
const arguments = try std.process.argsAlloc(allocator); const compilation_descriptor = try parseArguments(allocator);
if (arguments.len == 2) {
try singleCompilation(allocator, arguments[1]);
} else {
@panic("Wrong arguments");
}
}
fn singleCompilation(allocator: Allocator, main_file_path: []const u8) !void {
const compilation = try Compilation.init(allocator); const compilation = try Compilation.init(allocator);
try compilation.compileModule(.{ try compilation.compileModule(compilation_descriptor);
.main_package_path = main_file_path, }
});
const ArgumentParsingError = error{
main_package_path_not_specified,
};
fn parseArguments(allocator: Allocator) !Compilation.Module.Descriptor {
const arguments = (try std.process.argsAlloc(allocator))[1..];
var maybe_executable_path: ?[]const u8 = null;
var maybe_main_package_path: ?[]const u8 = null;
var target_triplet: []const u8 = "x86_64-linux-gnu";
var i: usize = 0;
while (i < arguments.len) : (i += 1) {
const current_argument = arguments[i];
if (equal(u8, current_argument, "-o")) {
if (i + 1 < arguments.len) {
maybe_executable_path = arguments[i + 1];
assert(maybe_executable_path.?.len != 0);
i += 1;
} else {
unreachable;
}
} else if (equal(u8, current_argument, "-target")) {
if (i + 1 < arguments.len) {
target_triplet = arguments[i + 1];
i += 1;
} else {
unreachable;
}
} else {
maybe_main_package_path = current_argument;
}
}
const main_package_path = maybe_main_package_path orelse return error.main_package_path_not_specified;
const executable_path = maybe_executable_path orelse blk: {
const executable_name = std.fs.path.basename(main_package_path[0 .. main_package_path.len - "/main.nat".len]);
assert(executable_name.len > 0);
const result = try std.mem.concat(allocator, u8, &.{ "nat/", executable_name });
break :blk result;
};
const cross_target = try std.zig.CrossTarget.parse(.{ .arch_os_abi = target_triplet });
const target = cross_target.toTarget();
std.debug.print("Target: {}\n", .{target});
return .{
.main_package_path = main_package_path,
.executable_path = executable_path,
.target = target,
};
} }
test { test {
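The default output path in parseArguments above is derived by stripping the trailing "/main.nat" from the main package path, taking the basename, and prefixing "nat/". A small standalone sketch of that rule, using the hello_world test path added later in this commit as sample input:

    const std = @import("std");

    pub fn main() !void {
        var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
        defer arena.deinit();
        const allocator = arena.allocator();

        // Same derivation as parseArguments when no "-o" flag is given.
        const main_package_path = "test/hello_world/main.nat";
        const executable_name = std.fs.path.basename(main_package_path[0 .. main_package_path.len - "/main.nat".len]);
        const executable_path = try std.mem.concat(allocator, u8, &.{ "nat/", executable_name });
        std.debug.print("{s}\n", .{executable_path}); // prints "nat/hello_world"
    }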

View File

@ -1,3 +1,3 @@
const main = fn() s32 { const main = fn() s32 {
return 0; return 0;
}; }

View File

@ -0,0 +1,6 @@
const std = #import("std");
const main = fn() s32 {
std.print("Hello world!\n", 13);
return 0;
}

4
test/stack/main.nat Normal file
View File

@ -0,0 +1,4 @@
const main = fn() s32 {
var a : s32 = 0;
return a;
}