write better lexer and parser

David Gonzalez Martin 2023-09-03 20:44:18 -06:00
parent 30931dc6f1
commit e8a2e79a00
10 changed files with 608 additions and 311 deletions

.vscode/launch.json vendored Normal file

@@ -0,0 +1,17 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "cppvsdbg",
"request": "launch",
"name": "Debug",
"program": "${workspaceFolder}/zig-out/bin/compiler.exe",
"args": [],
"cwd": "${workspaceFolder}",
"preLaunchTask": "zig build"
}
]
}

.vscode/tasks.json vendored Normal file

@@ -0,0 +1,12 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "zig build",
"type": "shell",
"command": "zig build"
}
]
}

src/compiler.zig Normal file

@@ -0,0 +1,21 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const data_structures = @import("data_structures.zig");
const lexer = @import("lexer.zig");
const parser = @import("parser.zig");
test {
_ = lexer;
_ = parser;
}
pub fn cycle(allocator: Allocator, file_relative_path: []const u8) !void {
const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize));
std.debug.print("File:\n\n```\n{s}\n```\n", .{file});
const lexer_result = try lexer.lex(allocator, file);
const parser_result = try parser.parse(allocator, &lexer_result);
_ = parser_result;
}

src/data_structures.zig
@@ -1,3 +1,4 @@
 const std = @import("std");
 pub const ArrayList = std.ArrayListUnmanaged;
+pub const HashMap = std.AutoHashMap;

src/emit.zig
@@ -8,19 +8,12 @@ const expectEqual = std.testing.expectEqual;
 const ir = @import("ir.zig");
 
-pub const Result = struct {
-    pub fn free(result: *Result, allocator: Allocator) void {
-        _ = allocator;
-        _ = result;
-    }
-};
-
 const Section = struct {
     content: []align(page_size) u8,
     index: usize = 0,
 };
 
-const Image = struct {
+const Result = struct {
     sections: struct {
         text: Section,
         rodata: Section,
@@ -28,8 +21,8 @@ const Image = struct {
     },
     entry_point: u32 = 0,
 
-    fn create() !Image {
-        return Image{
+    fn create() !Result {
+        return Result{
             .sections = .{
                 .text = .{ .content = try mmap(page_size, .{ .executable = true }) },
                 .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) },
@@ -38,33 +31,47 @@
         };
     }
 
-    fn destroy(image: *Image) void {
+    fn destroy(image: *Result) void {
         inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| {
-            std.os.munmap(@field(image.sections, field_name).content);
+            const section_bytes = @field(image.sections, field_name).content;
+            switch (@import("builtin").os.tag) {
+                .linux => std.os.munmap(section_bytes),
+                .windows => std.os.windows.VirtualFree(section_bytes.ptr, 0, std.os.windows.MEM_RELEASE),
+                else => @compileError("OS not supported"),
+            }
         }
     }
 
-    inline fn mmap(size: usize, flags: packed struct {
+    fn mmap(size: usize, flags: packed struct {
         executable: bool,
     }) ![]align(page_size) u8 {
-        const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0;
-        const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE;
-
-        return std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
+        return switch (@import("builtin").os.tag) {
+            .windows => blk: {
+                const windows = std.os.windows;
+                break :blk @as([*]align(0x1000) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size];
+            },
+            .linux => blk: {
+                const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0;
+                const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE;
+
+                break :blk std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
+            },
+            else => @compileError("OS not supported"),
+        };
     }
 
-    fn appendCode(image: *Image, code: []const u8) void {
+    fn appendCode(image: *Result, code: []const u8) void {
         const destination = image.sections.text.content[image.sections.text.index..][0..code.len];
         @memcpy(destination, code);
         image.sections.text.index += code.len;
     }
 
-    fn appendCodeByte(image: *Image, code_byte: u8) void {
+    fn appendCodeByte(image: *Result, code_byte: u8) void {
         image.sections.text.content[image.sections.text.index] = code_byte;
         image.sections.text.index += 1;
     }
 
-    fn getEntryPoint(image: *const Image, comptime Function: type) *const Function {
+    fn getEntryPoint(image: *const Result, comptime Function: type) *const Function {
         comptime {
             assert(@typeInfo(Function) == .Fn);
         }
@@ -72,6 +79,16 @@ const Image = struct {
         assert(image.sections.text.content.len > 0);
         return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point]));
     }
+
+    pub fn free(result: *Result, allocator: Allocator) void {
+        _ = allocator;
+        inline for (comptime std.meta.fieldNames(@TypeOf(result.sections))) |field_name| {
+            switch (@import("builtin").os.tag) {
+                .windows => unreachable,
+                else => std.os.munmap(@field(result.sections, field_name).content),
+            }
+        }
+    }
 };
 
 const Rex = enum(u8) {
@@ -123,7 +140,7 @@ const ret = 0xc3;
 const mov_a_imm = [1]u8{0xb8};
 const mov_reg_imm8: u8 = 0xb0;
 
-inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
+fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
     comptime {
         assert(@typeInfo(@TypeOf(integer)) == .Int);
     }
@@ -131,7 +148,7 @@ inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
     return @as([@sizeOf(@TypeOf(integer))]u8, @bitCast(integer));
 }
 
-inline fn movAImm(image: *Image, integer: anytype) void {
+fn movAImm(image: *Result, integer: anytype) void {
     const T = @TypeOf(integer);
     image.appendCode(&(switch (T) {
         u8, i8 => .{mov_reg_imm8 | @intFromEnum(GPRegister.a)},
@@ -143,8 +160,9 @@ inline fn movAImm(image: *Image, integer: anytype) void {
 }
 
 test "ret void" {
-    var image = try Image.create();
-    defer image.destroy();
+    const allocator = std.testing.allocator;
+    var image = try Result.create();
+    defer image.free(allocator);
 
     image.appendCodeByte(ret);
 
     const function_pointer = image.getEntryPoint(fn () callconv(.C) void);
@@ -166,8 +184,8 @@ fn getMaxInteger(comptime T: type) T {
 
 test "ret integer" {
     inline for (integer_types_to_test) |Int| {
-        var image = try Image.create();
-        defer image.destroy();
+        var image = try Result.create();
+        defer image.free(std.testing.allocator);
 
         const expected_number = getMaxInteger(Int);
 
         movAImm(&image, expected_number);
@@ -185,11 +203,11 @@ const LastByte = packed struct(u8) {
     always_on: u2 = 0b11,
 };
 
-fn movRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
+fn movRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
     dstRmSrcR(image, T, .mov, dst, src);
 }
 
-fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void {
+fn dstRmSrcR(image: *Result, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void {
     const last_byte: u8 = @bitCast(LastByte{
         .dst = dst,
         .src = src,
@@ -216,8 +234,9 @@ fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPReg
 
 test "ret integer argument" {
     inline for (integer_types_to_test) |Int| {
-        var image = try Image.create();
-        defer image.destroy();
+        const allocator = std.testing.allocator;
+        var image = try Result.create();
+        defer image.free(allocator);
 
         const number = getMaxInteger(Int);
 
         movRmR(&image, Int, .a, .di);
@@ -239,14 +258,15 @@ fn getRandomNumberRange(comptime T: type, min: T, max: T) T {
     };
 }
 
-fn subRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
+fn subRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
     dstRmSrcR(image, T, .sub, dst, src);
 }
 
 test "ret sub arguments" {
     inline for (integer_types_to_test) |Int| {
-        var image = try Image.create();
-        defer image.destroy();
+        const allocator = std.testing.allocator;
+        var image = try Result.create();
+        defer image.free(allocator);
 
         const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2);
         const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a);
@@ -328,10 +348,10 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
         opcode: OpcodeRmR,
 
         pub fn runTest(test_case: @This()) !void {
+            const allocator = std.testing.allocator;
             for (0..10) |_| {
-                var image = try Image.create();
-                defer image.destroy();
-                errdefer image.destroy();
+                var image = try Result.create();
+                defer image.free(allocator);
                 const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2);
                 const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a);
                 movRmR(&image, T, .a, .di);
@@ -351,9 +371,9 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
 }
 
 test "call after" {
-    var image = try Image.create();
-    defer image.destroy();
-    errdefer image.destroy();
+    const allocator = std.testing.allocator;
+    var image = try Result.create();
+    defer image.free(allocator);
 
     const jump_patch_offset = image.sections.text.index + 1;
     image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 });
     const jump_source = image.sections.text.index;
@@ -367,9 +387,9 @@ test "call after" {
 }
 
 test "call before" {
-    var image = try Image.create();
-    defer image.destroy();
-    errdefer image.destroy();
+    const allocator = std.testing.allocator;
+    var image = try Result.create();
+    defer image.free(allocator);
 
     const first_jump_patch_offset = image.sections.text.index + 1;
     const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 };
     image.appendCode(&first_call);
@@ -390,9 +410,20 @@ test "call before" {
 
 pub fn runTest(allocator: Allocator, ir_result: *const ir.Result) !Result {
     _ = allocator;
+    var image = try Result.create();
 
+    var entry_point: u32 = 0;
+    _ = entry_point;
     for (ir_result.functions.items) |*function| {
-        _ = function;
+        for (function.instructions.items) |instruction| {
+            switch (instruction.id) {
+                .ret_void => {
+                    image.appendCodeByte(ret);
+                },
+            }
+        }
     }
 
-    return Result{};
+    return image;
 }
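
The tests above all lean on one trick: bytes of machine code are copied into a page that was mapped executable, and the entry point is then reinterpreted as a Zig function pointer. A minimal standalone sketch of that technique, assuming Linux on x86_64 and the same Zig 0.11-era std.os API this file uses (the mov eax, 42 payload is made up for illustration):

    const std = @import("std");

    pub fn main() !void {
        // Map one read/write/executable page, like Result.create does per section.
        const page = try std.os.mmap(
            null,
            std.mem.page_size,
            std.os.PROT.READ | std.os.PROT.WRITE | std.os.PROT.EXEC,
            std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE,
            -1,
            0,
        );
        defer std.os.munmap(page);

        // b8 2a 00 00 00    mov eax, 42
        // c3                ret
        const code = [_]u8{ 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3 };
        @memcpy(page[0..code.len], &code);

        // Same cast as getEntryPoint: treat the buffer as a C-ABI function.
        const f = @as(*const fn () callconv(.C) u32, @ptrCast(page.ptr));
        std.debug.print("{}\n", .{f()}); // prints 42
    }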

src/ir.zig
@@ -14,7 +14,7 @@ const void_type = Type{
 const Type = struct {
     id: Id,
 
-    inline fn isPrimitive(T: Type) bool {
+    fn isPrimitive(T: Type) bool {
         return switch (T.id) {
             .void => true,
         };
@@ -66,6 +66,7 @@ const Function = struct {
 pub const Result = struct {
     top_level_declarations: ArrayList(TopLevelDeclaration),
     functions: ArrayList(Function),
+    instructions: struct {} = .{},
 
     pub fn free(result: *Result, allocator: Allocator) void {
         for (result.functions.items) |*function| {
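
Note that emit.runTest above already iterates function.instructions.items and switches on instruction.id, while this commit only adds an empty placeholder struct for instructions. For that loop to compile, the IR presumably grows something shaped like this (hypothetical sketch; .ret_void is the only id the emitter handles in this commit):

    const Instruction = struct {
        id: Id,

        pub const Id = enum {
            ret_void,
        };
    };

    // and, inside Function, something like:
    // instructions: ArrayList(Instruction) = .{},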

src/lexer.zig
@@ -3,95 +3,90 @@ const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 const log = std.log;
-const equal = std.mem.eql;
 
 const data_structures = @import("data_structures.zig");
 const ArrayList = data_structures.ArrayList;
 
 const fs = @import("fs.zig");
+const parser = @import("parser.zig");
 
-pub inline fn rdtsc() u64 {
-    var edx: u32 = undefined;
-    var eax: u32 = undefined;
-
-    asm volatile (
-        \\rdtsc
-        : [eax] "={eax}" (eax),
-          [edx] "={edx}" (edx),
-    );
-
-    return @as(u64, edx) << 32 | eax;
-}
-
-inline fn rdtscFast() u32 {
-    return asm volatile (
-        \\rdtsc
-        : [eax] "={eax}" (-> u32),
-        :
-        : "edx"
-    );
-}
-
-const vector_byte_count = 16;
-// These two actually take less space due to how Zig handles bool as u1
-const VBool = @Vector(vector_byte_count, bool);
-const VU1 = @Vector(vector_byte_count, u1);
-const VU8 = @Vector(vector_byte_count, u8);
-
-inline fn vand(v1: VBool, v2: VBool) VBool {
-    return @bitCast(@as(VU1, @bitCast(v1)) & @as(VU1, @bitCast(v2)));
-}
-
-inline fn byteMask(n: u8) VU8 {
-    return @splat(n);
-}
-
-inline fn endOfIdentifier(ch: u8) bool {
-    // TODO: complete
-    return ch == ' ' or ch == '(' or ch == ')';
-}
-
-const Identifier = struct {
-    start: u32,
-    end: u32,
-};
+pub const TokenTypeMap = blk: {
+    var result: [@typeInfo(TokenId).Enum.fields.len]type = undefined;
+
+    result[@intFromEnum(TokenId.identifier)] = Identifier;
+    result[@intFromEnum(TokenId.operator)] = Operator;
+    result[@intFromEnum(TokenId.number)] = Number;
+
+    break :blk result;
+};
+
+pub const Identifier = parser.Node;
 
 pub const TokenId = enum {
     identifier,
-    special_character,
+    operator,
+    number,
 };
 
-pub const SpecialCharacter = enum(u8) {
-    arrow = 0,
+pub const Operator = enum(u8) {
     left_parenthesis = '(',
     right_parenthesis = ')',
     left_brace = '{',
     right_brace = '}',
+    equal = '=',
+    colon = ':',
+    semicolon = ';',
+};
+
+pub const Number = struct {
+    content: union(enum) {
+        float: f64,
+        integer: Integer,
+    },
+
+    const Integer = struct {
+        value: u64,
+        is_negative: bool,
+    };
 };
 
 pub const Result = struct {
-    identifiers: ArrayList(Identifier),
-    special_characters: ArrayList(SpecialCharacter),
-    ids: ArrayList(TokenId),
+    arrays: struct {
+        identifier: ArrayList(Identifier),
+        operator: ArrayList(Operator),
+        number: ArrayList(Number),
+        id: ArrayList(TokenId),
+    },
    file: []const u8,
    time: u64 = 0,
 
     pub fn free(result: *Result, allocator: Allocator) void {
-        result.identifiers.clearAndFree(allocator);
-        result.special_characters.clearAndFree(allocator);
-        result.ids.clearAndFree(allocator);
-        allocator.free(result.file);
+        inline for (@typeInfo(@TypeOf(result.arrays)).Struct.fields) |field| {
+            @field(result.arrays, field.name).clearAndFree(allocator);
+        }
+    }
+
+    fn appendToken(result: *Result, comptime token_id: TokenId, token_value: TokenTypeMap[@intFromEnum(token_id)]) void {
+        // const index = result.arrays.id.items.len;
+        @field(result.arrays, @tagName(token_id)).appendAssumeCapacity(token_value);
+        result.arrays.id.appendAssumeCapacity(token_id);
+        // log.err("Token #{}: {s} {}", .{ index, @tagName(token_id), token_value });
     }
 };
 
-fn lex(allocator: Allocator, text: []const u8) !Result {
+pub fn lex(allocator: Allocator, text: []const u8) !Result {
     const time_start = std.time.Instant.now() catch unreachable;
 
     var index: usize = 0;
 
     var result = Result{
-        .identifiers = try ArrayList(Identifier).initCapacity(allocator, text.len),
-        .special_characters = try ArrayList(SpecialCharacter).initCapacity(allocator, text.len),
-        .ids = try ArrayList(TokenId).initCapacity(allocator, text.len),
+        .arrays = .{
+            .identifier = try ArrayList(Identifier).initCapacity(allocator, text.len),
+            .operator = try ArrayList(Operator).initCapacity(allocator, text.len),
+            .number = try ArrayList(Number).initCapacity(allocator, text.len),
+            .id = try ArrayList(TokenId).initCapacity(allocator, text.len),
+        },
         .file = text,
     };
@@ -105,35 +100,47 @@ fn lex(allocator: Allocator, text: []const u8) !Result {
         switch (first_char) {
             'a'...'z', 'A'...'Z', '_' => {
                 const start = index;
-                // SIMD this
-                while (!endOfIdentifier(text[index])) {
-                    index += 1;
-                }
-
-                result.identifiers.appendAssumeCapacity(.{
-                    .start = @intCast(start),
-                    .end = @intCast(index),
-                });
-                result.ids.appendAssumeCapacity(.identifier);
-            },
-            '(', ')', '{', '}' => |special_character| {
-                result.special_characters.appendAssumeCapacity(@enumFromInt(special_character));
-                result.ids.appendAssumeCapacity(.special_character);
-                index += 1;
-            },
+                while (true) {
+                    const ch = text[index];
+                    if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_' or (ch >= '0' and ch <= '9')) {
+                        index += 1;
+                        continue;
+                    }
+                    break;
+                }
+
+                result.appendToken(.identifier, .{
+                    .left = @intCast(start),
+                    .right = @intCast(index),
+                    .type = .identifier,
+                });
+            },
+            '(', ')', '{', '}', '-', '=', ';' => |operator| {
+                result.appendToken(.operator, @enumFromInt(operator));
+                index += 1;
+            },
+            '0'...'9' => {
+                const start = index;
+                while (text[index] >= '0' and text[index] <= '9') {
+                    index += 1;
+                }
+                const end = index;
+                const number_slice = text[start..end];
+                const number = try std.fmt.parseInt(u64, number_slice, 10);
+                result.appendToken(.number, .{
+                    .content = .{
+                        .integer = .{
+                            .value = number,
+                            .is_negative = false,
+                        },
+                    },
+                });
+            },
             ' ', '\n' => index += 1,
-            '-' => {
-                if (text[index + 1] == '>') {
-                    result.special_characters.appendAssumeCapacity(.arrow);
-                    result.ids.appendAssumeCapacity(.special_character);
-                    index += 2;
-                } else {
-                    @panic("TODO");
-                }
-            },
-            else => {
+            else => |foo| {
                 index += 1;
+                std.debug.panic("NI: {c}", .{foo});
             },
         }
     }
@@ -141,16 +148,11 @@ fn lex(allocator: Allocator, text: []const u8) !Result {
     return result;
 }
 
-pub fn runTest(allocator: Allocator, file: []const u8) !Result {
-    const result = try lex(allocator, file);
-    return result;
-}
-
 test "lexer" {
     const allocator = std.testing.allocator;
     const file_path = fs.first;
     const file = try fs.readFile(allocator, file_path);
-    var result = try runTest(allocator, file);
+    defer allocator.free(file);
+    var result = try lex(allocator, file);
     defer result.free(allocator);
 }
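
The token stream is now a struct of arrays: the shared arrays.id list fixes token order, each token kind keeps its payload in its own parallel array, and the comptime TokenTypeMap is what lets appendToken accept the right payload type for each TokenId. A hedged sketch of walking that layout (dumpTokens is a hypothetical helper; Lexer.peek in parser.zig does the same dispatch with inline else):

    const std = @import("std");
    const lexer = @import("lexer.zig");

    fn dumpTokens(result: *const lexer.Result) void {
        // One cursor per payload array, advanced as its kind comes up in id order.
        var indices: struct {
            identifier: u32 = 0,
            operator: u32 = 0,
            number: u32 = 0,
        } = .{};

        for (result.arrays.id.items) |token_id| {
            switch (token_id) {
                inline else => |id| {
                    // `inline else` makes `id` comptime-known, so @field can
                    // select the matching payload array by tag name.
                    const tag = @tagName(id);
                    const index_ptr = &@field(indices, tag);
                    const token_value = @field(result.arrays, tag).items[index_ptr.*];
                    index_ptr.* += 1;
                    std.debug.print("{s}: {any}\n", .{ tag, token_value });
                },
            }
        }
    }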

src/main.zig
@@ -2,44 +2,17 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 
+const compiler = @import("compiler.zig");
 const fs = @import("fs.zig");
-const lexer = @import("lexer.zig");
-const parser = @import("parser.zig");
-const ir = @import("ir.zig");
-const emit = @import("emit.zig");
 
 pub const seed = std.math.maxInt(u64);
 
 pub fn main() !void {
     var gpa = std.heap.GeneralPurposeAllocator(.{}){};
     const allocator = gpa.allocator();
-    try behaviorTest(allocator, fs.first);
-}
-
-fn behaviorTest(allocator: Allocator, file_relative_path: []const u8) !void {
-    const file = try fs.readFile(allocator, file_relative_path);
-    var lexer_result = try lexer.runTest(allocator, file);
-    defer lexer_result.free(allocator);
-    var parser_result = parser.runTest(allocator, &lexer_result) catch |err| {
-        std.log.err("Lexer took {} ns", .{lexer_result.time});
-        return err;
-    };
-    defer parser_result.free(allocator);
-
-    var ir_result = try ir.runTest(allocator, &parser_result);
-    defer ir_result.free(allocator);
-
-    var emit_result = try emit.runTest(allocator, &ir_result);
-    defer emit_result.free(allocator);
+    try compiler.cycle(allocator, fs.first);
 }
 
 test {
-    _ = lexer;
-    _ = parser;
-    _ = ir;
-    _ = emit;
-}
-
-test "behavior test 1" {
-    const allocator = std.testing.allocator;
-    try behaviorTest(allocator, fs.first);
+    _ = compiler;
 }

src/parser.zig
@@ -1,195 +1,434 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
+const log = std.log;
 
 const data_structures = @import("data_structures.zig");
 const ArrayList = data_structures.ArrayList;
+const HashMap = data_structures.HashMap;
 
 const lexer = @import("lexer.zig");
 
 pub const Result = struct {
-    functions: ArrayList(Function),
-    strings: StringMap,
+    function_map: ArrayList(lexer.Identifier),
+    nodes: ArrayList(Node),
 
     pub fn free(result: *Result, allocator: Allocator) void {
         result.functions.clearAndFree(allocator);
-        result.strings.clearAndFree(allocator);
     }
 };
 
+pub const Node = packed struct(u64) {
+    type: Type,
+    left: Node.Index,
+    right: Node.Index,
+
+    pub const Index = u27;
+
+    pub const Type = enum(u10) {
+        root = 0,
+        identifier = 1,
+        number = 2,
+        @"return" = 3,
+        block_one = 4,
+        function_declaration_no_arguments = 5,
+        container_declaration = 6,
+    };
+};
+
+const Error = error{
+    unexpected_token,
+    not_implemented,
+    OutOfMemory,
+};
+
+pub fn parse(allocator: Allocator, lexer_result: *const lexer.Result) !Result {
+    var parser = Parser{
+        .allocator = allocator,
+        .nodes = ArrayList(Node){},
+        .function_map = ArrayList(lexer.Identifier){},
+        .lexer = .{
+            .result = lexer_result,
+        },
+    };
+    errdefer parser.free();
+
+    const node_index = try parser.appendNode(Node{
+        .type = .root,
+        .left = 0,
+        .right = 0,
+    });
+    _ = node_index;
+
+    const members = try parser.parseContainerMembers();
+    _ = members;
+
+    return Result{
+        .function_map = parser.function_map,
+        .nodes = parser.nodes,
+    };
+}
+
+const ExpressionMutabilityQualifier = enum {
+    @"const",
+    @"var",
+};
+
+const Keyword = enum {
+    @"return",
+    @"fn",
+};
+
 const PeekResult = union(lexer.TokenId) {
-    special_character: lexer.SpecialCharacter,
-    identifier: []const u8,
+    identifier: lexer.Identifier,
+    operator: lexer.Operator,
+    number: lexer.Number,
 };
 
-const Function = struct {
-    name: u32,
-    return_type: u32,
-    arguments: ArrayList(Argument),
-    statements: ArrayList(Statement),
-
-    const Argument = struct {
-        foo: u32 = 0,
-    };
-};
-
-const Statement = struct {
-    foo: u32 = 0,
-};
-
-const StringMap = std.AutoHashMapUnmanaged(u32, []const u8);
+const Lexer = struct {
+    result: *const lexer.Result,
+    indices: struct {
+        identifier: u32 = 0,
+        operator: u32 = 0,
+        number: u32 = 0,
+        id: u32 = 0,
+    } = .{},
+
+    fn hasTokens(l: *const Lexer) bool {
+        return l.indices.id < l.result.arrays.id.items.len;
+    }
+
+    fn currentTokenIndex(l: *const Lexer, comptime token_id: lexer.TokenId) u32 {
+        assert(l.isCurrentToken(token_id));
+        return @field(l.indices, @tagName(token_id));
+    }
+
+    fn consume(l: *Lexer, comptime token_id: lexer.TokenId) void {
+        assert(l.isCurrentToken(token_id));
+        l.indices.id += 1;
+        const index_ptr = &@field(l.indices, @tagName(token_id));
+        const index = index_ptr.*;
+        const token_value = @field(l.result.arrays, @tagName(token_id)).items[index];
+        log.err("Consuming {s} ({})...", .{ @tagName(token_id), token_value });
+        index_ptr.* += 1;
+    }
+
+    fn isCurrentToken(l: *const Lexer, token_id: lexer.TokenId) bool {
+        return l.result.arrays.id.items[l.indices.id] == token_id;
+    }
+
+    fn getIdentifier(l: *const Lexer, identifier: Node) []const u8 {
+        comptime {
+            assert(lexer.Identifier == Node);
+        }
+
+        assert(identifier.type == .identifier);
+
+        return l.result.file[identifier.left..][0 .. identifier.right - identifier.left];
+    }
+
+    fn expectTokenType(l: *Lexer, comptime expected_token_id: lexer.TokenId) !lexer.TokenTypeMap[@intFromEnum(expected_token_id)] {
+        const peek_result = l.peek() orelse return error.not_implemented;
+        return switch (peek_result) {
+            expected_token_id => |token| blk: {
+                l.consume(expected_token_id);
+                break :blk token;
+            },
+            else => error.not_implemented,
+        };
+    }
+
+    fn expectTokenTypeIndex(l: *Lexer, comptime expected_token_id: lexer.TokenId) !u32 {
+        const peek_result = l.peek() orelse return error.not_implemented;
+        return switch (peek_result) {
+            expected_token_id => blk: {
+                const index = l.currentTokenIndex(expected_token_id);
+                l.consume(expected_token_id);
+                break :blk index;
+            },
+            else => error.not_implemented,
+        };
+    }
+
+    fn expectSpecificToken(l: *Lexer, comptime expected_token_id: lexer.TokenId, expected_token: lexer.TokenTypeMap[@intFromEnum(expected_token_id)]) !void {
+        const peek_result = l.peek() orelse return error.not_implemented;
+        switch (peek_result) {
+            expected_token_id => |token| {
+                if (expected_token != token) {
+                    return error.not_implemented;
+                }
+
+                l.consume(expected_token_id);
+            },
+            else => |token| {
+                std.debug.panic("{s}", .{@tagName(token)});
+            },
+        }
+    }
+
+    fn maybeExpectOperator(l: *Lexer, expected_operator: lexer.Operator) bool {
+        return switch (l.peek() orelse unreachable) {
+            .operator => |operator| {
+                const result = operator == expected_operator;
+                if (result) {
+                    l.consume(.operator);
+                }
+                return result;
+            },
+            else => false,
+        };
+    }
+
+    fn peek(l: *const Lexer) ?PeekResult {
+        if (l.indices.id >= l.result.arrays.id.items.len) {
+            return null;
+        }
+
+        return switch (l.result.arrays.id.items[l.indices.id]) {
+            inline else => |token| blk: {
+                const tag = @tagName(token);
+                const index = @field(l.indices, tag);
+                const array = &@field(l.result.arrays, tag);
+
+                break :blk @unionInit(PeekResult, tag, array.items[index]);
+            },
+        };
+    }
+};
 
 const Parser = struct {
-    id_index: u32 = 0,
-    identifier_index: u32 = 0,
-    special_character_index: u32 = 0,
-    strings: StringMap,
+    lexer: Lexer,
+    nodes: ArrayList(Node),
+    function_map: ArrayList(lexer.Identifier),
     allocator: Allocator,
-    functions: ArrayList(Function),
 
-    fn parse(parser: *Parser, lexer_result: *const lexer.Result) !Result {
-        while (parser.id_index < lexer_result.ids.items.len) {
-            try parser.parseTopLevelDeclaration(lexer_result);
-        }
-
-        return Result{
-            .functions = parser.functions,
-            .strings = parser.strings,
-        };
-    }
-
-    fn parseFunction(parser: *Parser, lexer_result: *const lexer.Result, name: u32) !Function {
-        assert(lexer_result.special_characters.items[parser.special_character_index] == .left_parenthesis);
-        parser.consume(lexer_result, .special_character);
-
-        while (true) {
-            if (parser.expectSpecialCharacter(lexer_result, .right_parenthesis)) {
-                break;
-            } else |_| {}
-
-            return error.not_implemented;
-        }
-
-        try parser.expectSpecialCharacter(lexer_result, .arrow);
-
-        const return_type_identifier = try parser.expectIdentifier(lexer_result);
-
-        try parser.expectSpecialCharacter(lexer_result, .left_brace);
-
-        while (true) {
-            if (parser.expectSpecialCharacter(lexer_result, .right_brace)) {
-                break;
-            } else |_| {}
-
-            return error.not_implemented;
-        }
-
-        return Function{
-            .name = name,
-            .statements = ArrayList(Statement){},
-            .arguments = ArrayList(Function.Argument){},
-            .return_type = return_type_identifier,
-        };
-    }
-
-    inline fn consume(parser: *Parser, lexer_result: *const lexer.Result, comptime token_id: lexer.TokenId) void {
-        assert(lexer_result.ids.items[parser.id_index] == token_id);
-        parser.id_index += 1;
-
-        switch (token_id) {
-            .special_character => parser.special_character_index += 1,
-            .identifier => parser.identifier_index += 1,
-        }
-    }
-
-    fn parseTopLevelDeclaration(parser: *Parser, lexer_result: *const lexer.Result) !void {
-        const top_level_identifier = try parser.expectIdentifier(lexer_result);
-        const next_token = parser.peek(lexer_result);
-
-        switch (next_token) {
-            .special_character => |special_character| switch (special_character) {
-                .left_parenthesis => {
-                    const function = try parser.parseFunction(lexer_result, top_level_identifier);
-                    try parser.functions.append(parser.allocator, function);
-                },
-                else => return error.not_implemented,
-            },
-            .identifier => |identifier| {
-                _ = identifier;
-                return error.not_implemented;
-            },
-        }
-    }
-
-    inline fn peek(parser: *const Parser, lexer_result: *const lexer.Result) PeekResult {
-        return switch (lexer_result.ids.items[parser.id_index]) {
-            .special_character => .{
-                .special_character = lexer_result.special_characters.items[parser.special_character_index],
-            },
-            .identifier => .{
-                .identifier = blk: {
-                    const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
-                    break :blk lexer_result.file[identifier_range.start .. identifier_range.start + identifier_range.end];
-                },
-            },
-        };
-    }
-
-    fn expectSpecialCharacter(parser: *Parser, lexer_result: *const lexer.Result, expected: lexer.SpecialCharacter) !void {
-        const token_id = lexer_result.ids.items[parser.id_index];
-        if (token_id != .special_character) {
-            return error.expected_special_character;
-        }
-
-        defer parser.id_index += 1;
-
-        const special_character = lexer_result.special_characters.items[parser.special_character_index];
-        if (special_character != expected) {
-            return error.expected_different_special_character;
-        }
-
-        parser.special_character_index += 1;
-    }
-
-    fn acceptSpecialCharacter() void {}
-
-    fn expectIdentifier(parser: *Parser, lexer_result: *const lexer.Result) !u32 {
-        const token_id = lexer_result.ids.items[parser.id_index];
-        if (token_id != .identifier) {
-            return Error.expected_identifier;
-        }
-
-        parser.id_index += 1;
-
-        const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
-        parser.identifier_index += 1;
-
-        const identifier = lexer_result.file[identifier_range.start..identifier_range.end];
-        const Hash = std.hash.Wyhash;
-        const seed = @intFromPtr(identifier.ptr);
-        var hasher = Hash.init(seed);
-        std.hash.autoHash(&hasher, identifier.ptr);
-        const hash = hasher.final();
-        const truncated_hash: u32 = @truncate(hash);
-        try parser.strings.put(parser.allocator, truncated_hash, identifier);
-        return truncated_hash;
-    }
-
-    const Error = error{
-        expected_identifier,
-        expected_special_character,
-        expected_different_special_character,
-        not_implemented,
-    };
+    fn appendNode(parser: *Parser, node: Node) !Node.Index {
+        const index = parser.nodes.items.len;
+        try parser.nodes.append(parser.allocator, node);
+        return @intCast(index);
+    }
+
+    fn getNode(parser: *Parser, node_index: Node.Index) *Node {
+        return &parser.nodes.items[node_index];
+    }
+
+    fn free(parser: *Parser) void {
+        _ = parser;
+    }
+
+    fn parseTypeExpression(parser: *Parser) !Node.Index {
+        // TODO: make this decent
+        return switch (parser.lexer.peek() orelse unreachable) {
+            .identifier => parser.nodeFromToken(.identifier),
+            else => unreachable,
+        };
+    }
+
+    fn parseFunctionDeclaration(parser: *Parser) !Node.Index {
+        try parser.lexer.expectSpecificToken(.operator, .left_parenthesis);
+        while (!parser.lexer.maybeExpectOperator(.right_parenthesis)) {
+            return error.not_implemented;
+        }
+
+        const t = try parser.parseTypeExpression();
+        const function_declaration = try parser.appendNode(.{
+            .type = .function_declaration_no_arguments,
+            .left = t,
+            .right = try parser.parseBlock(),
+        });
+
+        return function_declaration;
+    }
+
+    fn parseBlock(parser: *Parser) !Node.Index {
+        try parser.lexer.expectSpecificToken(.operator, .left_brace);
+
+        var statements = ArrayList(Node.Index){};
+
+        while (!parser.lexer.maybeExpectOperator(.right_brace)) {
+            const statement = try parser.parseStatement();
+            try statements.append(parser.allocator, statement);
+        }
+
+        const node: Node = switch (statements.items.len) {
+            0 => unreachable,
+            1 => .{
+                .type = .block_one,
+                .left = statements.items[0],
+                .right = 0,
+            },
+            else => unreachable,
+        };
+
+        log.debug("Parsed block!", .{});
+
+        return parser.appendNode(node);
+    }
+
+    fn parseStatement(parser: *Parser) !Node.Index {
+        // TODO: more stuff before
+        const expression = try parser.parseAssignExpression();
+        try parser.lexer.expectSpecificToken(.operator, .semicolon);
+
+        return expression;
+    }
+
+    fn parseAssignExpression(parser: *Parser) !Node.Index {
+        const expression = try parser.parseExpression();
+        switch (parser.lexer.peek() orelse unreachable) {
+            .operator => |operator| switch (operator) {
+                .semicolon => return expression,
+                else => unreachable,
+            },
+            else => unreachable,
+        }
+
+        return error.not_implemented;
+    }
+
+    fn parseExpression(parser: *Parser) Error!Node.Index {
+        return parser.parseExpressionPrecedence(0);
+    }
+
+    fn parseExpressionPrecedence(parser: *Parser, minimum_precedence: i32) !Node.Index {
+        var expr_index = try parser.parsePrefixExpression();
+        log.debug("Expr index: {}", .{expr_index});
+
+        var banned_precedence: i32 = -1;
+        while (parser.lexer.hasTokens()) {
+            const precedence: i32 = switch (parser.lexer.peek() orelse unreachable) {
+                .operator => |operator| switch (operator) {
+                    .semicolon => -1,
+                    else => @panic(@tagName(operator)),
+                },
+                else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }),
+            };
+
+            if (precedence < minimum_precedence) {
+                break;
+            }
+
+            if (precedence == banned_precedence) {
+                unreachable;
+            }
+
+            const node_index = try parser.parseExpressionPrecedence(1);
+            _ = node_index;
+            unreachable;
+        }
+
+        log.err("Parsed expression precedence", .{});
+
+        return expr_index;
+    }
+
+    fn parsePrefixExpression(parser: *Parser) !Node.Index {
+        switch (parser.lexer.peek() orelse unreachable) {
+            // .bang => .bool_not,
+            // .minus => .negation,
+            // .tilde => .bit_not,
+            // .minus_percent => .negation_wrap,
+            // .ampersand => .address_of,
+            // .keyword_try => .@"try",
+            // .keyword_await => .@"await",
+            else => |pref| {
+                log.err("Pref: {s}", .{@tagName(pref)});
+                return parser.parsePrimaryExpression();
+            },
+        }
+
+        return error.not_implemented;
+    }
+
+    fn nodeFromToken(parser: *Parser, comptime token_id: lexer.TokenId) !Node.Index {
+        const node = try parser.appendNode(.{
+            .type = @field(Node.Type, @tagName(token_id)),
+            .left = @intCast(parser.lexer.currentTokenIndex(token_id)),
+            .right = 0,
+        });
+        parser.lexer.consume(token_id);
+
+        return node;
+    }
+
+    fn parsePrimaryExpression(parser: *Parser) !Node.Index {
+        const result = switch (parser.lexer.peek() orelse unreachable) {
+            .number => try parser.nodeFromToken(.number),
+            .identifier => |identifier| {
+                const identifier_name = parser.lexer.getIdentifier(identifier);
+                inline for (@typeInfo(Keyword).Enum.fields) |keyword| {
+                    if (std.mem.eql(u8, identifier_name, keyword.name)) return switch (@as(Keyword, @enumFromInt(keyword.value))) {
+                        .@"return" => blk: {
+                            parser.lexer.consume(.identifier);
+                            const node_ref = try parser.appendNode(.{
+                                .type = .@"return",
+                                .left = try parser.parseExpression(),
+                                .right = 0,
+                            });
+                            break :blk node_ref;
+                        },
+                        .@"fn" => blk: {
+                            parser.lexer.consume(.identifier);
+                            // TODO: figure out name association
+                            break :blk try parser.parseFunctionDeclaration();
+                        },
+                    };
+                }
+
+                unreachable;
+            },
+            else => |foo| {
+                std.debug.panic("foo: {s}. {}", .{ @tagName(foo), foo });
+            },
+        };
+
+        return result;
+    }
+
+    fn parseContainerMembers(parser: *Parser) !void {
+        var container_nodes = ArrayList(Node.Index){};
+        while (parser.lexer.hasTokens()) {
+            const container_node = switch (parser.lexer.peek() orelse unreachable) {
+                .identifier => |first_identifier_ref| blk: {
+                    parser.lexer.consume(.identifier);
+
+                    const first_identifier = parser.lexer.getIdentifier(first_identifier_ref);
+
+                    if (std.mem.eql(u8, first_identifier, "comptime")) {
+                        unreachable;
+                    } else {
+                        const mutability_qualifier: ExpressionMutabilityQualifier = if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"const"))) .@"const" else if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"var"))) .@"var" else @panic(first_identifier);
+                        _ = mutability_qualifier;
+
+                        const identifier = try parser.appendNode(.{
+                            .type = .identifier,
+                            .left = @intCast(try parser.lexer.expectTokenTypeIndex(.identifier)),
+                            .right = 0,
+                        });
+
+                        switch (parser.lexer.peek() orelse unreachable) {
+                            .operator => |operator| switch (operator) {
+                                .colon => unreachable,
+                                .equal => {
+                                    parser.lexer.consume(.operator);
+
+                                    const expression = try parser.parseExpression();
+                                    break :blk try parser.appendNode(.{
+                                        .type = .container_declaration,
+                                        .left = expression,
+                                        .right = identifier,
+                                    });
+                                },
+                                else => unreachable,
+                            },
+                            else => |foo| std.debug.panic("WTF: {}", .{foo}),
+                        }
+                    }
+                },
+                else => |a| std.debug.panic("{}", .{a}),
+            };
+
+            try container_nodes.append(parser.allocator, container_node);
+        }
+    }
 };
-
-pub fn runTest(allocator: Allocator, lexer_result: *const lexer.Result) !Result {
-    var parser = Parser{
-        .allocator = allocator,
-        .strings = StringMap{},
-        .functions = ArrayList(Function){},
-    };
-
-    return parser.parse(lexer_result) catch |err| {
-        std.log.err("error: {}", .{err});
-        return err;
-    };
-}

test source file (fs.first)
@@ -1,3 +1,3 @@
-main() -> void {
+const main = fn() i32 {
+    return 0;
 }