write better lexer and parser

parent 30931dc6f1
commit e8a2e79a00

.vscode/launch.json (vendored, new file, 17 lines)
@@ -0,0 +1,17 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "cppvsdbg",
+            "request": "launch",
+            "name": "Debug",
+            "program": "${workspaceFolder}/zig-out/bin/compiler.exe",
+            "args": [],
+            "cwd": "${workspaceFolder}",
+            "preLaunchTask": "zig build"
+        }
+    ]
+}

.vscode/tasks.json (vendored, new file, 12 lines)
@@ -0,0 +1,12 @@
+{
+    // See https://go.microsoft.com/fwlink/?LinkId=733558
+    // for the documentation about the tasks.json format
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "zig build",
+            "type": "shell",
+            "command": "zig build"
+        }
+    ]
+}

src/compiler.zig (new file, 21 lines)
@@ -0,0 +1,21 @@
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+
+const data_structures = @import("data_structures.zig");
+
+const lexer = @import("lexer.zig");
+const parser = @import("parser.zig");
+
+test {
+    _ = lexer;
+    _ = parser;
+}
+
+pub fn cycle(allocator: Allocator, file_relative_path: []const u8) !void {
+    const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize));
+    std.debug.print("File:\n\n```\n{s}\n```\n", .{file});
+    const lexer_result = try lexer.lex(allocator, file);
+    const parser_result = try parser.parse(allocator, &lexer_result);
+    _ = parser_result;
+}
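
A minimal sketch of driving the new pipeline on an in-memory buffer instead of a file, assuming only the lexer.lex and parser.parse signatures added in this commit; the source string is made up, and an arena is used because parser.Result.free still refers to the old field layout:

const std = @import("std");
const lexer = @import("lexer.zig");
const parser = @import("parser.zig");

test "lex and parse a buffer" {
    // The arena owns every token and node; nothing is freed piecemeal.
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Same shape as the updated test program at the bottom of this commit.
    const source = "const main = fn() i32 { return 0; }";
    const lexer_result = try lexer.lex(allocator, source);
    const parser_result = try parser.parse(allocator, &lexer_result);
    _ = parser_result;
}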

src/data_structures.zig
@@ -1,3 +1,4 @@
 const std = @import("std");

 pub const ArrayList = std.ArrayListUnmanaged;
+pub const HashMap = std.AutoHashMap;

src/emit.zig (117 changes)
@@ -8,19 +8,12 @@ const expectEqual = std.testing.expectEqual;

 const ir = @import("ir.zig");

-pub const Result = struct {
-    pub fn free(result: *Result, allocator: Allocator) void {
-        _ = allocator;
-        _ = result;
-    }
-};
-
 const Section = struct {
     content: []align(page_size) u8,
     index: usize = 0,
 };

-const Image = struct {
+const Result = struct {
     sections: struct {
         text: Section,
         rodata: Section,
@@ -28,8 +21,8 @@ const Image = struct {
     },
     entry_point: u32 = 0,

-    fn create() !Image {
-        return Image{
+    fn create() !Result {
+        return Result{
             .sections = .{
                 .text = .{ .content = try mmap(page_size, .{ .executable = true }) },
                 .rodata = .{ .content = try mmap(page_size, .{ .executable = false }) },
@@ -38,33 +31,47 @@ const Image = struct {
         };
     }

-    fn destroy(image: *Image) void {
+    fn destroy(image: *Result) void {
         inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| {
-            std.os.munmap(@field(image.sections, field_name).content);
+            const section_bytes = @field(image.sections, field_name).content;
+            switch (@import("builtin").os.tag) {
+                .linux => std.os.munmap(section_bytes),
+                .windows => std.os.windows.VirtualFree(section_bytes.ptr, 0, std.os.windows.MEM_RELEASE),
+                else => @compileError("OS not supported"),
+            }
         }
     }

-    inline fn mmap(size: usize, flags: packed struct {
+    fn mmap(size: usize, flags: packed struct {
         executable: bool,
     }) ![]align(page_size) u8 {
-        const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0;
-        const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE;
-
-        return std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
+        return switch (@import("builtin").os.tag) {
+            .windows => blk: {
+                const windows = std.os.windows;
+                break :blk @as([*]align(0x1000) u8, @ptrCast(@alignCast(try windows.VirtualAlloc(null, size, windows.MEM_COMMIT | windows.MEM_RESERVE, windows.PAGE_EXECUTE_READWRITE))))[0..size];
+            },
+            .linux => blk: {
+                const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0;
+                const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE;
+
+                break :blk std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
+            },
+            else => @compileError("OS not supported"),
+        };
     }

-    fn appendCode(image: *Image, code: []const u8) void {
+    fn appendCode(image: *Result, code: []const u8) void {
         const destination = image.sections.text.content[image.sections.text.index..][0..code.len];
         @memcpy(destination, code);
         image.sections.text.index += code.len;
     }

-    fn appendCodeByte(image: *Image, code_byte: u8) void {
+    fn appendCodeByte(image: *Result, code_byte: u8) void {
         image.sections.text.content[image.sections.text.index] = code_byte;
         image.sections.text.index += 1;
     }

-    fn getEntryPoint(image: *const Image, comptime Function: type) *const Function {
+    fn getEntryPoint(image: *const Result, comptime Function: type) *const Function {
         comptime {
             assert(@typeInfo(Function) == .Fn);
         }
@@ -72,6 +79,16 @@ const Image = struct {
         assert(image.sections.text.content.len > 0);
         return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point]));
     }
+
+    pub fn free(result: *Result, allocator: Allocator) void {
+        _ = allocator;
+        inline for (comptime std.meta.fieldNames(@TypeOf(result.sections))) |field_name| {
+            switch (@import("builtin").os.tag) {
+                .windows => unreachable,
+                else => std.os.munmap(@field(result.sections, field_name).content),
+            }
+        }
+    }
 };

 const Rex = enum(u8) {
@@ -123,7 +140,7 @@ const ret = 0xc3;
 const mov_a_imm = [1]u8{0xb8};
 const mov_reg_imm8: u8 = 0xb0;

-inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
+fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
     comptime {
         assert(@typeInfo(@TypeOf(integer)) == .Int);
     }
@@ -131,7 +148,7 @@ inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
     return @as([@sizeOf(@TypeOf(integer))]u8, @bitCast(integer));
 }

-inline fn movAImm(image: *Image, integer: anytype) void {
+fn movAImm(image: *Result, integer: anytype) void {
     const T = @TypeOf(integer);
     image.appendCode(&(switch (T) {
         u8, i8 => .{mov_reg_imm8 | @intFromEnum(GPRegister.a)},
@@ -143,8 +160,9 @@ inline fn movAImm(image: *Image, integer: anytype) void {
 }

 test "ret void" {
-    var image = try Image.create();
-    defer image.destroy();
+    const allocator = std.testing.allocator;
+    var image = try Result.create();
+    defer image.free(allocator);
     image.appendCodeByte(ret);

     const function_pointer = image.getEntryPoint(fn () callconv(.C) void);
@@ -166,8 +184,8 @@ fn getMaxInteger(comptime T: type) T {

 test "ret integer" {
     inline for (integer_types_to_test) |Int| {
-        var image = try Image.create();
-        defer image.destroy();
+        var image = try Result.create();
+        defer image.free(std.testing.allocator);
         const expected_number = getMaxInteger(Int);

         movAImm(&image, expected_number);
@@ -185,11 +203,11 @@ const LastByte = packed struct(u8) {
     always_on: u2 = 0b11,
 };

-fn movRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
+fn movRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
     dstRmSrcR(image, T, .mov, dst, src);
 }

-fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void {
+fn dstRmSrcR(image: *Result, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void {
     const last_byte: u8 = @bitCast(LastByte{
         .dst = dst,
         .src = src,
@@ -216,8 +234,9 @@ fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPReg

 test "ret integer argument" {
     inline for (integer_types_to_test) |Int| {
-        var image = try Image.create();
-        defer image.destroy();
+        const allocator = std.testing.allocator;
+        var image = try Result.create();
+        defer image.free(allocator);
         const number = getMaxInteger(Int);

         movRmR(&image, Int, .a, .di);
@@ -239,14 +258,15 @@ fn getRandomNumberRange(comptime T: type, min: T, max: T) T {
     };
 }

-fn subRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
+fn subRmR(image: *Result, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
     dstRmSrcR(image, T, .sub, dst, src);
 }

 test "ret sub arguments" {
     inline for (integer_types_to_test) |Int| {
-        var image = try Image.create();
-        defer image.destroy();
+        const allocator = std.testing.allocator;
+        var image = try Result.create();
+        defer image.free(allocator);
         const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2);
         const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a);

@@ -328,10 +348,10 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
         opcode: OpcodeRmR,

         pub fn runTest(test_case: @This()) !void {
+            const allocator = std.testing.allocator;
             for (0..10) |_| {
-                var image = try Image.create();
-                defer image.destroy();
-                errdefer image.destroy();
+                var image = try Result.create();
+                defer image.free(allocator);
                 const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2);
                 const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a);
                 movRmR(&image, T, .a, .di);
@@ -351,9 +371,9 @@ fn TestIntegerBinaryOperation(comptime T: type) type {
 }

 test "call after" {
-    var image = try Image.create();
-    defer image.destroy();
-    errdefer image.destroy();
+    const allocator = std.testing.allocator;
+    var image = try Result.create();
+    defer image.free(allocator);
     const jump_patch_offset = image.sections.text.index + 1;
     image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 });
     const jump_source = image.sections.text.index;
@@ -367,9 +387,9 @@ test "call after" {
 }

 test "call before" {
-    var image = try Image.create();
-    defer image.destroy();
-    errdefer image.destroy();
+    const allocator = std.testing.allocator;
+    var image = try Result.create();
+    defer image.free(allocator);
     const first_jump_patch_offset = image.sections.text.index + 1;
     const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 };
     image.appendCode(&first_call);
@@ -390,9 +410,20 @@ test "call before" {
 pub fn runTest(allocator: Allocator, ir_result: *const ir.Result) !Result {
     _ = allocator;
+
+    var image = try Result.create();
+
+    var entry_point: u32 = 0;
+    _ = entry_point;

     for (ir_result.functions.items) |*function| {
-        _ = function;
+        for (function.instructions.items) |instruction| {
+            switch (instruction.id) {
+                .ret_void => {
+                    image.appendCodeByte(ret);
+                },
+            }
+        }
     }

-    return Result{};
+    return image;
 }
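
Worth noting how dstRmSrcR builds its encoding: the LastByte packed struct is the x86-64 ModR/M byte, with the destination register in bits 0..2, the source register in bits 3..5, and always_on = 0b11 on top selecting register-direct addressing. A standalone check of that layout, as a sketch rather than part of the commit:

const std = @import("std");

test "ModR/M byte for mov eax, edi" {
    const dst: u3 = 0; // GPRegister.a
    const src: u3 = 7; // GPRegister.di
    const modrm = @as(u8, dst) | (@as(u8, src) << 3) | (0b11 << 6);
    // movRmR(&image, u32, .a, .di) therefore appends { 0x89, 0xf8 }:
    // opcode 0x89 (mov rm, r) followed by this ModR/M byte.
    try std.testing.expectEqual(@as(u8, 0xf8), modrm);
}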

src/ir.zig
@@ -14,7 +14,7 @@ const void_type = Type{
 const Type = struct {
     id: Id,

-    inline fn isPrimitive(T: Type) bool {
+    fn isPrimitive(T: Type) bool {
         return switch (T.id) {
             .void => true,
         };
@@ -66,6 +66,7 @@ const Function = struct {
 pub const Result = struct {
     top_level_declarations: ArrayList(TopLevelDeclaration),
     functions: ArrayList(Function),
+    instructions: struct {} = .{},

     pub fn free(result: *Result, allocator: Allocator) void {
         for (result.functions.items) |*function| {

src/lexer.zig (176 changes)
@@ -3,95 +3,90 @@ const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 const log = std.log;

+const equal = std.mem.eql;
+
 const data_structures = @import("data_structures.zig");
 const ArrayList = data_structures.ArrayList;

 const fs = @import("fs.zig");
+const parser = @import("parser.zig");

-pub inline fn rdtsc() u64 {
-    var edx: u32 = undefined;
-    var eax: u32 = undefined;
-
-    asm volatile (
-        \\rdtsc
-        : [eax] "={eax}" (eax),
-          [edx] "={edx}" (edx),
-    );
-
-    return @as(u64, edx) << 32 | eax;
-}
-
-inline fn rdtscFast() u32 {
-    return asm volatile (
-        \\rdtsc
-        : [eax] "={eax}" (-> u32),
-        :
-        : "edx"
-    );
-}
-
-const vector_byte_count = 16;
-// These two actually take less space due to how Zig handles bool as u1
-const VBool = @Vector(vector_byte_count, bool);
-const VU1 = @Vector(vector_byte_count, u1);
-
-const VU8 = @Vector(vector_byte_count, u8);
-
-inline fn vand(v1: VBool, v2: VBool) VBool {
-    return @bitCast(@as(VU1, @bitCast(v1)) & @as(VU1, @bitCast(v2)));
-}
-
-inline fn byteMask(n: u8) VU8 {
-    return @splat(n);
-}
-
-inline fn endOfIdentifier(ch: u8) bool {
-    // TODO: complete
-    return ch == ' ' or ch == '(' or ch == ')';
-}
-
-const Identifier = struct {
-    start: u32,
-    end: u32,
-};
+pub const TokenTypeMap = blk: {
+    var result: [@typeInfo(TokenId).Enum.fields.len]type = undefined;
+
+    result[@intFromEnum(TokenId.identifier)] = Identifier;
+    result[@intFromEnum(TokenId.operator)] = Operator;
+    result[@intFromEnum(TokenId.number)] = Number;
+
+    break :blk result;
+};
+
+pub const Identifier = parser.Node;

 pub const TokenId = enum {
     identifier,
-    special_character,
+    operator,
+    number,
 };

-pub const SpecialCharacter = enum(u8) {
-    arrow = 0,
+pub const Operator = enum(u8) {
     left_parenthesis = '(',
     right_parenthesis = ')',
     left_brace = '{',
     right_brace = '}',
+    equal = '=',
+    colon = ':',
+    semicolon = ';',
+};
+
+pub const Number = struct {
+    content: union(enum) {
+        float: f64,
+        integer: Integer,
+    },
+
+    const Integer = struct {
+        value: u64,
+        is_negative: bool,
+    };
 };

 pub const Result = struct {
-    identifiers: ArrayList(Identifier),
-    special_characters: ArrayList(SpecialCharacter),
-    ids: ArrayList(TokenId),
+    arrays: struct {
+        identifier: ArrayList(Identifier),
+        operator: ArrayList(Operator),
+        number: ArrayList(Number),
+        id: ArrayList(TokenId),
+    },
     file: []const u8,
     time: u64 = 0,

     pub fn free(result: *Result, allocator: Allocator) void {
-        result.identifiers.clearAndFree(allocator);
-        result.special_characters.clearAndFree(allocator);
-        result.ids.clearAndFree(allocator);
-        allocator.free(result.file);
+        inline for (@typeInfo(@TypeOf(result.arrays)).Struct.fields) |field| {
+            @field(result.arrays, field.name).clearAndFree(allocator);
+        }
+    }
+
+    fn appendToken(result: *Result, comptime token_id: TokenId, token_value: TokenTypeMap[@intFromEnum(token_id)]) void {
+        // const index = result.arrays.id.items.len;
+        @field(result.arrays, @tagName(token_id)).appendAssumeCapacity(token_value);
+        result.arrays.id.appendAssumeCapacity(token_id);
+        // log.err("Token #{}: {s} {}", .{ index, @tagName(token_id), token_value });
     }
 };

-fn lex(allocator: Allocator, text: []const u8) !Result {
+pub fn lex(allocator: Allocator, text: []const u8) !Result {
     const time_start = std.time.Instant.now() catch unreachable;

     var index: usize = 0;

     var result = Result{
-        .identifiers = try ArrayList(Identifier).initCapacity(allocator, text.len),
-        .special_characters = try ArrayList(SpecialCharacter).initCapacity(allocator, text.len),
-        .ids = try ArrayList(TokenId).initCapacity(allocator, text.len),
+        .arrays = .{
+            .identifier = try ArrayList(Identifier).initCapacity(allocator, text.len),
+            .operator = try ArrayList(Operator).initCapacity(allocator, text.len),
+            .number = try ArrayList(Number).initCapacity(allocator, text.len),
+            .id = try ArrayList(TokenId).initCapacity(allocator, text.len),
+        },
         .file = text,
     };

@@ -105,35 +100,47 @@ fn lex(allocator: Allocator, text: []const u8) !Result {
     switch (first_char) {
         'a'...'z', 'A'...'Z', '_' => {
             const start = index;
-            // SIMD this
-            while (!endOfIdentifier(text[index])) {
+            while (true) {
+                const ch = text[index];
+                if ((ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_' or (ch >= '0' and ch <= '9')) {
+                    index += 1;
+                    continue;
+                }
+                break;
+            }
+
+            result.appendToken(.identifier, .{
+                .left = @intCast(start),
+                .right = @intCast(index),
+                .type = .identifier,
+            });
+        },
+        '(', ')', '{', '}', '-', '=', ';' => |operator| {
+            result.appendToken(.operator, @enumFromInt(operator));
+            index += 1;
+        },
+        '0'...'9' => {
+            const start = index;
+
+            while (text[index] >= '0' and text[index] <= '9') {
                 index += 1;
             }
-            result.identifiers.appendAssumeCapacity(.{
-                .start = @intCast(start),
-                .end = @intCast(index),
+            const end = index;
+            const number_slice = text[start..end];
+            const number = try std.fmt.parseInt(u64, number_slice, 10);
+            result.appendToken(.number, .{
+                .content = .{
+                    .integer = .{
+                        .value = number,
+                        .is_negative = false,
+                    },
+                },
             });
-
-            result.ids.appendAssumeCapacity(.identifier);
-        },
-        '(', ')', '{', '}' => |special_character| {
-            result.special_characters.appendAssumeCapacity(@enumFromInt(special_character));
-            result.ids.appendAssumeCapacity(.special_character);
-            index += 1;
         },
         ' ', '\n' => index += 1,
-        '-' => {
-            if (text[index + 1] == '>') {
-                result.special_characters.appendAssumeCapacity(.arrow);
-                result.ids.appendAssumeCapacity(.special_character);
-                index += 2;
-            } else {
-                @panic("TODO");
-            }
-        },
-        else => {
+        else => |foo| {
             index += 1;
+            std.debug.panic("NI: {c}", .{foo});
         },
     }
 }
@@ -141,16 +148,11 @@ fn lex(allocator: Allocator, text: []const u8) !Result {
     return result;
 }

-pub fn runTest(allocator: Allocator, file: []const u8) !Result {
-    const result = try lex(allocator, file);
-
-    return result;
-}
-
 test "lexer" {
     const allocator = std.testing.allocator;
     const file_path = fs.first;
     const file = try fs.readFile(allocator, file_path);
-    var result = try runTest(allocator, file);
+    defer allocator.free(file);
+    var result = try lex(allocator, file);
     defer result.free(allocator);
 }
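
The lexer result is now a struct of arrays: arrays.id records every token kind in source order, while each kind keeps its payload in its own array, to be walked with a per-kind cursor (the same scheme the new parser.Lexer uses through its indices struct). A small sketch of consuming it, assuming the signatures above; the input string is made up:

const std = @import("std");
const lexer = @import("lexer.zig");

test "token arrays layout" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    // "x = 1;" lexes to identifier, operator, number, operator.
    const result = try lexer.lex(arena.allocator(), "x = 1;");
    try std.testing.expectEqual(@as(usize, 4), result.arrays.id.items.len);
    try std.testing.expectEqual(lexer.TokenId.identifier, result.arrays.id.items[0]);
    try std.testing.expectEqual(@as(usize, 2), result.arrays.operator.items.len);
    try std.testing.expectEqual(@as(usize, 1), result.arrays.number.items.len);
}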

src/main.zig (33 changes)
@@ -2,44 +2,17 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;

+const compiler = @import("compiler.zig");
 const fs = @import("fs.zig");
-const lexer = @import("lexer.zig");
-const parser = @import("parser.zig");
-const ir = @import("ir.zig");
-const emit = @import("emit.zig");

 pub const seed = std.math.maxInt(u64);

 pub fn main() !void {
     var gpa = std.heap.GeneralPurposeAllocator(.{}){};
     const allocator = gpa.allocator();
-    try behaviorTest(allocator, fs.first);
-}
-
-fn behaviorTest(allocator: Allocator, file_relative_path: []const u8) !void {
-    const file = try fs.readFile(allocator, file_relative_path);
-    var lexer_result = try lexer.runTest(allocator, file);
-    defer lexer_result.free(allocator);
-    var parser_result = parser.runTest(allocator, &lexer_result) catch |err| {
-        std.log.err("Lexer took {} ns", .{lexer_result.time});
-        return err;
-    };
-    defer parser_result.free(allocator);
-    var ir_result = try ir.runTest(allocator, &parser_result);
-    defer ir_result.free(allocator);
-    var emit_result = try emit.runTest(allocator, &ir_result);
-    defer emit_result.free(allocator);
+    try compiler.cycle(allocator, fs.first);
 }

 test {
-    _ = lexer;
-    _ = parser;
-    _ = ir;
-    _ = emit;
-}
-
-test "behavior test 1" {
-    const allocator = std.testing.allocator;
-    try behaviorTest(allocator, fs.first);
+    _ = compiler;
 }

src/parser.zig (535 changes)
@@ -1,195 +1,434 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
+const log = std.log;

 const data_structures = @import("data_structures.zig");
 const ArrayList = data_structures.ArrayList;
+const HashMap = data_structures.HashMap;

 const lexer = @import("lexer.zig");

 pub const Result = struct {
-    functions: ArrayList(Function),
-    strings: StringMap,
+    function_map: ArrayList(lexer.Identifier),
+    nodes: ArrayList(Node),

     pub fn free(result: *Result, allocator: Allocator) void {
         result.functions.clearAndFree(allocator);
-        result.strings.clearAndFree(allocator);
     }
 };

+pub const Node = packed struct(u64) {
+    type: Type,
+    left: Node.Index,
+    right: Node.Index,
+
+    pub const Index = u27;
+
+    pub const Type = enum(u10) {
+        root = 0,
+        identifier = 1,
+        number = 2,
+        @"return" = 3,
+        block_one = 4,
+        function_declaration_no_arguments = 5,
+        container_declaration = 6,
+    };
+};
+
+const Error = error{
+    unexpected_token,
+    not_implemented,
+    OutOfMemory,
+};
+
+pub fn parse(allocator: Allocator, lexer_result: *const lexer.Result) !Result {
+    var parser = Parser{
+        .allocator = allocator,
+        .nodes = ArrayList(Node){},
+        .function_map = ArrayList(lexer.Identifier){},
+        .lexer = .{
+            .result = lexer_result,
+        },
+    };
+    errdefer parser.free();
+
+    const node_index = try parser.appendNode(Node{
+        .type = .root,
+        .left = 0,
+        .right = 0,
+    });
+    _ = node_index;
+
+    const members = try parser.parseContainerMembers();
+    _ = members;
+
+    return Result{
+        .function_map = parser.function_map,
+        .nodes = parser.nodes,
+    };
+}
+
+const ExpressionMutabilityQualifier = enum {
+    @"const",
+    @"var",
+};
+
+const Keyword = enum {
+    @"return",
+    @"fn",
+};
+
 const PeekResult = union(lexer.TokenId) {
-    special_character: lexer.SpecialCharacter,
-    identifier: []const u8,
+    identifier: lexer.Identifier,
+    operator: lexer.Operator,
+    number: lexer.Number,
 };

-const Function = struct {
-    name: u32,
-    return_type: u32,
-    arguments: ArrayList(Argument),
-    statements: ArrayList(Statement),
-
-    const Argument = struct {
-        foo: u32 = 0,
-    };
-};
-
-const Statement = struct {
-    foo: u32 = 0,
-};
-
-const StringMap = std.AutoHashMapUnmanaged(u32, []const u8);
+const Lexer = struct {
+    result: *const lexer.Result,
+    indices: struct {
+        identifier: u32 = 0,
+        operator: u32 = 0,
+        number: u32 = 0,
+        id: u32 = 0,
+    } = .{},
+
+    fn hasTokens(l: *const Lexer) bool {
+        return l.indices.id < l.result.arrays.id.items.len;
+    }
+
+    fn currentTokenIndex(l: *const Lexer, comptime token_id: lexer.TokenId) u32 {
+        assert(l.isCurrentToken(token_id));
+        return @field(l.indices, @tagName(token_id));
+    }
+
+    fn consume(l: *Lexer, comptime token_id: lexer.TokenId) void {
+        assert(l.isCurrentToken(token_id));
+        l.indices.id += 1;
+        const index_ptr = &@field(l.indices, @tagName(token_id));
+        const index = index_ptr.*;
+        const token_value = @field(l.result.arrays, @tagName(token_id)).items[index];
+        log.err("Consuming {s} ({})...", .{ @tagName(token_id), token_value });
+
+        index_ptr.* += 1;
+    }
+
+    fn isCurrentToken(l: *const Lexer, token_id: lexer.TokenId) bool {
+        return l.result.arrays.id.items[l.indices.id] == token_id;
+    }
+
+    fn getIdentifier(l: *const Lexer, identifier: Node) []const u8 {
+        comptime {
+            assert(lexer.Identifier == Node);
+        }
+
+        assert(identifier.type == .identifier);
+
+        return l.result.file[identifier.left..][0 .. identifier.right - identifier.left];
+    }
+
+    fn expectTokenType(l: *Lexer, comptime expected_token_id: lexer.TokenId) !lexer.TokenTypeMap[@intFromEnum(expected_token_id)] {
+        const peek_result = l.peek() orelse return error.not_implemented;
+        return switch (peek_result) {
+            expected_token_id => |token| blk: {
+                l.consume(expected_token_id);
+                break :blk token;
+            },
+            else => error.not_implemented,
+        };
+    }
+
+    fn expectTokenTypeIndex(l: *Lexer, comptime expected_token_id: lexer.TokenId) !u32 {
+        const peek_result = l.peek() orelse return error.not_implemented;
+        return switch (peek_result) {
+            expected_token_id => blk: {
+                const index = l.currentTokenIndex(expected_token_id);
+                l.consume(expected_token_id);
+                break :blk index;
+            },
+            else => error.not_implemented,
+        };
+    }
+
+    fn expectSpecificToken(l: *Lexer, comptime expected_token_id: lexer.TokenId, expected_token: lexer.TokenTypeMap[@intFromEnum(expected_token_id)]) !void {
+        const peek_result = l.peek() orelse return error.not_implemented;
+        switch (peek_result) {
+            expected_token_id => |token| {
+                if (expected_token != token) {
+                    return error.not_implemented;
+                }
+
+                l.consume(expected_token_id);
+            },
+            else => |token| {
+                std.debug.panic("{s}", .{@tagName(token)});
+            },
+        }
+    }
+
+    fn maybeExpectOperator(l: *Lexer, expected_operator: lexer.Operator) bool {
+        return switch (l.peek() orelse unreachable) {
+            .operator => |operator| {
+                const result = operator == expected_operator;
+                if (result) {
+                    l.consume(.operator);
+                }
+                return result;
+            },
+            else => false,
+        };
+    }
+
+    fn peek(l: *const Lexer) ?PeekResult {
+        if (l.indices.id >= l.result.arrays.id.items.len) {
+            return null;
+        }
+
+        return switch (l.result.arrays.id.items[l.indices.id]) {
+            inline else => |token| blk: {
+                const tag = @tagName(token);
+                const index = @field(l.indices, tag);
+                const array = &@field(l.result.arrays, tag);
+
+                break :blk @unionInit(PeekResult, tag, array.items[index]);
+            },
+        };
+    }
+};

 const Parser = struct {
-    id_index: u32 = 0,
-    identifier_index: u32 = 0,
-    special_character_index: u32 = 0,
-    strings: StringMap,
+    lexer: Lexer,
+    nodes: ArrayList(Node),
+    function_map: ArrayList(lexer.Identifier),
     allocator: Allocator,
-    functions: ArrayList(Function),

-    fn parse(parser: *Parser, lexer_result: *const lexer.Result) !Result {
-        while (parser.id_index < lexer_result.ids.items.len) {
-            try parser.parseTopLevelDeclaration(lexer_result);
-        }
-
-        return Result{
-            .functions = parser.functions,
-            .strings = parser.strings,
-        };
-    }
-
-    fn parseFunction(parser: *Parser, lexer_result: *const lexer.Result, name: u32) !Function {
-        assert(lexer_result.special_characters.items[parser.special_character_index] == .left_parenthesis);
-        parser.consume(lexer_result, .special_character);
-
-        while (true) {
-            if (parser.expectSpecialCharacter(lexer_result, .right_parenthesis)) {
-                break;
-            } else |_| {}
-
+    fn appendNode(parser: *Parser, node: Node) !Node.Index {
+        const index = parser.nodes.items.len;
+        try parser.nodes.append(parser.allocator, node);
+        return @intCast(index);
+    }
+
+    fn getNode(parser: *Parser, node_index: Node.Index) *Node {
+        return &parser.nodes.items[node_index];
+    }
+
+    fn free(parser: *Parser) void {
+        _ = parser;
+    }
+
+    fn parseTypeExpression(parser: *Parser) !Node.Index {
+        // TODO: make this decent
+        return switch (parser.lexer.peek() orelse unreachable) {
+            .identifier => parser.nodeFromToken(.identifier),
+            else => unreachable,
+        };
+    }
+
+    fn parseFunctionDeclaration(parser: *Parser) !Node.Index {
+        try parser.lexer.expectSpecificToken(.operator, .left_parenthesis);
+        while (!parser.lexer.maybeExpectOperator(.right_parenthesis)) {
             return error.not_implemented;
         }

-        try parser.expectSpecialCharacter(lexer_result, .arrow);
-
-        const return_type_identifier = try parser.expectIdentifier(lexer_result);
-
-        try parser.expectSpecialCharacter(lexer_result, .left_brace);
-
-        while (true) {
-            if (parser.expectSpecialCharacter(lexer_result, .right_brace)) {
-                break;
-            } else |_| {}
-
-            return error.not_implemented;
-        }
-
-        return Function{
-            .name = name,
-            .statements = ArrayList(Statement){},
-            .arguments = ArrayList(Function.Argument){},
-            .return_type = return_type_identifier,
-        };
+        const t = try parser.parseTypeExpression();
+        const function_declaration = try parser.appendNode(.{
+            .type = .function_declaration_no_arguments,
+            .left = t,
+            .right = try parser.parseBlock(),
+        });
+        return function_declaration;
     }

-    inline fn consume(parser: *Parser, lexer_result: *const lexer.Result, comptime token_id: lexer.TokenId) void {
-        assert(lexer_result.ids.items[parser.id_index] == token_id);
-        parser.id_index += 1;
-        switch (token_id) {
-            .special_character => parser.special_character_index += 1,
-            .identifier => parser.identifier_index += 1,
-        }
-    }
-
-    fn parseTopLevelDeclaration(parser: *Parser, lexer_result: *const lexer.Result) !void {
-        const top_level_identifier = try parser.expectIdentifier(lexer_result);
-        const next_token = parser.peek(lexer_result);
-
-        switch (next_token) {
-            .special_character => |special_character| switch (special_character) {
-                .left_parenthesis => {
-                    const function = try parser.parseFunction(lexer_result, top_level_identifier);
-                    try parser.functions.append(parser.allocator, function);
-                },
-                else => return error.not_implemented,
-            },
-            .identifier => |identifier| {
-                _ = identifier;
-                return error.not_implemented;
-            },
-        }
-    }
-
-    inline fn peek(parser: *const Parser, lexer_result: *const lexer.Result) PeekResult {
-        return switch (lexer_result.ids.items[parser.id_index]) {
-            .special_character => .{
-                .special_character = lexer_result.special_characters.items[parser.special_character_index],
-            },
-            .identifier => .{
-                .identifier = blk: {
-                    const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
-                    break :blk lexer_result.file[identifier_range.start .. identifier_range.start + identifier_range.end];
-                },
-            },
-        };
-    }
-
-    fn expectSpecialCharacter(parser: *Parser, lexer_result: *const lexer.Result, expected: lexer.SpecialCharacter) !void {
-        const token_id = lexer_result.ids.items[parser.id_index];
-        if (token_id != .special_character) {
-            return error.expected_special_character;
-        }
-
-        defer parser.id_index += 1;
-
-        const special_character = lexer_result.special_characters.items[parser.special_character_index];
-        if (special_character != expected) {
-            return error.expected_different_special_character;
-        }
-
-        parser.special_character_index += 1;
-    }
-
-    fn acceptSpecialCharacter() void {}
-
-    fn expectIdentifier(parser: *Parser, lexer_result: *const lexer.Result) !u32 {
-        const token_id = lexer_result.ids.items[parser.id_index];
-        if (token_id != .identifier) {
-            return Error.expected_identifier;
-        }
-
-        parser.id_index += 1;
-
-        const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
-        parser.identifier_index += 1;
-        const identifier = lexer_result.file[identifier_range.start..identifier_range.end];
-        const Hash = std.hash.Wyhash;
-        const seed = @intFromPtr(identifier.ptr);
-        var hasher = Hash.init(seed);
-        std.hash.autoHash(&hasher, identifier.ptr);
-        const hash = hasher.final();
-        const truncated_hash: u32 = @truncate(hash);
-        try parser.strings.put(parser.allocator, truncated_hash, identifier);
-        return truncated_hash;
-    }
-
-    const Error = error{
-        expected_identifier,
-        expected_special_character,
-        expected_different_special_character,
-        not_implemented,
-    };
+    fn parseBlock(parser: *Parser) !Node.Index {
+        try parser.lexer.expectSpecificToken(.operator, .left_brace);
+
+        var statements = ArrayList(Node.Index){};
+
+        while (!parser.lexer.maybeExpectOperator(.right_brace)) {
+            const statement = try parser.parseStatement();
+            try statements.append(parser.allocator, statement);
+        }
+
+        const node: Node = switch (statements.items.len) {
+            0 => unreachable,
+            1 => .{
+                .type = .block_one,
+                .left = statements.items[0],
+                .right = 0,
+            },
+            else => unreachable,
+        };
+        log.debug("Parsed block!", .{});
+        return parser.appendNode(node);
+    }
+
+    fn parseStatement(parser: *Parser) !Node.Index {
+        // TODO: more stuff before
+        const expression = try parser.parseAssignExpression();
+        try parser.lexer.expectSpecificToken(.operator, .semicolon);
+
+        return expression;
+    }
+
+    fn parseAssignExpression(parser: *Parser) !Node.Index {
+        const expression = try parser.parseExpression();
+        switch (parser.lexer.peek() orelse unreachable) {
+            .operator => |operator| switch (operator) {
+                .semicolon => return expression,
+                else => unreachable,
+            },
+            else => unreachable,
+        }
+
+        return error.not_implemented;
+    }
+
+    fn parseExpression(parser: *Parser) Error!Node.Index {
+        return parser.parseExpressionPrecedence(0);
+    }
+
+    fn parseExpressionPrecedence(parser: *Parser, minimum_precedence: i32) !Node.Index {
+        var expr_index = try parser.parsePrefixExpression();
+        log.debug("Expr index: {}", .{expr_index});
+
+        var banned_precedence: i32 = -1;
+        while (parser.lexer.hasTokens()) {
+            const precedence: i32 = switch (parser.lexer.peek() orelse unreachable) {
+                .operator => |operator| switch (operator) {
+                    .semicolon => -1,
+                    else => @panic(@tagName(operator)),
+                },
+                else => |foo| std.debug.panic("Foo: ({s}) {}", .{ @tagName(foo), foo }),
+            };
+
+            if (precedence < minimum_precedence) {
+                break;
+            }
+
+            if (precedence == banned_precedence) {
+                unreachable;
+            }
+
+            const node_index = try parser.parseExpressionPrecedence(1);
+            _ = node_index;
+
+            unreachable;
+        }
+
+        log.err("Parsed expression precedence", .{});
+
+        return expr_index;
+    }
+
+    fn parsePrefixExpression(parser: *Parser) !Node.Index {
+        switch (parser.lexer.peek() orelse unreachable) {
+            // .bang => .bool_not,
+            // .minus => .negation,
+            // .tilde => .bit_not,
+            // .minus_percent => .negation_wrap,
+            // .ampersand => .address_of,
+            // .keyword_try => .@"try",
+            // .keyword_await => .@"await",
+
+            else => |pref| {
+                log.err("Pref: {s}", .{@tagName(pref)});
+                return parser.parsePrimaryExpression();
+            },
+        }
+
+        return error.not_implemented;
+    }
+
+    fn nodeFromToken(parser: *Parser, comptime token_id: lexer.TokenId) !Node.Index {
+        const node = try parser.appendNode(.{
+            .type = @field(Node.Type, @tagName(token_id)),
+            .left = @intCast(parser.lexer.currentTokenIndex(token_id)),
+            .right = 0,
+        });
+        parser.lexer.consume(token_id);
+
+        return node;
+    }
+
+    fn parsePrimaryExpression(parser: *Parser) !Node.Index {
+        const result = switch (parser.lexer.peek() orelse unreachable) {
+            .number => try parser.nodeFromToken(.number),
+            .identifier => |identifier| {
+                const identifier_name = parser.lexer.getIdentifier(identifier);
+                inline for (@typeInfo(Keyword).Enum.fields) |keyword| {
+                    if (std.mem.eql(u8, identifier_name, keyword.name)) return switch (@as(Keyword, @enumFromInt(keyword.value))) {
+                        .@"return" => blk: {
+                            parser.lexer.consume(.identifier);
+                            const node_ref = try parser.appendNode(.{
+                                .type = .@"return",
+                                .left = try parser.parseExpression(),
+                                .right = 0,
+                            });
+                            break :blk node_ref;
+                        },
+                        .@"fn" => blk: {
+                            parser.lexer.consume(.identifier);
+                            // TODO: figure out name association
+                            break :blk try parser.parseFunctionDeclaration();
+                        },
+                    };
+                }
+
+                unreachable;
+            },
+            else => |foo| {
+                std.debug.panic("foo: {s}. {}", .{ @tagName(foo), foo });
+            },
+        };
+
+        return result;
+    }
+
+    fn parseContainerMembers(parser: *Parser) !void {
+        var container_nodes = ArrayList(Node.Index){};
+        while (parser.lexer.hasTokens()) {
+            const container_node = switch (parser.lexer.peek() orelse unreachable) {
+                .identifier => |first_identifier_ref| blk: {
+                    parser.lexer.consume(.identifier);
+
+                    const first_identifier = parser.lexer.getIdentifier(first_identifier_ref);
+
+                    if (std.mem.eql(u8, first_identifier, "comptime")) {
+                        unreachable;
+                    } else {
+                        const mutability_qualifier: ExpressionMutabilityQualifier = if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"const"))) .@"const" else if (std.mem.eql(u8, first_identifier, @tagName(ExpressionMutabilityQualifier.@"var"))) .@"var" else @panic(first_identifier);
+                        _ = mutability_qualifier;
+
+                        const identifier = try parser.appendNode(.{
+                            .type = .identifier,
+                            .left = @intCast(try parser.lexer.expectTokenTypeIndex(.identifier)),
+                            .right = 0,
+                        });
+
+                        switch (parser.lexer.peek() orelse unreachable) {
+                            .operator => |operator| switch (operator) {
+                                .colon => unreachable,
+                                .equal => {
+                                    parser.lexer.consume(.operator);
+
+                                    const expression = try parser.parseExpression();
+                                    break :blk try parser.appendNode(.{
+                                        .type = .container_declaration,
+                                        .left = expression,
+                                        .right = identifier,
+                                    });
+                                },
+                                else => unreachable,
+                            },
+                            else => |foo| std.debug.panic("WTF: {}", .{foo}),
+                        }
+                    }
+                },
+                else => |a| std.debug.panic("{}", .{a}),
+            };
+
+            try container_nodes.append(parser.allocator, container_node);
+        }
+    }
 };
-
-pub fn runTest(allocator: Allocator, lexer_result: *const lexer.Result) !Result {
-    var parser = Parser{
-        .allocator = allocator,
-        .strings = StringMap{},
-        .functions = ArrayList(Function){},
-    };
-
-    return parser.parse(lexer_result) catch |err| {
-        std.log.err("error: {}", .{err});
-        return err;
-    };
-}

@@ -1,3 +1,3 @@
-main() -> void {
-
+const main = fn() i32 {
+    return 0;
 }