From 9cbb03256bc4dc846232c37b710ead004ec4555f Mon Sep 17 00:00:00 2001
From: David Gonzalez Martin
Date: Wed, 19 Feb 2025 19:13:05 -0600
Subject: [PATCH] Parse simple file with null storage and result

---
 build.zig      | 182 +++++++++++++---------
 src/LLVM.zig   |   7 +-
 src/main.zig   |   7 +-
 src/parser.zig | 409 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 525 insertions(+), 80 deletions(-)
 create mode 100644 src/parser.zig

diff --git a/build.zig b/build.zig
index 50a0d2b..8daab98 100644
--- a/build.zig
+++ b/build.zig
@@ -49,80 +49,83 @@ const LLVM = struct {
     module: *std.Build.Module,
 
     fn setup(b: *std.Build, path: []const u8, target: std.Build.ResolvedTarget, optimize: std.builtin.OptimizeMode) !LLVM {
-        var llvm_libs = std.ArrayList([]const u8).init(b.allocator);
-        var flags = std.ArrayList([]const u8).init(b.allocator);
-        const llvm_config_path = if (b.option([]const u8, "llvm_prefix", "LLVM prefix")) |llvm_prefix| blk: {
-            const full_path = try std.mem.concat(b.allocator, u8, &.{ llvm_prefix, "/bin/llvm-config" });
-            const f = std.fs.cwd().openFile(full_path, .{}) catch return error.llvm_not_found;
-            f.close();
-            break :blk full_path;
-        } else executable_find_in_path(b.allocator, "llvm-config", path) orelse return error.llvm_not_found;
-        const llvm_components_result = try run_process_and_capture_stdout(b, &.{ llvm_config_path, "--components" });
-        var it = std.mem.splitScalar(u8, llvm_components_result, ' ');
-        var args = std.ArrayList([]const u8).init(b.allocator);
-        try args.append(llvm_config_path);
-        try args.append("--libs");
-        while (it.next()) |component| {
-            try args.append(std.mem.trimRight(u8, component, "\n"));
+        if (enable_llvm) {
+            var llvm_libs = std.ArrayList([]const u8).init(b.allocator);
+            var flags = std.ArrayList([]const u8).init(b.allocator);
+            const llvm_config_path = if (b.option([]const u8, "llvm_prefix", "LLVM prefix")) |llvm_prefix| blk: {
+                const full_path = try std.mem.concat(b.allocator, u8, &.{ llvm_prefix, "/bin/llvm-config" });
+                const f = std.fs.cwd().openFile(full_path, .{}) catch return error.llvm_not_found;
+                f.close();
+                break :blk full_path;
+            } else executable_find_in_path(b.allocator, "llvm-config", path) orelse return error.llvm_not_found;
+            const llvm_components_result = try run_process_and_capture_stdout(b, &.{ llvm_config_path, "--components" });
+            var it = std.mem.splitScalar(u8, llvm_components_result, ' ');
+            var args = std.ArrayList([]const u8).init(b.allocator);
+            try args.append(llvm_config_path);
+            try args.append("--libs");
+            while (it.next()) |component| {
+                try args.append(std.mem.trimRight(u8, component, "\n"));
+            }
+            const llvm_libs_result = try run_process_and_capture_stdout(b, args.items);
+            it = std.mem.splitScalar(u8, llvm_libs_result, ' ');
+
+            while (it.next()) |lib| {
+                const llvm_lib = std.mem.trimLeft(u8, std.mem.trimRight(u8, lib, "\n"), "-l");
+                try llvm_libs.append(llvm_lib);
+            }
+
+            const llvm_cxx_flags_result = try run_process_and_capture_stdout(b, &.{ llvm_config_path, "--cxxflags" });
+            it = std.mem.splitScalar(u8, llvm_cxx_flags_result, ' ');
+            while (it.next()) |flag| {
+                const llvm_cxx_flag = std.mem.trimRight(u8, flag, "\n");
+                try flags.append(llvm_cxx_flag);
+            }
+
+            const llvm_lib_dir = std.mem.trimRight(u8, try run_process_and_capture_stdout(b, &.{ llvm_config_path, "--libdir" }), "\n");
+
+            if (optimize != .ReleaseSmall) {
+                try flags.append("-g");
+            }
+
+            try flags.append("-fno-rtti");
+
+            const llvm = b.createModule(.{
+                .target = target,
+                .optimize = optimize,
+            });
+
+            llvm.addLibraryPath(.{ .cwd_relative = llvm_lib_dir });
+
+            llvm.addCSourceFiles(.{
+                .files = &.{"src/llvm.cpp"},
+                .flags = flags.items,
+            });
+            llvm.addIncludePath(.{ .cwd_relative = "/usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/14.2.1/../../../../include/c++/14.2.1" });
+            llvm.addIncludePath(.{ .cwd_relative = "/usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/14.2.1/../../../../include/c++/14.2.1/x86_64-pc-linux-gnu" });
+            llvm.addObjectFile(.{ .cwd_relative = "/usr/lib/libstdc++.so.6" });
+
+            const needed_libraries: []const []const u8 = &.{ "unwind", "z" };
+
+            const lld_libs: []const []const u8 = &.{ "lldCommon", "lldCOFF", "lldELF", "lldMachO", "lldMinGW", "lldWasm" };
+
+            for (needed_libraries) |lib| {
+                llvm.linkSystemLibrary(lib, .{});
+            }
+
+            for (llvm_libs.items) |lib| {
+                llvm.linkSystemLibrary(lib, .{});
+            }
+
+            for (lld_libs) |lib| {
+                llvm.linkSystemLibrary(lib, .{});
+            }
+
+            return LLVM{
+                .module = llvm,
+            };
+        } else {
+            return undefined;
         }
-        const llvm_libs_result = try run_process_and_capture_stdout(b, args.items);
-        it = std.mem.splitScalar(u8, llvm_libs_result, ' ');
-
-        while (it.next()) |lib| {
-            const llvm_lib = std.mem.trimLeft(u8, std.mem.trimRight(u8, lib, "\n"), "-l");
-            try llvm_libs.append(llvm_lib);
-        }
-
-        const llvm_cxx_flags_result = try run_process_and_capture_stdout(b, &.{ llvm_config_path, "--cxxflags" });
-        it = std.mem.splitScalar(u8, llvm_cxx_flags_result, ' ');
-        while (it.next()) |flag| {
-            const llvm_cxx_flag = std.mem.trimRight(u8, flag, "\n");
-            try flags.append(llvm_cxx_flag);
-        }
-
-        const llvm_lib_dir = std.mem.trimRight(u8, try run_process_and_capture_stdout(b, &.{ llvm_config_path, "--libdir" }), "\n");
-
-        if (optimize != .ReleaseSmall) {
-            try flags.append("-g");
-        }
-
-        try flags.append("-fno-rtti");
-
-        const llvm = b.createModule(.{
-            .target = target,
-            .optimize = optimize,
-        });
-
-        llvm.addLibraryPath(.{ .cwd_relative = llvm_lib_dir });
-
-        llvm.addCSourceFiles(.{
-            .files = &.{"src/llvm.cpp"},
-            .flags = flags.items,
-        });
-        llvm.addIncludePath(.{ .cwd_relative = "/usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/14.2.1/../../../../include/c++/14.2.1" });
-        llvm.addIncludePath(.{ .cwd_relative = "/usr/bin/../lib64/gcc/x86_64-pc-linux-gnu/14.2.1/../../../../include/c++/14.2.1/x86_64-pc-linux-gnu" });
-
-        const needed_libraries: []const []const u8 = &.{ "unwind", "z" };
-
-        llvm.addObjectFile(.{ .cwd_relative = "/usr/lib/libstdc++.so.6" });
-
-        const lld_libs: []const []const u8 = &.{ "lldCommon", "lldCOFF", "lldELF", "lldMachO", "lldMinGW", "lldWasm" };
-
-        for (needed_libraries) |lib| {
-            llvm.linkSystemLibrary(lib, .{});
-        }
-
-        for (llvm_libs.items) |lib| {
-            llvm.linkSystemLibrary(lib, .{});
-        }
-
-        for (lld_libs) |lib| {
-            llvm.linkSystemLibrary(lib, .{});
-        }
-
-        return LLVM{
-            .module = llvm,
-        };
     }
 
     fn link(llvm: LLVM, target: *std.Build.Step.Compile) void {
@@ -135,17 +138,34 @@ const LLVM = struct {
     }
 };
 
+fn debug_binary(b: *std.Build, exe: *std.Build.Step.Compile) *std.Build.Step.Run {
+    const run_step = std.Build.Step.Run.create(b, b.fmt("debug {s}", .{exe.name}));
+    run_step.addArg("gdb");
+    run_step.addArg("-ex");
+    run_step.addArg("r");
+    run_step.addArtifactArg(exe);
+
+    return run_step;
+}
+
+var enable_llvm: bool = undefined;
+
 pub fn build(b: *std.Build) !void {
     const target = b.standardTargetOptions(.{});
     const optimize = b.standardOptimizeOption(.{});
+    enable_llvm = b.option(bool, "enable_llvm", "Enable LLVM") orelse false;
     const env = try std.process.getEnvMap(b.allocator);
     const path = env.get("PATH") orelse unreachable;
 
+    const configuration = b.addOptions();
+    configuration.addOption(bool, "enable_llvm", enable_llvm);
+
     const exe_mod = b.createModule(.{
         .root_source_file = b.path("src/main.zig"),
         .target = target,
         .optimize = optimize,
     });
+    exe_mod.addOptions("configuration", configuration);
 
     const llvm = try LLVM.setup(b, path, target, optimize);
 
@@ -153,8 +173,11 @@ pub fn build(b: *std.Build) !void {
         .name = "bloat-buster",
         .root_module = exe_mod,
     });
+    exe.linkLibC();
 
-    llvm.link(exe);
+    if (enable_llvm) {
+        llvm.link(exe);
+    }
 
     b.installArtifact(exe);
 
@@ -166,10 +189,19 @@ pub fn build(b: *std.Build) !void {
     const run_step = b.step("run", "Run the app");
     run_step.dependOn(&run_cmd.step);
 
+    const debug_cmd = debug_binary(b, exe);
+    const debug_step = b.step("debug", "Debug the app");
+    debug_step.dependOn(&debug_cmd.step);
+
     const exe_unit_tests = b.addTest(.{
         .root_module = exe_mod,
     });
-    llvm.link(exe_unit_tests);
+    exe_unit_tests.linkLibC();
+
+    if (enable_llvm) {
+        llvm.link(exe_unit_tests);
+    }
+
     const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
 
     const test_step = b.step("test", "Run unit tests");
diff --git a/src/LLVM.zig b/src/LLVM.zig
index a894a30..24d9c21 100644
--- a/src/LLVM.zig
+++ b/src/LLVM.zig
@@ -2,6 +2,7 @@ const lib = @import("lib.zig");
 const Arena = lib.Arena;
 const assert = lib.assert;
 const api = @import("llvm_api.zig");
+const configuration = @import("configuration");
 
 /// This is a String which ABI-compatible with C++
 pub const String = extern struct {
@@ -757,7 +758,11 @@ const LldArgvBuilder = struct {
     }
 };
 
-pub fn experiment() void {
+test "experiment" {
+    if (!configuration.enable_llvm) {
+        return error.SkipZigTest;
+    }
+
     const thread = &global.threads[0];
     thread.initialize();
     const module = thread.context.create_module("first_module");
diff --git a/src/main.zig b/src/main.zig
index 44d8658..cf1ec89 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,5 +1,6 @@
 const lib = @import("lib.zig");
 const llvm = @import("LLVM.zig");
+const parser = @import("parser.zig");
 const Arena = lib.Arena;
 
 pub const panic = struct {
@@ -138,10 +139,7 @@ pub const panic = struct {
 var global_persistent_arena: *Arena = undefined;
 
 pub fn main() callconv(.C) c_int {
-    lib.GlobalState.initialize();
-
-    llvm.initialize_all();
-    llvm.experiment();
+    parser.parser_experiment();
     return 0;
 }
 
@@ -156,4 +154,5 @@ comptime {
 test {
     _ = lib;
     _ = llvm;
+    _ = parser;
 }
diff --git a/src/parser.zig b/src/parser.zig
new file mode 100644
index 0000000..6eb196e
--- /dev/null
+++ b/src/parser.zig
@@ -0,0 +1,409 @@
+const lib = @import("lib.zig");
+const assert = lib.assert;
+
+const LexerResult = struct {
+    token: Token,
+    offset: u32,
+    character_count: u32,
+};
+
+const Token = enum {};
+
+const left_bracket = '[';
+const right_bracket = ']';
+const left_brace = '{';
+const right_brace = '}';
+const left_parenthesis = '(';
+const right_parenthesis = ')';
+
+fn is_identifier_start_ch(ch: u8) bool {
+    return (ch >= 'a' and ch <= 'z') or (ch >= 'A' and ch <= 'Z') or ch == '_';
+}
+
+fn is_decimal_ch(ch: u8) bool {
+    return ch >= '0' and ch <= '9';
+}
+
+fn is_identifier_ch(ch: u8) bool {
+    return is_identifier_start_ch(ch) or is_decimal_ch(ch);
+}
+
+fn string_to_enum(comptime E: type, string: []const u8) ?E {
+    inline for (@typeInfo(E).@"enum".fields) |e| {
+        if (lib.string.equal(e.name, string)) {
+            return @field(E, e.name);
+        }
+    } else return null;
+}
+
+/// Returns the identifier that begins at `start`, as a slice of `content`.
+/// The slice is empty when `start` equals `content.len` or when the character
+/// at `start` cannot begin an identifier.
+pub fn parse_identifier(content: []const u8, start: u32) []const u8 {
+    var offset = start;
+
+    if (start < content.len and is_identifier_start_ch(content[start])) {
+        offset += 1;
+
+        while (offset < content.len) {
+            if (is_identifier_ch(content[offset])) {
+                offset += 1;
+            } else {
+                break;
+            }
+        }
+    }
+
+    return content[start..offset];
+}
+
+const GlobalKeyword = enum {
+    @"export",
+    @"extern",
+};
+
+const GlobalKind = enum {
+    @"fn",
+    foo,
+};
+
+const FunctionKeyword = enum {
+    cc,
+    foo,
+};
+
+const CallingConvention = enum {
+    unknown,
+    c,
+};
+
+fn report_error() noreturn {
+    lib.os.abort();
+}
+
+fn is_space(ch: u8) bool {
+    return ((ch == ' ') or (ch == '\n')) or ((ch == '\t' or ch == '\r'));
+}
+
+fn skip_space(content: []const u8, start: u32) u32 {
+    var offset = start;
+    while (offset < content.len and is_space(content[offset])) {
+        offset += 1;
+    }
+    return offset;
+}
+
+const StatementStartKeyword = enum {
+    @"return",
+    foooooooooo,
+};
+
+fn parse_integer(content: []const u8, start: u32) u32 {
+    const integer_start_ch = content[start];
+    assert(!is_space(integer_start_ch));
+    assert(is_decimal_ch(integer_start_ch));
+
+    var offset = start;
+
+    switch (integer_start_ch) {
+        '0' => {
+            offset += 1;
+
+            switch (content[offset]) {
+                'x' => {
+                    // TODO: parse hexadecimal
+                    report_error();
+                },
+                'o' => {
+                    // TODO: parse octal
+                    report_error();
+                },
+                'b' => {
+                    // TODO: parse binary
+                    report_error();
+                },
+                '0'...'9' => {
+                    report_error();
+                },
+                // Zero literal
+                else => {},
+            }
+        },
+        // TODO: decimal number
+        '1'...'9' => report_error(),
+        else => unreachable,
+    }
+
+    return offset;
+}
+
+fn parse_value(content: []const u8, start: u32) u32 {
+    var offset = start;
+    offset = skip_space(content, start);
+
+    const value_start_ch = content[offset];
+    if (is_identifier_start_ch(value_start_ch)) {
+        report_error();
+    } else if (is_decimal_ch(value_start_ch)) {
+        offset = parse_integer(content, offset);
+    } else {
+        report_error();
+    }
+
+    return offset;
+}
+
+fn parse_block(content: []const u8, start: u32) u32 {
+    var offset = start;
+
+    offset = skip_space(content, offset);
+
+    const is_left_brace = content[offset] == left_brace;
+    offset += @intFromBool(is_left_brace);
+
+    if (!is_left_brace) {
+        report_error();
+    }
+
+    while (true) {
+        offset = skip_space(content, offset);
+
+        if (offset == content.len) {
+            break;
+        }
+
+        if (content[offset] == right_brace) {
+            break;
+        }
+
+        const statement_start_ch = content[offset];
+        if (is_identifier_start_ch(statement_start_ch)) {
+            const statement_start_identifier = parse_identifier(content, offset);
+            // Here, since we have a mandatory identifier start ch, we know at least we have a one-character identifier and an if check is not necessary
+            offset += @intCast(statement_start_identifier.len);
+
+            if (string_to_enum(StatementStartKeyword, statement_start_identifier)) |statement_start_keyword| {
+                switch (statement_start_keyword) {
+                    .@"return" => {
+                        offset = parse_value(content, offset);
+                    },
+                    else => unreachable,
+                }
+
+                const require_semicolon = switch (statement_start_keyword) {
+                    .@"return" => true,
+                    else => report_error(),
+                };
+
+                const is_semicolon = content[offset] == ';';
+                offset += @intFromBool(is_semicolon);
+
+                if (require_semicolon and !is_semicolon) {
+                    report_error();
+                }
+            } else {
+                report_error();
+            }
+        } else {
+            report_error();
+        }
+    }
+
+    // TODO: handle it in a better way
+    assert(content[offset] == right_brace);
+    offset += 1;
+
+    return offset;
+}
+
+/// Parses every top-level declaration in `content`. The only form accepted so
+/// far is `[export] name = fn [cc(c)] () return_type { ... }`, where both
+/// bracketed attribute lists are optional. Any other input ends in
+/// `report_error`, which calls `lib.os.abort`.
+pub noinline fn parse_file(content: []const u8) void {
+    var offset: u32 = 0;
+
+    while (true) {
+        offset = skip_space(content, offset);
+
+        if (offset == content.len) {
+            break;
+        }
+
+        var is_export = false;
+
+        if (content[offset] == left_bracket) {
+            offset += 1;
+
+            while (offset < content.len) {
+                const global_keyword_string = parse_identifier(content, offset);
+                offset += @intCast(global_keyword_string.len);
+
+                if (global_keyword_string.len == 0) {
+                    break;
+                }
+
+                const global_keyword = string_to_enum(GlobalKeyword, global_keyword_string) orelse report_error();
+                switch (global_keyword) {
+                    // Seeing `export` between the brackets marks the global as exported.
+                    .@"export" => is_export = true,
+                    else => report_error(),
+                }
+
+                switch (content[offset]) {
+                    right_bracket => {},
+                    else => report_error(),
+                }
+            }
+
+            const is_right_bracket = content[offset] == right_bracket;
+            offset += @intFromBool(is_right_bracket);
+
+            if (!is_right_bracket) {
+                report_error();
+            }
+
+            offset = skip_space(content, offset);
+        }
+
+        const global_name = parse_identifier(content, offset);
+        offset += @intCast(global_name.len);
+
+        if (global_name.len == 0) {
+            report_error();
+        }
+
+        offset = skip_space(content, offset);
+
+        const is_equal_token = content[offset] == '=';
+        offset += @intFromBool(is_equal_token);
+
+        if (!is_equal_token) {
+            report_error();
+        }
+
+        offset = skip_space(content, offset);
+
+        const global_kind_string = parse_identifier(content, offset);
+        offset += @intCast(global_kind_string.len);
+
+        offset = skip_space(content, offset);
+
+        if (global_kind_string.len == 0) {
+            report_error();
+        }
+
+        const global_kind = string_to_enum(GlobalKind, global_kind_string) orelse report_error();
+
+        switch (global_kind) {
+            .@"fn" => {
+                var calling_convention = CallingConvention.unknown;
+
+                if (content[offset] == left_bracket) {
+                    offset += 1;
+
+                    while (offset < content.len) {
+                        const function_identifier = parse_identifier(content, offset);
+                        offset += @intCast(function_identifier.len);
+
+                        if (function_identifier.len == 0) {
+                            break;
+                        }
+
+                        const function_keyword = string_to_enum(FunctionKeyword, function_identifier) orelse report_error();
+
+                        offset = skip_space(content, offset);
+
+                        switch (function_keyword) {
+                            .cc => {
+                                const is_left_parenthesis = content[offset] == left_parenthesis;
+                                offset += @intFromBool(is_left_parenthesis);
+
+                                if (!is_left_parenthesis) {
+                                    report_error();
+                                }
+
+                                offset = skip_space(content, offset);
+
+                                const calling_convention_string = parse_identifier(content, offset);
+                                offset += @intCast(calling_convention_string.len);
+
+                                if (calling_convention_string.len == 0) {
+                                    report_error();
+                                }
+
+                                calling_convention = string_to_enum(CallingConvention, calling_convention_string) orelse report_error();
+
+                                offset = skip_space(content, offset);
+
+                                const is_right_parenthesis = content[offset] == right_parenthesis;
+                                offset += @intFromBool(is_right_parenthesis);
+
+                                if (!is_right_parenthesis) {
+                                    report_error();
+                                }
+
+                                offset = skip_space(content, offset);
+                            },
+                            else => report_error(),
+                        }
+
+                        switch (content[offset]) {
+                            right_bracket => {},
+                            else => report_error(),
+                        }
+                    }
+
+                    const is_right_bracket = content[offset] == right_bracket;
+                    offset += @intFromBool(is_right_bracket);
+
+                    if (!is_right_bracket) {
+                        report_error();
+                    }
+                }
+
+                offset = skip_space(content, offset);
+
+                const is_left_parenthesis = content[offset] == left_parenthesis;
+                offset += @intFromBool(is_left_parenthesis);
+
+                if (!is_left_parenthesis) {
+                    report_error();
+                }
+
+                while (offset < content.len and content[offset] != right_parenthesis) {
+                    // TODO: arguments
+                    report_error();
+                }
+
+                // TODO: handle it in a better way
+                assert(content[offset] == right_parenthesis);
+                offset += 1;
+
+                offset = skip_space(content, offset);
+
+                const return_type = parse_identifier(content, offset);
+                offset += @intCast(return_type.len);
+
+                if (return_type.len == 0) {
+                    report_error();
+                }
+
+                offset = parse_block(content, offset);
+            },
+            else => report_error(),
+        }
+    }
+}
+
+pub fn parser_experiment() void {
+    const strlit =
+        \\[export] main = fn [cc(c)] () s32
+        \\{
+        \\    return 0;
+        \\}
+    ;
+    parse_file(strlit);
+}
+
+test "parse" {}