From e44a6ee8b7f0b59520210fa918b98d4f32515204 Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Wed, 17 Apr 2024 08:19:35 -0600 Subject: [PATCH] Implement file reading in the self-hosted compiler --- bootstrap/Compilation.zig | 159 ++++++++++++++++++++-------------- bootstrap/backend/llvm.zig | 1 + bootstrap/frontend/parser.zig | 2 +- lib/std/os.nat | 102 +++++++++++++++++++--- lib/std/os/linux.nat | 65 +++++++++++++- lib/std/std.nat | 37 +------- src/main.nat | 13 ++- 7 files changed, 263 insertions(+), 116 deletions(-) diff --git a/bootstrap/Compilation.zig b/bootstrap/Compilation.zig index 289ce37..ffe4d64 100644 --- a/bootstrap/Compilation.zig +++ b/bootstrap/Compilation.zig @@ -3831,6 +3831,7 @@ pub const Instruction = union(enum) { pointer_to_nullable, pointer_source_type_to_destination_type, pointer_to_not_nullable, + pointer_none_terminated_to_zero, slice_var_to_const, slice_to_nullable, slice_to_not_null, @@ -4869,9 +4870,13 @@ pub const Builder = struct { unreachable; } } - if (destination_pointer.termination != source_pointer.termination) { - unreachable; - } + if (destination_pointer.termination != source_pointer.termination) return switch (destination_pointer.termination) { + .zero => switch (source_pointer.termination) { + .none => .pointer_none_terminated_to_zero, + else => |t| @panic(@tagName(t)), + }, + else => |t| @panic(@tagName(t)), + }; unreachable; } else { return .pointer_const_to_var; @@ -8258,10 +8263,19 @@ pub const Builder = struct { var sliceable_pointer_index: ?u32 = null; var sliceable_length_index: ?u32 = null; + var ignore_field_count: u8 = 0; for (field_nodes.slice(), 0..) 
|field_node_index, index| { const field_node = unit.getNode(field_node_index); - const identifier = unit.getExpectedTokenBytes(field_node.token, .identifier); + const identifier = switch (unit.getTokenId(field_node.token)) { + .identifier => unit.getExpectedTokenBytes(field_node.token, .identifier), + .discard => try std.mem.concat(context.allocator, u8, &.{"_", &.{'0' + b: { + const ch = '0' + ignore_field_count; + ignore_field_count += 1; + break :b ch; + }}}), + else => unreachable, + }; const hash = try unit.processIdentifier(context, identifier); switch (container_type) { @@ -8326,8 +8340,6 @@ pub const Builder = struct { .bitfield => { assert(field_node.id == .container_field); const bitfield = &ty.integer.kind.bitfield; - const field_name = unit.getExpectedTokenBytes(field_node.token, .identifier); - const field_name_hash = try unit.processIdentifier(context, field_name); const field_type = try builder.resolveType(unit, context, field_node.left, &.{}); const field_default_value: ?V.Comptime = switch (field_node.right) { .null => null, @@ -8335,7 +8347,7 @@ pub const Builder = struct { }; const struct_field = try unit.struct_fields.append(context.my_allocator, .{ - .name = field_name_hash, + .name = hash, .type = field_type, .default_value = field_default_value, }); @@ -11682,6 +11694,10 @@ pub const Builder = struct { else => |t| @panic(@tagName(t)), } }, + .anonymous_empty_literal => switch (type_expect) { + .type => |type_index| try builder.resolveContainerLiteral(unit, context, &.{}, type_index), + else => |t| @panic(@tagName(t)), + }, else => |t| @panic(@tagName(t)), }; @@ -12135,6 +12151,7 @@ pub const Builder = struct { assert(initialization_node.right == .null); const field_name = unit.getExpectedTokenBytes(Token.addInt(initialization_node.token, 1), .identifier); const field_name_hash = try unit.processIdentifier(context, field_name); + if (field_name_hash == field.name) { const expected_type = field.type; const field_initialization = try 
builder.resolveRuntimeValue(unit, context, Type.Expect{ .type = expected_type }, initialization_node.left, .right); @@ -12175,6 +12192,7 @@ pub const Builder = struct { .unsigned => ct_int.value, .signed => unreachable, }, + .enum_value => |enum_field_index| unit.enum_fields.get(enum_field_index).value, else => |t| @panic(@tagName(t)), }; const value_with_offset = field_value << @as(u6, @intCast(bit_offset)); @@ -12214,70 +12232,61 @@ pub const Builder = struct { const field_bit_size = field_type.getBitSize(unit); defer bit_offset += field_bit_size; - switch (field.value) { - .@"comptime" => |ct| { - _ = ct; // autofix - unreachable; + const field_zero_extend = try unit.instructions.append(context.my_allocator, .{ + .cast = .{ + .id = .zero_extend, + .value = field, + .type = type_index, }, - .runtime => { - const field_zero_extend = try unit.instructions.append(context.my_allocator, .{ - .cast = .{ - .id = .zero_extend, - .value = field, - .type = type_index, - }, - }); - try builder.appendInstruction(unit, context, field_zero_extend); + }); + try builder.appendInstruction(unit, context, field_zero_extend); - const shift_left = try unit.instructions.append(context.my_allocator, .{ - .integer_binary_operation = .{ - .id = .shift_left, - .left = .{ - .value = .{ - .runtime = field_zero_extend, - }, - .type = type_index, - }, - .right = .{ - .value = .{ - .@"comptime" = .{ - .constant_int = .{ - .value = bit_offset, - }, - }, - }, - .type = type_index, - }, - .signedness = integer.signedness, - }, - }); - - try builder.appendInstruction(unit, context, shift_left); - - const merge_or = try unit.instructions.append(context.my_allocator, .{ - .integer_binary_operation = .{ - .id = .bit_or, - .signedness = integer.signedness, - .left = .{ - .value = .{ - .runtime = shift_left, - }, - .type = type_index, - }, - .right = value, - }, - }); - try builder.appendInstruction(unit, context, merge_or); - - value = .{ + const shift_left = try 
unit.instructions.append(context.my_allocator, .{ + .integer_binary_operation = .{ + .id = .shift_left, + .left = .{ .value = .{ - .runtime = merge_or, + .runtime = field_zero_extend, }, .type = type_index, - }; + }, + .right = .{ + .value = .{ + .@"comptime" = .{ + .constant_int = .{ + .value = bit_offset, + }, + }, + }, + .type = type_index, + }, + .signedness = integer.signedness, + }, + }); + + try builder.appendInstruction(unit, context, shift_left); + + const merge_or = try unit.instructions.append(context.my_allocator, .{ + .integer_binary_operation = .{ + .id = .bit_or, + .signedness = integer.signedness, + .left = .{ + .value = .{ + .runtime = shift_left, + }, + .type = type_index, + }, + .right = value, }, - else => |t| @panic(@tagName(t)), - } + }); + try builder.appendInstruction(unit, context, merge_or); + + value = .{ + .value = .{ + .runtime = merge_or, + }, + .type = type_index, + }; } return value; @@ -13393,7 +13402,14 @@ pub const Builder = struct { }, .call => { const result = try builder.resolveCall(unit, context, statement_node_index); - assert(result.type == .void or result.type == .noreturn); + switch (unit.types.get(result.type).*) { + .void, .noreturn => {}, + .@"struct" => |struct_index| switch (unit.structs.get(struct_index).kind) { + .error_union => {}, + else => |t| @panic(@tagName(t)), + }, + else => |t| @panic(@tagName(t)), + } }, .@"switch" => { const expression_to_switch_on = try builder.resolveRuntimeValue(unit, context, Type.Expect.none, statement_node.left, .right); @@ -14354,6 +14370,7 @@ pub const Builder = struct { const phi_block = try builder.newBasicBlock(unit, context); const before_switch_bb = builder.current_basic_block; + for (case_nodes) |case_node_index| { builder.current_basic_block = before_switch_bb; const case_node = unit.getNode(case_node_index); @@ -14403,6 +14420,14 @@ pub const Builder = struct { } } + if (switch_instruction.else_block == .null) { + switch_instruction.else_block = try 
builder.newBasicBlock(unit, context); + const old_block = builder.current_basic_block; + builder.current_basic_block = switch_instruction.else_block; + try builder.buildUnreachable(unit, context); + builder.current_basic_block = old_block; + } + if (phi.values.length > 0) { builder.current_basic_block = phi_block; try builder.appendInstruction(unit, context, phi_index); @@ -15145,6 +15170,7 @@ pub const Builder = struct { } }, .error_to_all_errors_error_union => return try builder.resolveErrorToAllErrorUnion(unit, context, ti, result), + .type_to_error_union => return try builder.resolveTypeToErrorUnion(unit, context, ti, result), else => |t| @panic(@tagName(t)), } }, @@ -15198,6 +15224,7 @@ pub const Builder = struct { const return_node = unit.getNode(return_node_index); assert(return_node.id == .@"return"); assert(return_node.right == .null); + const return_value = if (return_node.left != .null) b: { const return_value_node_index = return_node.left; const return_value = try builder.resolveRuntimeValue(unit, context, Type.Expect{ diff --git a/bootstrap/backend/llvm.zig b/bootstrap/backend/llvm.zig index 5cdd8e9..76a9733 100644 --- a/bootstrap/backend/llvm.zig +++ b/bootstrap/backend/llvm.zig @@ -2711,6 +2711,7 @@ pub fn codegen(unit: *Compilation.Unit, context: *const Compilation.Context) !vo .pointer_const_to_var, .pointer_to_array_to_pointer_to_many, .pointer_source_type_to_destination_type, + .pointer_none_terminated_to_zero, => { try llvm.llvm_instruction_map.put_no_clobber(context.my_allocator, instruction_index, value); }, diff --git a/bootstrap/frontend/parser.zig b/bootstrap/frontend/parser.zig index 8a831ac..103bd5a 100644 --- a/bootstrap/frontend/parser.zig +++ b/bootstrap/frontend/parser.zig @@ -1799,7 +1799,7 @@ const Analyzer = struct { }, else => |t| @panic(@tagName(t)), }, - .identifier => b: { + .identifier, .discard, => b: { analyzer.consumeToken(); switch (container_type) { diff --git a/lib/std/os.nat b/lib/std/os.nat index 4731339..6614c18 
100644 --- a/lib/std/os.nat +++ b/lib/std/os.nat @@ -40,7 +40,7 @@ const FileDescriptor = struct{ handle: system.FileDescriptor, const ReadError = error{ - + failed, }; const read = fn(file_descriptor: FileDescriptor, bytes: []u8) ReadError!usize { @@ -48,27 +48,43 @@ const FileDescriptor = struct{ switch (current) { .linux => { const len: usize = #min(max_file_operation_byte_count, bytes.length); - const syscall_result = system.read(file_descriptor, bytes); + const syscall_result = system.read(file_descriptor.handle, bytes); const byte_count = unwrap_syscall(syscall_result) catch |err| switch (err) { - else => unreachable, + else => return ReadError.failed, }; return byte_count; }, .macos => { const len: usize = #min(max_file_operation_byte_count, bytes.length); - const syscall_result = system.read(file_descriptor, bytes); + const syscall_result = system.read(file_descriptor.handle, bytes); const byte_count = unwrap_syscall(syscall_result) catch |err| switch (err) { - else => unreachable, + else => return ReadError.failed, }; return byte_count; }, else => #error("OS not supported"), } } else { - return 0; + const result: usize = 0; + return result; } } + const read_all = fn(file_descriptor: FileDescriptor, bytes: []u8) ReadError!void { + var bytes_read: usize = 0; + + while (bytes_read < bytes.length) { + const iteration_read_byte_count = try file_descriptor.read(bytes = bytes[bytes_read..]); + // A zero-byte read means end of file; fail instead of looping forever. + if (iteration_read_byte_count == 0) { + return ReadError.failed; + } + bytes_read += iteration_read_byte_count; + } + + assert(bytes_read == bytes.length); + } + const WriteError = error{ write_failed, }; @@ -94,6 +110,56 @@ const FileDescriptor = struct{ else => #error("OS not supported"), } } + + const write_all = fn(file_descriptor: FileDescriptor, bytes: []const u8) WriteError!void { + var bytes_written: usize = 0; + + while (bytes_written < bytes.length) { + const iteration_written_byte_count = try file_descriptor.write(bytes = bytes[bytes_written..]); + bytes_written += iteration_written_byte_count; + } + + assert(bytes_written == 
bytes.length); + } + + const get_size = fn (file_descriptor: FileDescriptor) GetAttributesError!u64 { + switch (current) { + .linux => { + const file_attributes = try file_descriptor.get_attributes(); + return file_attributes.size; + }, + else => #error("OS not supported"), + } + } + + const GetAttributesError = error{ + failed, + }; + + const get_attributes = fn (file_descriptor: FileDescriptor) GetAttributesError!FileAttributes { + switch (current) { + .linux => { + var stat_buffer: linux.Stat = undefined; + const raw_result = linux.fstat(file_descriptor.handle, stat_buffer.&); + const result = unwrap_syscall(raw_result) catch |err| switch (err) { + else => return GetAttributesError.failed, + }; + + const size: u64 = #cast(stat_buffer.size); + + const file_attributes = FileAttributes{ + .size = size, + }; + + return file_attributes; + }, + else => #error("OS not supported"), + } + } +}; + +const FileAttributes = struct{ + size: u64, }; const StdFileDescriptor = enum { @@ -318,18 +384,39 @@ const dup2 = fn(old_file_descriptor: system.FileDescriptor, new_file_descriptor: } const OpenError = error{ - + failed, }; -const open = fn(path: [&:0]const u8, flags: u32, permissions: u32) OpenError!FileDescriptor{ + +const OpenFlags = bitfield(u32) { + access_mode: AccessMode = .read_only, +}; + +const AccessMode = enum(u2) { + read_only = 0, + write_only = 1, + read_write = 2, +}; + +const open = fn(path: [&:0]const u8, open_flags: OpenFlags) OpenError!FileDescriptor{ switch (current) { .linux => { - const syscall_result = linux.open(path, flags, permissions); + const flags = linux.OpenFlags{ + .access_mode = switch (open_flags.access_mode) { + .read_only => .read_only, + .write_only => .write_only, + .read_write => .read_write, + }, + }; + const syscall_result = linux.open(path, flags, 0); const result = unwrap_syscall(syscall_result) catch |err| switch (err) { - else => unreachable, + // Opening can fail at runtime (e.g. missing file); report it instead of hitting unreachable. + else => return OpenError.failed, }; + const r: u32 = #cast(result); + const file_descriptor = FileDescriptor{ - .handle = 
#cast(result), + .handle = #cast(r), }; return file_descriptor; }, diff --git a/lib/std/os/linux.nat b/lib/std/os/linux.nat index bb11d6e..83939a6 100644 --- a/lib/std/os/linux.nat +++ b/lib/std/os/linux.nat @@ -908,8 +908,9 @@ const dup2 = fn(old: FileDescriptor, new: FileDescriptor) usize { return result; } -const open = fn(path: [&:0]const u8, flags: u32, permissions: u32) usize { - const result = #syscall(#cast(Syscall.open), #cast(path), flags, permissions); +const open = fn(path: [&:0]const u8, flags: OpenFlags, permissions: u32) usize { + const flattened_flags: u32 = #cast(flags); + const result = #syscall(#cast(Syscall.open), #cast(path), flattened_flags, permissions); return result; } @@ -954,6 +955,37 @@ const memfd_create = fn(name: [&:0]const u8, flags: u32) usize { return result; } +const TimeSpec = struct{ + seconds: s64, + nanoseconds: s64, +}; + +const Stat = struct{ + dev: u64, + inode: u64, + nlink: u64, + + mode: u32, + uid: u32, + gid: u32, + _: u32, + rdev: u64, + size: s64, + block_size: s64, + blocks: s64, + + atime: TimeSpec, + mtime: TimeSpec, + ctime: TimeSpec, + _: [3]u64, +}; + +const fstat = fn(file_descriptor: FileDescriptor, stat_buffer: &Stat) usize { + const file_descriptor_u: u32 = #cast(file_descriptor); + const result = #syscall(#cast(Syscall.fstat), file_descriptor_u, #cast(stat_buffer)); + return result; +} + const unwrap_syscall = fn(syscall_result: usize) Error!usize { const signed_syscall_result: ssize = #cast(syscall_result); @@ -989,3 +1021,32 @@ const PollFileDescriptor = struct{ rdband: bool = false, }; }; + +const AccessMode = enum(u2) { + read_only = 0, + write_only = 1, + read_write = 2, +}; + +const OpenFlags = bitfield(u32) { + access_mode: AccessMode, + _: u4 = 0, + creat: bool = false, + excl: bool = false, + noctty: bool = false, + truncate: bool = false, + append: bool = false, + non_block: bool = false, + dsync: bool = false, + async: bool = false, + direct: bool = false, + _: u1 = 0, + directory: bool = false, 
+ no_follow: bool = false, + noatime: bool = false, + cloexec: bool = false, + sync: bool = false, + path: bool = false, + tmpfile: bool = false, + _: u9 = 0, +}; diff --git a/lib/std/std.nat b/lib/std/std.nat index cec506a..8945a79 100644 --- a/lib/std/std.nat +++ b/lib/std/std.nat @@ -44,10 +44,7 @@ const byte_equal = fn (a: []const u8, b: []const u8) bool { const print = fn(bytes: []const u8) void { const file_descriptor = os.StdFileDescriptor.get(descriptor = .stdout); - const file_writer = FileWriter{ - .descriptor = file_descriptor, - }; - _ = file_writer.write_all(bytes) catch unreachable; + _ = file_descriptor.write_all(bytes) catch unreachable; } const format_usize = fn(n: usize, buffer: &[65]u8) []u8 { @@ -75,10 +72,7 @@ const print_usize = fn(n: usize) void { const bytes = format_usize(n, buffer = buffer.&); assert(bytes.length < buffer.length); const file_descriptor = os.StdFileDescriptor.get(descriptor = .stdout); - const file_writer = FileWriter{ - .descriptor = file_descriptor, - }; - file_writer.write_all(bytes) catch unreachable; + file_descriptor.write_all(bytes) catch unreachable; } const print_u8 = fn(n: u8) void { @@ -173,33 +167,6 @@ const Arena = struct{ } }; -const Writer = struct{ - callback: &const fn(writer: &Writer, bytes: []const u8) Writer.Error!usize, - const Error = error{ - write_failed, - }; -}; - -const FileWriter = struct{ - descriptor: os.FileDescriptor, - - const write = fn(file_writer: FileWriter, bytes: []const u8) Writer.Error!usize { - const bytes_written = file_writer.descriptor.write(bytes) catch return Writer.Error.write_failed; - return bytes_written; - } - - const write_all = fn(file_writer: FileWriter, bytes: []const u8) Writer.Error!void { - var bytes_written: usize = 0; - - while (bytes_written < bytes.length) { - const iteration_written_byte_count = try file_writer.write(bytes = bytes[bytes_written..]); - bytes_written += iteration_written_byte_count; - } - - assert(bytes_written == bytes.length); - } -}; - const 
copy_bytes = fn(destination: []u8, source: []const u8) void { assert(ok = destination.length == source.length); for (0..destination.length) |i| { diff --git a/src/main.nat b/src/main.nat index 2a45744..77618a7 100644 --- a/src/main.nat +++ b/src/main.nat @@ -39,7 +39,7 @@ const make_output_path = fn (main_source_file_path: []const u8) []const u8 { assert(main_source_file_path.length > 0); } -const command_exe = fn (arena: &Arena, command_arguments: []const [&:0]const u8) void { +const command_exe = fn (arena: &Arena, command_arguments: []const [&:0]const u8) *!void { var i: usize = 0; var maybe_output_argument: ?[]const u8 = null; @@ -70,7 +70,17 @@ const command_exe = fn (arena: &Arena, command_arguments: []const [&:0]const u8) const main_executable_name = maybe_main_executable_name orelse (std.os.basename(main_source_file[0..main_source_file.length - 9]) orelse unreachable); // 9 => "/main.nat".length - print("Foo\n"); + print("TODO: lex '"); + print(main_source_file); + print("'\n"); + + const file_descriptor = try std.os.open(#cast(main_source_file.pointer), .{}); + const file_size = try file_descriptor.get_size(); + const file_buffer = try arena.new_array($u8, file_size); + // Propagate read failures instead of printing an unfilled buffer. + try file_descriptor.read_all(file_buffer); + print("File:\n"); + print(file_buffer); } const main = fn() *!void {