diff --git a/bootstrap/Compilation.zig b/bootstrap/Compilation.zig index d1d708c..f7dcccb 100644 --- a/bootstrap/Compilation.zig +++ b/bootstrap/Compilation.zig @@ -8,6 +8,7 @@ const byte_equal = data_structures.byte_equal; const byte_equal_terminated = data_structures.byte_equal_terminated; const first_slice = data_structures.first_slice; const starts_with_slice = data_structures.starts_with_slice; +const PinnedArray = data_structures.PinnedArray; const UnpinnedArray = data_structures.UnpinnedArray; const BlockList = data_structures.BlockList; const MyAllocator = data_structures.MyAllocator; @@ -113,6 +114,10 @@ pub fn compileBuildExecutable(context: *const Context, arguments: []const []cons .is_test = false, .c_source_files = &.{}, }, + .token_buffer = Token.Buffer{ + .tokens = try PinnedArray(Token).init_with_default_granularity(), + .line_offsets = try PinnedArray(u32).init_with_default_granularity(), + }, }; try unit.compile(context); @@ -3005,6 +3010,10 @@ pub fn buildExecutable(context: *const Context, arguments: []const []const u8, o .is_test = options.is_test, .c_source_files = c_source_files.slice(), }, + .token_buffer = Token.Buffer{ + .tokens = try PinnedArray(Token).init_with_default_granularity(), + .line_offsets = try PinnedArray(u32).init_with_default_granularity(), + }, }; try unit.compile(context); @@ -5617,16 +5626,12 @@ pub const Builder = struct { column: u32, }; - fn getTokenDebugInfo(builder: *Builder, unit: *Unit, token: Token.Index) TokenDebugInfo { + fn getTokenDebugInfo(builder: *Builder, unit: *Unit, token_index: Token.Index) TokenDebugInfo { const file = unit.files.get(builder.current_file); - const index = Token.unwrap(token); - assert(index < unit.token_buffer.length); - const line_offset_index = unit.token_buffer.lines[index]; - const line = line_offset_index - file.lexer.line_offset; - const offset = unit.token_buffer.offsets[index]; - assert(line_offset_index < unit.token_buffer.line_offsets.length); - const line_offset = unit.token_buffer.line_offsets.pointer[line_offset_index]; - const column = offset - line_offset; + const token = unit.token_buffer.tokens.get(token_index); + const line = token.line - file.lexer.line_offset; + const line_offset = unit.token_buffer.line_offsets.get_unchecked(token.line).*; + const column = token.offset - line_offset; return .{ .line = line, @@ -6708,33 +6713,6 @@ pub const Builder = struct { else => |t| @panic(@tagName(t)), } } else { - // var scope_it: ?*Debug.Scope = builder.current_scope; - // const indentation_size = 4; - // var indentation: u32 = 0; - // - // var file_path: []const u8 = ""; - // while (scope_it) |scope| : (scope_it = scope.parent) { - // for (0..indentation * indentation_size) |_| { - // std.debug.print(" ", .{}); - // } - // std.debug.print("> Scope {s} ", .{@tagName(scope.kind)}); - // switch (scope.kind) { - // .compilation_unit => {}, - // .file_container, .container => {}, - // .function => {}, - // .file => { - // const global_scope = @fieldParentPtr(Debug.Scope.Global, "scope", scope); - // const file = @fieldParentPtr(Debug.File, "scope", global_scope); - // std.debug.print("{s}", .{file.relative_path}); - // file_path = file.relative_path; - // }, - // .block => {}, - // } - // - // std.debug.print("\n", .{}); - // indentation += 1; - // } - try write(.panic, "identifier '"); try write(.panic, identifier); try write(.panic, "' not found\n"); @@ -7425,7 +7403,7 @@ pub const Builder = struct { .comptime_argument_declaration => switch (polymorphic_call_argument_node.id) { .comptime_expression 
=> { const comptime_argument = try builder.resolveComptimeValue(unit, context, Type.Expect{ .type = argument_type }, .{}, polymorphic_call_argument_node.left, null, .right, &.{}, null, &.{}); - const name = unit.getExpectedTokenBytes(Token.addInt(argument_declaration_node.token, 1), .identifier); + const name = unit.getExpectedTokenBytes(@enumFromInt(@intFromEnum(argument_declaration_node.token) + 1), .identifier); const name_hash = try unit.processIdentifier(context, name); const debug_info = builder.getTokenDebugInfo(unit, argument_declaration_node.token); try comptime_parameter_declarations.append(context.my_allocator, .{ @@ -7505,7 +7483,7 @@ pub const Builder = struct { } fn put_argument_in_scope(builder: *Builder, unit: *Unit, context: *const Context, argument_node: *const Node, argument_index: usize, argument_type_index: Type.Index) !void { - const argument_name = switch (unit.getTokenId(argument_node.token)) { + const argument_name = switch (unit.token_buffer.tokens.get(argument_node.token).id) { .identifier => b: { const argument_name = unit.getExpectedTokenBytes(argument_node.token, .identifier); @@ -8707,7 +8685,7 @@ pub const Builder = struct { .constant_symbol_declaration, .variable_symbol_declaration, => { - const expected_identifier_token_index = Token.addInt(declaration_node.token, 1); + const expected_identifier_token_index: Token.Index = @enumFromInt(@intFromEnum(declaration_node.token) + 1); const identifier = unit.getExpectedTokenBytes(expected_identifier_token_index, .identifier); // logln(.compilation, .identifier, "Analyzing global declaration {s}", .{identifier}); const identifier_hash = try unit.processIdentifier(context, identifier); @@ -8823,7 +8801,7 @@ pub const Builder = struct { for (field_nodes.slice(), 0..) |field_node_index, index| { const field_node = unit.getNode(field_node_index); - const identifier = switch (unit.getTokenId(field_node.token)) { + const identifier = switch (unit.token_buffer.tokens.get(field_node.token).id) { .identifier => unit.getExpectedTokenBytes(field_node.token, .identifier), .string_literal => try unit.fixupStringLiteral(context, field_node.token), .discard => try std.mem.concat(context.allocator, u8, &.{ "_", &.{'0' + b: { @@ -9875,7 +9853,7 @@ pub const Builder = struct { switch (type_expect) { .type => |type_index| { const expected_type = unit.types.get(type_index); - const identifier = unit.getExpectedTokenBytes(Token.addInt(node.token, 1), .identifier); + const identifier = unit.getExpectedTokenBytes(@enumFromInt(@intFromEnum(node.token) + 1), .identifier); const hash = try unit.processIdentifier(context, identifier); switch (expected_type.*) { .integer => |*integer| switch (integer.kind) { @@ -10587,7 +10565,7 @@ pub const Builder = struct { switch (expected_type.*) { .integer => |*integer| switch (integer.kind) { .@"enum" => |*enum_type| { - const identifier = unit.getExpectedTokenBytes(Token.addInt(node.token, 1), .identifier); + const identifier = unit.getExpectedTokenBytes(@enumFromInt(@intFromEnum(node.token) + 1), .identifier); const hash = try unit.processIdentifier(context, identifier); for (enum_type.fields.slice()) |field_index| { const field = unit.enum_fields.get(field_index); @@ -13045,7 +13023,7 @@ pub const Builder = struct { assert(initialization_node.id == .container_field_initialization); assert(initialization_node.left != .null); assert(initialization_node.right == .null); - const field_name = unit.getExpectedTokenBytes(Token.addInt(initialization_node.token, 1), .identifier); + const field_name = 
unit.getExpectedTokenBytes(@enumFromInt(@intFromEnum(initialization_node.token) + 1), .identifier); const field_name_hash = try unit.processIdentifier(context, field_name); if (field_name_hash == field.name) { @@ -14124,12 +14102,11 @@ pub const Builder = struct { } } - fn emitLocalVariableDeclaration(builder: *Builder, unit: *Unit, context: *const Context, token: Token.Index, mutability: Mutability, declaration_type: Type.Index, initialization: V, emit: bool, maybe_name: ?[]const u8) !Instruction.Index { + fn emitLocalVariableDeclaration(builder: *Builder, unit: *Unit, context: *const Context, token_index: Token.Index, mutability: Mutability, declaration_type: Type.Index, initialization: V, emit: bool, maybe_name: ?[]const u8) !Instruction.Index { assert(builder.current_scope.local); - const index = Token.unwrap(token); - const id = unit.token_buffer.ids[index]; - const identifier = if (maybe_name) |name| name else switch (id) { - .identifier => unit.getExpectedTokenBytes(token, .identifier), + const token = unit.token_buffer.tokens.get(token_index); + const identifier = if (maybe_name) |name| name else switch (token.id) { + .identifier => unit.getExpectedTokenBytes(token_index, .identifier), .discard => blk: { const name = try join_name(context, "_", unit.discard_identifiers, 10); unit.discard_identifiers += 1; @@ -14139,7 +14116,7 @@ pub const Builder = struct { }; // logln(.compilation, .identifier, "Analyzing local declaration {s}", .{identifier}); const identifier_hash = try unit.processIdentifier(context, identifier); - const token_debug_info = builder.getTokenDebugInfo(unit, token); + const token_debug_info = builder.getTokenDebugInfo(unit, token_index); const look_in_parent_scopes = true; if (builder.current_scope.lookupDeclaration(identifier_hash, look_in_parent_scopes)) |lookup| { @@ -14255,7 +14232,7 @@ pub const Builder = struct { => { // All variables here are local assert(builder.current_scope.local); - const expected_identifier_token_index = Token.addInt(statement_node.token, 1); + const expected_identifier_token_index: Token.Index = @enumFromInt(@intFromEnum(statement_node.token) + 1); const mutability: Mutability = switch (statement_node.id) { .constant_symbol_declaration => .@"const", @@ -16826,6 +16803,7 @@ pub const Enum = struct { pub const Unit = struct { node_buffer: Node.List = .{}, + token_buffer: Token.Buffer, files: Debug.File.List = .{}, types: Type.List = .{}, structs: Struct.List = .{}, @@ -16847,7 +16825,6 @@ pub const Unit = struct { constant_arrays: V.Comptime.ConstantArray.List = .{}, constant_slices: V.Comptime.ConstantSlice.List = .{}, error_fields: Type.Error.Field.List = .{}, - token_buffer: Token.Buffer = .{}, node_lists: UnpinnedArray(UnpinnedArray(Node.Index)) = .{}, file_token_offsets: MyHashMap(Token.Range, Debug.File.Index) = .{}, file_map: MyHashMap([]const u8, Debug.File.Index) = .{}, @@ -17095,7 +17072,7 @@ pub const Unit = struct { switch (switch_case_condition_node.id) { .dot_literal => { - if (try unit.typeCheckEnumLiteral(context, Token.addInt(switch_case_condition_node.token, 1), enum_type)) |enum_field_index| { + if (try unit.typeCheckEnumLiteral(context, @enumFromInt(@intFromEnum(switch_case_condition_node.token) + 1), enum_type)) |enum_field_index| { for (existing_enums.slice()) |existing| { if (enum_field_index == existing) { // Duplicate case @@ -17117,7 +17094,7 @@ pub const Unit = struct { const case_condition_node = unit.getNode(case_condition_node_index); switch (case_condition_node.id) { .dot_literal => { - if (try 
unit.typeCheckEnumLiteral(context, Token.addInt(case_condition_node.token, 1), enum_type)) |enum_field_index| { + if (try unit.typeCheckEnumLiteral(context, @enumFromInt(@intFromEnum(case_condition_node.token) + 1), enum_type)) |enum_field_index| { for (existing_enums.slice()) |existing| { if (enum_field_index == existing) { // Duplicate case @@ -17193,23 +17170,12 @@ pub const Unit = struct { unreachable; } - fn getTokenId(unit: *Unit, token_index: Token.Index) Token.Id { - const index = Token.unwrap(token_index); - assert(index < unit.token_buffer.length); - const id = unit.token_buffer.ids[index]; - return id; - } - fn getExpectedTokenBytes(unit: *Unit, token_index: Token.Index, expected_id: Token.Id) []const u8 { - const id = unit.getTokenId(token_index); - // logln(.compilation, .token_bytes, "trying to get {s} from token of id {s}", .{ @tagName(expected_id), @tagName(id) }); - if (id != expected_id) @panic("Unexpected token"); - const index = Token.unwrap(token_index); - const offset = unit.token_buffer.offsets[index]; - const len = unit.token_buffer.lengths[index]; + const token = unit.token_buffer.tokens.get(token_index); const file_index = unit.findTokenFile(token_index); const file = unit.files.get(file_index); - const bytes = file.source_code[offset..][0..len]; + if (token.id != expected_id) @panic("Unexpected token"); + const bytes = file.source_code[token.offset..][0..token.length]; return bytes; } @@ -17411,7 +17377,7 @@ pub const Unit = struct { file.status = .loaded_into_memory; assert(file.status == .loaded_into_memory); - file.lexer = try lexer.analyze(context.my_allocator, file.source_code, &unit.token_buffer); + file.lexer = try lexer.analyze(file.source_code, &unit.token_buffer); assert(file.status == .loaded_into_memory); file.status = .lexed; try unit.file_token_offsets.put_no_clobber(context.my_allocator, .{ @@ -17741,84 +17707,11 @@ pub const Token = struct { length: u32, id: Token.Id, - pub const Buffer = struct { - lines: [*]u32 = undefined, - offsets: [*]u32 = undefined, - lengths: [*]u32 = undefined, - ids: [*]Token.Id = undefined, - line_offsets: UnpinnedArray(u32) = .{}, - length: data_structures.IndexType = 0, - capacity: data_structures.IndexType = 0, - - const factor = 2; - const initial_item_count = 16; - - pub fn append_with_capacity(buffer: *Buffer, token: Token) void { - const index = buffer.length; - assert(index < buffer.capacity); - - buffer.lines[index] = token.line; - buffer.offsets[index] = token.offset; - buffer.lengths[index] = token.length; - buffer.ids[index] = token.id; - - buffer.length += 1; - } - - pub fn ensure_with_capacity(buffer: *Buffer, allocator: *MyAllocator, unused_capacity: data_structures.IndexType) !void { - const desired_capacity = buffer.length + unused_capacity; - var new_capacity = @max(buffer.capacity, initial_item_count); - while (new_capacity < desired_capacity) { - new_capacity *= factor; - } - - if (new_capacity > buffer.capacity) { - { - const line_byte_ptr: [*]u8 = @ptrCast(buffer.lines); - const line_bytes = line_byte_ptr[0 .. buffer.length * @sizeOf(u32)]; - const new_line_bytes = try allocator.reallocate(line_bytes, new_capacity * @sizeOf(u32), @alignOf(u32)); - buffer.lines = @ptrCast(@alignCast(new_line_bytes)); - } - - { - const offset_byte_ptr: [*]u8 = @ptrCast(buffer.offsets); - const offset_bytes = offset_byte_ptr[0 .. 
buffer.length * @sizeOf(u32)]; - const new_offset_bytes = try allocator.reallocate(offset_bytes, new_capacity * @sizeOf(u32), @alignOf(u32)); - buffer.offsets = @ptrCast(@alignCast(new_offset_bytes)); - } - - { - const length_byte_ptr: [*]u8 = @ptrCast(buffer.lengths); - const length_bytes = length_byte_ptr[0 .. buffer.length * @sizeOf(u32)]; - const new_length_bytes = try allocator.reallocate(length_bytes, new_capacity * @sizeOf(u32), @alignOf(u32)); - buffer.lengths = @ptrCast(@alignCast(new_length_bytes)); - } - - { - const id_byte_ptr: [*]u8 = @ptrCast(buffer.ids); - const id_bytes = id_byte_ptr[0 .. buffer.length * @sizeOf(Token.Id)]; - const new_id_bytes = try allocator.reallocate(id_bytes, new_capacity * @sizeOf(Token.Id), @alignOf(Token.Id)); - buffer.ids = @ptrCast(@alignCast(new_id_bytes)); - } - - buffer.capacity = new_capacity; - } - } - - pub fn getOffset(buffer: *const Buffer) Token.Index { - return @enumFromInt(buffer.length); - } - - pub fn getLineOffset(buffer: *const Buffer) u32 { - return @intCast(buffer.line_offsets.length); - } + pub const Buffer = struct{ + line_offsets: PinnedArray(u32) = .{}, + tokens: PinnedArray(Token) = .{}, }; - - pub const Range = struct { - start: Token.Index, - count: u32, - }; - + pub const Id = enum { keyword_unsigned_integer, keyword_signed_integer, @@ -17992,7 +17885,12 @@ pub const Token = struct { } }; - pub usingnamespace data_structures.getIndexForType(@This(), enum {}); + pub const Index = PinnedArray(Token).Index; + + pub const Range = struct { + start: Token.Index, + count: u32, + }; }; pub const InlineAssembly = struct { diff --git a/bootstrap/frontend/lexer.zig b/bootstrap/frontend/lexer.zig index b125f62..53491bd 100644 --- a/bootstrap/frontend/lexer.zig +++ b/bootstrap/frontend/lexer.zig @@ -3,10 +3,12 @@ const Allocator = std.mem.Allocator; const assert = std.debug.assert; const log = std.log; -const data_structures = @import("../library.zig"); -const enumFromString = data_structures.enumFromString; -const MyAllocator = data_structures.MyAllocator; -const UnpinnedArray = data_structures.UnpinnedArray; +const library = @import("../library.zig"); +const byte_equal = library.byte_equal; +const enumFromString = library.enumFromString; +const MyAllocator = library.MyAllocator; +const PinnedArray = library.PinnedArray; +const UnpinnedArray = library.UnpinnedArray; const Compilation = @import("../Compilation.zig"); const File = Compilation.File; @@ -43,31 +45,31 @@ pub const Logger = enum { }); }; -pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.Buffer) !Result { +pub fn analyze(text: []const u8, token_buffer: *Token.Buffer) !Result { assert(text.len <= std.math.maxInt(u32)); const len: u32 = @intCast(text.len); var lexer = Result{ - .offset = token_buffer.getOffset(), - .line_offset = token_buffer.getLineOffset(), + .offset = @enumFromInt(token_buffer.tokens.length), + .line_offset = token_buffer.line_offsets.length, .count = 0, .line_count = 0, }; const time_start = std.time.Instant.now() catch unreachable; - try token_buffer.line_offsets.append(allocator, 0); + token_buffer.line_offsets.append(0); for (text, 0..) 
|byte, index| { if (byte == '\n') { - try token_buffer.line_offsets.append(allocator, @intCast(index + 1)); + token_buffer.line_offsets.append(@intCast(index + 1)); } } var index: u32 = 0; var line_index: u32 = lexer.line_offset; - try token_buffer.ensure_with_capacity(allocator, len / 3); + // try token_buffer.ensure_with_capacity(allocator, len / 3); // logln(.lexer, .end, "START LEXER - TOKEN OFFSET: {} - LINE OFFSET: {}", .{ Token.unwrap(lexer.offset), lexer.line_offset }); @@ -110,7 +112,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B const string = text[start_index..][0 .. index - start_index]; break :blk if (enumFromString(Compilation.FixedKeyword, string)) |fixed_keyword| switch (fixed_keyword) { inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)), - } else if (data_structures.byte_equal(string, "_")) .discard else .identifier; + } else if (byte_equal(string, "_")) .discard else .identifier; }, '0'...'9' => blk: { // Detect other non-decimal literals @@ -481,7 +483,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B }, // Asm statement (special treatment) '`' => { - token_buffer.append_with_capacity(.{ + token_buffer.tokens.append(.{ .id = .operator_backtick, .line = line_index, .offset = start_index, @@ -508,7 +510,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B } } - token_buffer.append_with_capacity(.{ + token_buffer.tokens.append(.{ .id = .identifier, .offset = start_i, .length = index - start_i, @@ -516,7 +518,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B }); }, ',' => { - token_buffer.append_with_capacity(.{ + token_buffer.tokens.append(.{ .id = .operator_comma, .line = line_index, .offset = start_i, @@ -525,7 +527,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B index += 1; }, ';' => { - token_buffer.append_with_capacity(.{ + token_buffer.tokens.append(.{ .id = .operator_semicolon, .line = line_index, .offset = start_i, @@ -534,7 +536,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B index += 1; }, '{' => { - token_buffer.append_with_capacity(.{ + token_buffer.tokens.append(.{ .id = .operator_left_brace, .line = line_index, .offset = start_i, @@ -543,7 +545,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B index += 1; }, '}' => { - token_buffer.append_with_capacity(.{ + token_buffer.tokens.append(.{ .id = .operator_right_brace, .line = line_index, .offset = start_i, @@ -572,7 +574,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B } } - token_buffer.append_with_capacity(.{ + token_buffer.tokens.append(.{ .id = .number_literal, .line = line_index, .offset = start_i, @@ -586,7 +588,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B } } - token_buffer.append_with_capacity(.{ + token_buffer.tokens.append(.{ .id = .operator_backtick, .line = line_index, .length = 1, @@ -606,7 +608,7 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B const end_index = index; const token_length = end_index - start_index; - token_buffer.append_with_capacity(.{ + token_buffer.tokens.append(.{ .id = token_id, .offset = start_index, .length = token_length, @@ -619,8 +621,8 @@ pub fn analyze(allocator: *MyAllocator, text: []const u8, token_buffer: *Token.B // logln(.lexer, .end, "END LEXER 
- TOKEN OFFSET: {} - LINE OFFSET: {}", .{ Token.unwrap(lexer.offset), lexer.line_offset }); - lexer.count = Token.sub(token_buffer.getOffset(), lexer.offset); - lexer.line_count = token_buffer.getLineOffset() - lexer.line_offset; + lexer.count = token_buffer.tokens.length - @intFromEnum(lexer.offset); + lexer.line_count = token_buffer.line_offsets.length - lexer.line_offset; const time_end = std.time.Instant.now() catch unreachable; lexer.time = time_end.since(time_start); diff --git a/bootstrap/frontend/parser.zig b/bootstrap/frontend/parser.zig index 78c1a0e..b097756 100644 --- a/bootstrap/frontend/parser.zig +++ b/bootstrap/frontend/parser.zig @@ -259,18 +259,14 @@ const Analyzer = struct { } fn getTokenOffset(analyzer: *Analyzer, token_index: Token.Index) u32 { - const index = Token.unwrap(token_index); - assert(index < analyzer.token_buffer.length); - const offset = analyzer.token_buffer.offsets[index]; - return offset; + const token = analyzer.token_buffer.tokens.get(token_index); + return token.offset; } fn peekTokenAhead(analyzer: *Analyzer, ahead_offset: u32) Token.Id { - const token_index = Token.addInt(analyzer.token_i, ahead_offset); - const index = Token.unwrap(token_index); - assert(index < analyzer.token_buffer.length); - const token = analyzer.token_buffer.ids[index]; - return token; + const index = @intFromEnum(analyzer.token_i) + ahead_offset; + const token = analyzer.token_buffer.tokens.get_unchecked(index); + return token.id; } fn peekToken(analyzer: *Analyzer) Token.Id { @@ -280,11 +276,11 @@ const Analyzer = struct { fn hasTokens(analyzer: *Analyzer) bool { const token_end = analyzer.getTokenEnd(); - return Token.unwrap(analyzer.token_i) < token_end; + return @intFromEnum(analyzer.token_i) < token_end; } fn getTokenEnd(analyzer: *const Analyzer) u32 { - return @intFromEnum(Token.addInt(analyzer.lexer.offset, analyzer.lexer.count)); + return @intFromEnum(analyzer.lexer.offset) + analyzer.lexer.count; } fn consumeToken(analyzer: *Analyzer) void { @@ -292,29 +288,26 @@ const Analyzer = struct { } fn consumeTokens(analyzer: *Analyzer, token_count: u32) void { - assert(Token.unwrap(Token.addInt(analyzer.token_i, token_count)) <= analyzer.getTokenEnd()); + assert((@intFromEnum(analyzer.token_i) + token_count) <= analyzer.getTokenEnd()); // log(.parser, .consume_token, "Consuming {} {s}: ", .{ token_count, if (token_count == 1) "token" else "tokens" }); - for (0..token_count) |i_usize| { - const i: u32 = @intCast(i_usize); - const token_id = analyzer.peekTokenAhead(i); - _ = token_id; // autofix - const token_index = Token.addInt(analyzer.token_i, i); - const token_bytes = analyzer.bytes(token_index); - _ = token_bytes; // autofix - // log(.parser, .consume_token, "{s}, '{s}'", .{ @tagName(token_id), token_bytes }); - } + // for (0..token_count) |i_usize| { + // const i: u32 = @intCast(i_usize); + // const token_id = analyzer.peekTokenAhead(i); + // _ = token_id; // autofix + // const token_index = @intFromEnum(analyzer.token_i) + i; + // const token_bytes = analyzer.bytes(token_index); + // _ = token_bytes; // autofix + // // log(.parser, .consume_token, "{s}, '{s}'", .{ @tagName(token_id), token_bytes }); + // } // log(.parser, .consume_token, "\n", .{}); - analyzer.token_i = Token.addInt(analyzer.token_i, token_count); + analyzer.token_i = @enumFromInt(@intFromEnum(analyzer.token_i) + token_count); } fn bytes(analyzer: *const Analyzer, token_index: Token.Index) []const u8 { - const index = Token.unwrap(token_index); - assert(index < analyzer.token_buffer.length); - const 
offset = analyzer.token_buffer.offsets[index]; - const len = analyzer.token_buffer.lengths[index]; - const slice = analyzer.source_file[offset..][0..len]; + const token = analyzer.token_buffer.tokens.get(token_index); + const slice = analyzer.source_file[token.offset..][0..token.length]; return slice; } @@ -1814,7 +1807,7 @@ const Analyzer = struct { fn processContainerType(analyzer: *Analyzer, maybe_token_id: ?Token.Id) !Node.Index { const token_i = if (maybe_token_id) |tid| try analyzer.expectToken(tid) else analyzer.token_i; - assert(Token.unwrap(analyzer.token_i) < analyzer.token_buffer.length); + assert(@intFromEnum(analyzer.token_i) < analyzer.token_buffer.tokens.length); const token_id = maybe_token_id orelse .fixed_keyword_struct; const container_type: Compilation.ContainerType = switch (token_id) { .fixed_keyword_struct => .@"struct", @@ -2323,7 +2316,7 @@ const Analyzer = struct { .right = blk: { const t = analyzer.token_i; analyzer.consumeToken(); - break :blk Node.wrap(Token.unwrap(t)); + break :blk Node.wrap(@intFromEnum(t)); }, }), else => |t| @panic(@tagName(t)), @@ -2363,7 +2356,7 @@ const Analyzer = struct { try analyzer.node_lists.append(analyzer.my_allocator, node_list); return try analyzer.addNode(.{ .id = .node_list, - .token = Token.wrap(0), + .token = @enumFromInt(0), .left = @enumFromInt(index), .right = Node.Index.null, }); diff --git a/bootstrap/library.zig b/bootstrap/library.zig index 3f1ca56..b68c495 100644 --- a/bootstrap/library.zig +++ b/bootstrap/library.zig @@ -26,7 +26,7 @@ pub const Arena = struct{ pub fn init(requested_size: u64) !*Arena { var size = requested_size; - const size_roundup_granularity = 64 * 1024 * 1024; + const size_roundup_granularity = commit_granularity; size += size_roundup_granularity - 1; size -= size % size_roundup_granularity; const initial_commit_size = commit_granularity; @@ -82,14 +82,82 @@ pub const Arena = struct{ } }; +const pinned_array_page_size = 2 * 1024 * 1024; +const pinned_array_max_size = std.math.maxInt(u32) - pinned_array_page_size; +const pinned_array_default_granularity = pinned_array_page_size; +/// This must be used with big arrays +pub fn PinnedArray(comptime T: type) type { + return struct{ + pointer: [*]T = @constCast((&[_]T{}).ptr), + length: u32 = 0, + granularity: u32 = 0, + + pub const Index = enum(u32){ + null = 0xffff_ffff, + _, + }; + + const Array = @This(); + + pub fn get_unchecked(array: *Array, index: u32) *T { + const slice = array.pointer[0..array.length]; + return &slice[index]; + } + + pub fn get(array: *Array, index: Index) *T { + assert(index != .null); + const i = @intFromEnum(index); + return array.get_unchecked(i); + } + + pub fn get_index(array: *Array, item: *const T) Index{ + assert(item - array.pointer > (@divExact(pinned_array_max_size, @sizeOf(T)))); + return @enumFromInt(item - array.pointer); + } + + pub fn init(granularity: u32) !Array{ + const raw_ptr = try reserve(pinned_array_max_size); + try commit(raw_ptr, granularity); + return Array{ + .pointer = @alignCast(@ptrCast(raw_ptr)), + .length = 0, + .granularity = granularity, + }; + } + + pub fn init_with_default_granularity() !Array{ + return try Array.init(pinned_array_default_granularity); + } + + pub fn append(array: *Array, item: T) void { + if (((array.length + 1) * @sizeOf(T)) & (array.granularity - 1) == 0) { + const length: u64 = array.length; + assert((length + 1) * @sizeOf(T) <= pinned_array_max_size); + const ptr: [*]u8 = @ptrCast(array.pointer); + commit(ptr + ((length + 1) * @sizeOf(T)), array.granularity) catch 
unreachable; + } + + array.append_with_capacity(item); + } + + pub fn append_with_capacity(array: *Array, item: T) void { + const index = array.length; + assert(index * @sizeOf(T) < pinned_array_max_size); + array.length += 1; + array.pointer[index] = item; + } + }; +} + pub fn reserve(size: u64) ![*]u8{ - return switch (os) { + const slice = switch (os) { .linux, .macos => try std.posix.mmap(null, size, std.posix.PROT.NONE, .{ .ANONYMOUS = true, .TYPE = .PRIVATE, }, -1, 0), else => @compileError("OS not supported"), }; + return slice.ptr; } pub fn commit(bytes: [*]u8, size: u64) !void{