#include enum class TokenId { none, end_of_statement, integer, right_brace, plus, dash, asterisk, forward_slash, percentage, caret, bar, ampersand, exclamation, assign_plus, assign_dash, assign_asterisk, assign_forward_slash, assign_percentage, assign_caret, assign_bar, assign_ampersand, assign_exclamation, value_keyword, identifier, }; enum class TokenIntegerKind { hexadecimal, decimal, octal, binary, }; struct TokenInteger { u64 value; TokenIntegerKind kind; }; enum class ValueKeyword { undefined, unreachable, zero, count, }; struct Token { union { TokenInteger integer; ValueKeyword value_keyword; String identifier; }; TokenId id; }; enum class Precedence { none, assignment, boolean_or, boolean_and, comparison, bitwise, shifting, add_like, div_like, prefix, aggregate_initialization, postfix, }; struct ValueBuilder { Token token; Value* left; Precedence precedence; ValueKind kind; bool allow_assignment_operators; inline ValueBuilder with_precedence(Precedence precedence) { auto result = *this; result.precedence = precedence; return result; } inline ValueBuilder with_token(Token token) { auto result = *this; result.token = token; return result; } inline ValueBuilder with_left(Value* value) { auto result = *this; result.left = value; return result; } inline ValueBuilder with_kind(ValueKind kind) { auto result = *this; result.kind = kind; return result; } }; global_variable constexpr u8 left_bracket = '['; global_variable constexpr u8 right_bracket = ']'; global_variable constexpr u8 left_brace = '{'; global_variable constexpr u8 right_brace = '}'; global_variable constexpr u8 left_parenthesis = '('; global_variable constexpr u8 right_parenthesis = ')'; fn bool is_space(u8 ch) { return ((ch == ' ') | (ch == '\n')) | ((ch == '\t') | (ch == '\r')); } fn bool is_lower(u8 ch) { return ((ch >= 'a') & (ch <= 'z')); } fn bool is_upper(u8 ch) { return ((ch >= 'A') & (ch <= 'Z')); } fn bool is_decimal(u8 ch) { return ((ch >= '0') & (ch <= '9')); } fn bool is_hexadecimal_alpha_lower(u8 ch) { return ((ch >= 'a') & (ch <= 'f')); } fn bool is_hexadecimal_alpha_upper(u8 ch) { return ((ch >= 'A') & (ch <= 'F')); } fn bool is_hexadecimal_alpha(u8 ch) { return is_hexadecimal_alpha_lower(ch) | is_hexadecimal_alpha_upper(ch); } fn bool is_hexadecimal(u8 ch) { return is_decimal(ch) | is_hexadecimal_alpha(ch); } fn bool is_identifier_start(u8 ch) { return (is_lower(ch) | is_upper(ch)) | (ch == '_'); } fn bool is_identifier(u8 ch) { return is_identifier_start(ch) | is_decimal(ch); } fn u32 get_line(Module& module) { auto line = module.line_offset + 1; assert(line < ~(u32)0); return (u32)line; } fn u32 get_column(Module& module) { auto column = module.offset - module.line_character_offset + 1; assert(column < ~(u32)0); return (u32)column; } struct Checkpoint { u64 offset; u64 line_offset; u64 line_character_offset; }; fn Checkpoint get_checkpoint(Module& module) { return { .offset = module.offset, .line_offset = module.line_offset, .line_character_offset = module.line_character_offset, }; } fn void set_checkpoint(Module& module, Checkpoint checkpoint) { module.offset = checkpoint.offset; module.line_offset = checkpoint.line_offset; module.line_character_offset = checkpoint.line_character_offset; } fn bool consume_character_if_match(Module& module, u8 expected_ch) { bool is_ch = false; auto i = module.offset; if (i < module.content.length) { auto ch = module.content[i]; is_ch = expected_ch == ch; module.offset = i + is_ch; } return is_ch; } fn void expect_character(Module& module, u8 expected_ch) { if (!consume_character_if_match(module, expected_ch)) { report_error(); } } fn void skip_space(Module& module) { while (1) { auto iteration_offset = module.offset; while (module.offset < module.content.length) { auto ch = module.content[module.offset]; if (!is_space(ch)) { break; } module.line_offset += ch == '\n'; module.line_character_offset = ch == '\n' ? module.offset : module.line_character_offset; module.offset += 1; } if (module.offset + 1 < module.content.length) { auto i = module.offset; auto first_ch = module.content[i]; auto second_ch = module.content[i + 1]; auto is_comment = first_ch == '/' && second_ch == '/'; if (is_comment) { while (module.offset < module.content.length) { auto ch = module.content[module.offset]; if (ch == '\n') { break; } module.offset += 1; } if (module.offset < module.content.length) { module.line_offset += 1; module.line_character_offset = module.offset; module.offset += 1; } } } if (module.offset - iteration_offset == 0) { break; } } } fn String parse_identifier(Module& module) { auto start = module.offset; if (is_identifier_start(module.content[start])) { module.offset = start + 1; while (module.offset < module.content.length) { auto i = module.offset; if (is_identifier(module.content[i])) { module.offset = i + 1; } else { break; } } } auto end = module.offset; if (end - start == 0) { report_error(); } return module.content(start, end); } fn u64 accumulate_decimal(u64 accumulator, u8 ch) { return (accumulator * 10) + (ch - '0'); } fn u64 parse_integer_decimal_assume_valid(String string) { u64 value = 0; for (u8 ch: string) { assert(is_decimal(ch)); value = accumulate_decimal(value, ch); } return value; } fn Type* parse_type(Module& module) { auto start_character = module.content[module.offset]; if (is_identifier_start(start_character)) { auto identifier = parse_identifier(module); if (identifier.equal(str("void"))) { return void_type(module); } else if (identifier.equal(str("noreturn"))) { return noreturn_type(module); } else { auto is_int_type = identifier.length > 1 && (identifier[0] == 's' || identifier[0] == 'u'); if (is_int_type) { for (auto ch : identifier(1)) { is_int_type = is_int_type && is_decimal(ch); } } if (is_int_type) { bool is_signed; switch (identifier[0]) { case 's': is_signed = true; break; case 'u': is_signed = false; break; default: unreachable(); } auto bit_count = parse_integer_decimal_assume_valid(identifier(1)); if (bit_count == 0) { report_error(); } if (bit_count > 64) { if (bit_count != 128) { report_error(); } } auto result = integer_type(module, { .bit_count = (u32)bit_count, .is_signed = is_signed }); return result; } else { trap_raw(); } } } else if (start_character == '&') { trap_raw(); } else if (start_character == left_bracket) { trap_raw(); } else if (start_character == '#') { trap_raw(); } else { report_error(); } } fn u64 parse_decimal(Module& module) { u64 value = 0; while (true) { auto ch = module.content[module.offset]; if (!is_decimal(ch)) { break; } module.offset += 1; value = accumulate_decimal(value, ch); } return value; } fn Token tokenize(Module& module) { skip_space(module); auto start_index = module.offset; if (start_index == module.content.length) { report_error(); } auto start_character = module.content[start_index]; Token token; switch (start_character) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { auto next_ch = module.content[start_index + 1]; TokenIntegerKind token_integer_kind = TokenIntegerKind::decimal; if (start_character == '0') { switch (next_ch) { case 'x': token_integer_kind = TokenIntegerKind::hexadecimal; break; case 'd': token_integer_kind = TokenIntegerKind::decimal; break; case 'o': token_integer_kind = TokenIntegerKind::octal; break; case 'b': token_integer_kind = TokenIntegerKind::octal; break; default: token_integer_kind = TokenIntegerKind::decimal; break; } auto inferred_decimal = token_integer_kind == TokenIntegerKind::decimal && next_ch != 'd'; module.offset += 2 * (token_integer_kind != TokenIntegerKind::decimal || !inferred_decimal); } u64 value; switch (token_integer_kind) { case TokenIntegerKind::hexadecimal: { trap_raw(); } break; case TokenIntegerKind::decimal: { value = parse_decimal(module); } break; case TokenIntegerKind::octal: { trap_raw(); } break; case TokenIntegerKind::binary: { trap_raw(); } break; } token = { .integer = { .value = value, .kind = token_integer_kind, }, .id = TokenId::integer, }; } break; case ';': { module.offset += 1; token = { .id = TokenId::end_of_statement, }; } break; case right_brace: { module.offset += 1; token = { .id = TokenId::right_brace, }; } break; case '+': case '-': case '*': case '/': case '%': case '&': case '|': case '^': case '!': { auto next_ch = module.content[start_index + 1]; TokenId id; if (next_ch == '=') { switch (start_character) { case '+': id = TokenId::assign_plus; break; case '-': id = TokenId::assign_dash; break; case '*': id = TokenId::assign_asterisk; break; case '/': id = TokenId::assign_forward_slash; break; case '%': id = TokenId::assign_percentage; break; case '&': id = TokenId::assign_ampersand; break; case '|': id = TokenId::assign_bar; break; case '^': id = TokenId::assign_caret; break; case '!': id = TokenId::assign_exclamation; break; default: unreachable(); } } else { switch (start_character) { case '+': id = TokenId::plus; break; case '-': id = TokenId::dash; break; case '*': id = TokenId::asterisk; break; case '/': id = TokenId::forward_slash; break; case '%': id = TokenId::percentage; break; case '&': id = TokenId::ampersand; break; case '|': id = TokenId::bar; break; case '^': id = TokenId::caret; break; case '!': id = TokenId::exclamation; break; default: unreachable(); } } token.id = id; module.offset += 1 + (next_ch == '='); } break; default: { if (is_identifier_start(start_character)) { auto identifier = parse_identifier(module); String value_keywords[] = { str("undefined"), str("unreachable"), str("zero"), }; static_assert(array_length(value_keywords) == (u64)ValueKeyword::count); backing_type(ValueKeyword) i; for (i = 0; i < (backing_type(ValueKeyword))ValueKeyword::count; i += 1) { String candidate = value_keywords[i]; if (candidate.equal(identifier)) { break; } } auto value_keyword = (ValueKeyword)i; if (value_keyword == ValueKeyword::count) { token = { .identifier = identifier, .id = TokenId::identifier, }; } else { token = { .value_keyword = value_keyword, .id = TokenId::value_keyword, }; } } else { report_error(); } } break; } assert(start_index != module.offset); return token; } fn Value* reference_identifier(Module& module, Scope* current_scope, String identifier, ValueKind kind) { assert(!identifier.equal(str(""))); assert(!identifier.equal(str("_"))); Scope* scope = current_scope; while (scope) { switch (scope->kind) { case ScopeKind::global: { trap_raw(); } break; case ScopeKind::function: { trap_raw(); } break; case ScopeKind::local: { trap_raw(); } break; case ScopeKind::for_each: { trap_raw(); } break; case ScopeKind::macro_declaration: { trap_raw(); } break; case ScopeKind::macro_instantiation: { trap_raw(); } break; } } trap_raw(); } fn Value* parse_precedence(Module& module, Scope* scope, ValueBuilder builder); fn Value* parse_left(Module& module, Scope* scope, ValueBuilder builder) { Token token = builder.token; Value* result; switch (token.id) { case TokenId::integer: { auto integer_value = token.integer.value; result = new_value(module); *result = { .constant_integer = { .value = integer_value, .is_signed = false, }, .id = ValueId::constant_integer, }; } break; case TokenId::dash: // Unary { assert(!builder.left); UnaryId id; switch (token.id) { case TokenId::dash: id = UnaryId::minus; break; default: unreachable(); } auto unary_value = parse_precedence(module, scope, builder.with_precedence(Precedence::prefix).with_token({}).with_kind(token.id == TokenId::ampersand ? ValueKind::left : builder.kind)); result = new_value(module); *result = { .unary = { .value = unary_value, .id = id, }, .id = ValueId::unary, }; } break; case TokenId::identifier: { result = reference_identifier(module, scope, token.identifier, builder.kind); } break; default: report_error(); } return result; } fn Precedence get_token_precedence(TokenId id) { Precedence precedence; switch (id) { case TokenId::end_of_statement: case TokenId::right_brace: precedence = Precedence::none; break; case TokenId::plus: precedence = Precedence::add_like; break; default: report_error(); } return precedence; } fn Value* parse_right(Module& module, Scope* scope, ValueBuilder builder) { auto* left = builder.left; assert(left); Token token = builder.token; switch (token.id) { case TokenId::plus: // Binary { auto precedence = get_token_precedence(token.id); assert(precedence != Precedence::assignment); BinaryId id; switch (token.id) { case TokenId::plus: id = BinaryId::add; break; default: unreachable(); } trap_raw(); } break; default: report_error(); } trap_raw(); } fn Value* parse_precedence_left(Module& module, Scope* scope, ValueBuilder builder) { auto result = builder.left; auto precedence = builder.precedence; while (1) { auto checkpoint = get_checkpoint(module); auto token = tokenize(module); auto token_precedence = get_token_precedence(token.id); if (token_precedence == Precedence::assignment) { token_precedence = builder.allow_assignment_operators ? Precedence::assignment : Precedence::none; } if ((backing_type(Precedence))precedence > (backing_type(Precedence))token_precedence) { set_checkpoint(module, checkpoint); break; } auto left = result; auto right = parse_right(module, scope, builder.with_token(token).with_precedence(Precedence::none).with_left(left)); result = right; } return result; } fn Value* parse_precedence(Module& module, Scope* scope, ValueBuilder builder) { assert(builder.token.id == TokenId::none); auto token = tokenize(module); auto left = parse_left(module, scope, builder.with_token(token)); auto result = parse_precedence_left(module, scope, builder.with_left(left)); return result; } fn Value* parse_value(Module& module, Scope* scope, ValueBuilder builder) { assert(builder.precedence == Precedence::none); assert(!builder.left); auto value = parse_precedence(module, scope, builder.with_precedence(Precedence::assignment)); return value; } fn Statement* parse_statement(Module& module, Scope* scope) { bool require_semicolon = true; auto statement_line = get_line(module); auto statement_column = get_column(module); auto* statement = &arena_allocate(module.arena, 1)[0]; *statement = Statement{ .line = statement_line, .column = statement_column, }; auto statement_start_character = module.content[module.offset]; switch (statement_start_character) { case '>': { trap_raw(); } break; case '#': { trap_raw(); } break; case left_brace: { trap_raw(); } break; default: { if (is_identifier_start(statement_start_character)) { auto checkpoint = get_checkpoint(module); auto statement_start_identifier = parse_identifier(module); skip_space(module); enum class StatementStartKeyword { underscore_st, return_st, if_st, // TODO: make `unreachable` a statement start keyword? for_st, while_st, switch_st, break_st, continue_st, count, }; String statement_start_keywords[] = { str("_"), str("return"), str("if"), str("for"), str("while"), str("switch"), str("break"), str("continue"), }; static_assert(array_length(statement_start_keywords) == (u64)StatementStartKeyword::count); backing_type(StatementStartKeyword) i; for (i = 0; i < (backing_type(StatementStartKeyword))StatementStartKeyword::count; i += 1) { auto statement_start_keyword = statement_start_keywords[i]; if (statement_start_keyword.equal(statement_start_identifier)) { break; } } auto statement_start_keyword = (StatementStartKeyword)i; switch (statement_start_keyword) { case StatementStartKeyword::underscore_st: { trap_raw(); } break; case StatementStartKeyword::return_st: { auto return_value = parse_value(module, scope, {}); statement->return_st = return_value; statement->id = StatementId::return_st; } break; case StatementStartKeyword::if_st: { trap_raw(); } break; case StatementStartKeyword::for_st: { trap_raw(); } break; case StatementStartKeyword::while_st: { trap_raw(); } break; case StatementStartKeyword::switch_st: { trap_raw(); } break; case StatementStartKeyword::break_st: { trap_raw(); } break; case StatementStartKeyword::continue_st: { trap_raw(); } break; case StatementStartKeyword::count: { trap_raw(); } break; } } else { trap_raw(); } } break; } if (require_semicolon) { expect_character(module, ';'); } return statement; } fn Block* parse_block(Module& module, Scope* parent_scope) { auto* block = &arena_allocate(module.arena, 1)[0]; *block = { .scope = { .parent = parent_scope, .line = get_line(module), .column = get_column(module), .kind = ScopeKind::local, }, }; auto* scope = &block->scope; expect_character(module, left_brace); Statement* current_statement = 0; while (true) { skip_space(module); if (module.offset == module.content.length) { break; } if (consume_character_if_match(module, right_brace)) { break; } auto* statement = parse_statement(module, scope); if (current_statement) { current_statement->next = statement; } current_statement = statement; } return block; } void parse(Module& module) { while (1) { skip_space(module); if (module.offset == module.content.length) { break; } bool is_export = false; bool is_extern = false; auto global_line = get_line(module); auto global_column = get_column(module); if (consume_character_if_match(module, left_bracket)) { while (module.offset < module.content.length) { auto global_keyword_string = parse_identifier(module); enum class GlobalKeyword { export_keyword, extern_keyword, count, }; String global_keyword_strings[] = { str("export"), str("extern"), }; static_assert(array_length(global_keyword_strings) == (u64)GlobalKeyword::count); u32 i; for (i = 0; i < array_length(global_keyword_strings); i += 1) { String keyword = global_keyword_strings[i]; if (keyword.equal(global_keyword_string)) { break; } } auto global_keyword = (GlobalKeyword)i; switch (global_keyword) { case GlobalKeyword::export_keyword: { is_export = true; } break; case GlobalKeyword::extern_keyword: { is_extern = true; } break; case GlobalKeyword::count: { report_error(); } } if (consume_character_if_match(module, right_bracket)) { break; } else { report_error(); } } skip_space(module); } auto global_name = parse_identifier(module); Global* last_global = module.first_global; while (last_global) { if (global_name.equal(last_global->variable.name)) { report_error(); } if (!last_global->next) { break; } last_global = last_global->next; } Type* type_it = module.first_type; Type* forward_declaration = 0; while (type_it) { if (global_name.equal(type_it->name)) { if (type_it->id == TypeId::forward_declaration) { forward_declaration = type_it; break; } else { report_error(); } } if (!type_it->next) { break; } type_it = type_it->next; } skip_space(module); Type* global_type = 0; if (consume_character_if_match(module, ':')) { skip_space(module); global_type = parse_type(module); skip_space(module); } expect_character(module, '='); skip_space(module); bool is_global_keyword = false; enum class GlobalKeyword { bits, enumerator, function, macro, structure, typealias, union_type, count, }; auto i = (backing_type(GlobalKeyword))GlobalKeyword::count; if (is_identifier_start(module.content[module.offset])) { auto checkpoint = get_checkpoint(module); auto global_string = parse_identifier(module); skip_space(module); String global_keywords[] = { str("bits"), str("enum"), str("fn"), str("macro"), str("struct"), str("typealias"), str("union"), }; static_assert(array_length(global_keywords) == (u64)GlobalKeyword::count); for (i = 0; i < (backing_type(GlobalKeyword))GlobalKeyword::count; i += 1) { String global_keyword = global_keywords[i]; if (global_string.equal(global_keyword)) { break; } } auto global_keyword = (GlobalKeyword)i; switch (global_keyword) { case GlobalKeyword::bits: { trap_raw(); } break; case GlobalKeyword::enumerator: { trap_raw(); } break; case GlobalKeyword::function: { auto calling_convention = CallingConvention::c; auto function_attributes = FunctionAttributes{}; bool is_variable_arguments = false; if (consume_character_if_match(module, left_bracket)) { while (module.offset < module.content.length) { auto function_identifier = parse_identifier(module); enum class FunctionKeyword { cc, count, }; String function_keywords[] = { str("cc"), }; static_assert(array_length(function_keywords) == (u64)FunctionKeyword::count); backing_type(FunctionKeyword) i; for (i = 0; i < (backing_type(FunctionKeyword))(FunctionKeyword::count); i += 1) { auto function_keyword = function_keywords[i]; if (function_keyword.equal(function_identifier)) { break; } } auto function_keyword = (FunctionKeyword)i; skip_space(module); switch (function_keyword) { case FunctionKeyword::cc: { expect_character(module, left_parenthesis); skip_space(module); auto calling_convention_string = parse_identifier(module); String calling_conventions[] = { str("c"), }; static_assert(array_length(calling_conventions) == (u64)CallingConvention::count); backing_type(CallingConvention) i; for (i = 0; i < (backing_type(CallingConvention))CallingConvention::count; i += 1) { auto calling_convention = calling_conventions[i]; if (calling_convention.equal(calling_convention_string)) { break; } } auto candidate_calling_convention = (CallingConvention)i; if (candidate_calling_convention == CallingConvention::count) { report_error(); } calling_convention = candidate_calling_convention; skip_space(module); expect_character(module, right_parenthesis); } break; case FunctionKeyword::count: { report_error(); } break; } skip_space(module); if (consume_character_if_match(module, right_bracket)) { break; } else { report_error(); } } } skip_space(module); expect_character(module, left_parenthesis); Type* semantic_argument_type_buffer[64]; String semantic_argument_name_buffer[64]; u64 semantic_argument_count = 0; while (module.offset < module.content.length) { skip_space(module); if (consume_character_if_match(module, '.')) { expect_character(module, '.'); expect_character(module, '.'); skip_space(module); expect_character(module, right_parenthesis); is_variable_arguments = true; break; } if (consume_character_if_match(module, right_parenthesis)) { break; } auto argument_name = parse_identifier(module); semantic_argument_name_buffer[semantic_argument_count] = argument_name; skip_space(module); expect_character(module, ':'); skip_space(module); auto argument_type = parse_type(module); semantic_argument_type_buffer[semantic_argument_count] = argument_type; skip_space(module); unused(consume_character_if_match(module, ',')); semantic_argument_count += 1; } skip_space(module); auto return_type = parse_type(module); skip_space(module); Slice argument_types = {}; if (semantic_argument_count != 0) { trap_raw(); } auto is_declaration = consume_character_if_match(module, ';'); auto function_type = type_allocate_init(module, { .function = { .semantic_return_type = return_type, .semantic_argument_types = argument_types, .calling_convention = calling_convention, .is_variable_arguments = is_variable_arguments, }, .id = TypeId::function, .name = str(""), }); auto storage = new_value(module); *storage = { .type = get_pointer_type(module, function_type), .id = ValueId::external_function, }; auto global = new_global(module); *global = { .variable = { .storage = storage, .initial_value = 0, .type = function_type, .scope = &module.scope, .name = global_name, .line = global_line, .column = global_column, }, .linkage = (is_export | is_extern) ? Linkage::external : Linkage::internal, }; module.current_function = global; if (!is_declaration) { storage->function = { .arguments = {}, .scope = { .parent = &module.scope, .line = global_line, .column = global_column, .kind = ScopeKind::function, }, .block = 0, .attributes = function_attributes, }; storage->id = ValueId::function; if (semantic_argument_count != 0) { trap_raw(); } storage->function.block = parse_block(module, &storage->function.scope); } } break; case GlobalKeyword::macro: { trap_raw(); } break; case GlobalKeyword::structure: { trap_raw(); } break; case GlobalKeyword::typealias: { trap_raw(); } break; case GlobalKeyword::union_type: { trap_raw(); } break; case GlobalKeyword::count: { set_checkpoint(module, checkpoint); } break; } } if (i == (backing_type(GlobalKeyword))GlobalKeyword::count) { trap_raw(); } } }