#include enum class ValueIntrinsic { align_of, byte_size, enum_name, extend, integer_max, int_from_enum, int_from_pointer, pointer_cast, select, string_to_enum, trap, truncate, va_start, va_end, va_arg, va_copy, count, }; fn Block* scope_to_block(Scope* scope) { assert(scope->kind == ScopeKind::local); auto byte_offset = offsetof(Block, scope); auto result = (Block*)((u8*)scope - byte_offset); return result; } fn ValueFunction* scope_to_function(Scope* scope) { assert(scope->kind == ScopeKind::function); auto byte_offset = offsetof(ValueFunction, scope); auto result = (ValueFunction*)((u8*)scope - byte_offset); return result; } fn Module* scope_to_module(Scope* scope) { assert(scope->kind == ScopeKind::global); auto byte_offset = offsetof(Module, scope); auto result = (Module*)((u8*)scope - byte_offset); return result; } fn Local* new_local(Module* module, Scope* scope) { auto* result = &arena_allocate(module->arena, 1)[0]; switch (scope->kind) { case ScopeKind::local: { auto block = scope_to_block(scope); if (block->last_local) { block->last_local->next = result; block->last_local = result; } else { block->first_local = result; block->last_local = result; } } break; default: report_error(); } return result; } enum class TokenId { none, end_of_statement, integer, left_brace, left_bracket, left_parenthesis, right_brace, right_bracket, right_parenthesis, plus, dash, asterisk, forward_slash, percentage, caret, bar, ampersand, exclamation, assign_plus, assign_dash, assign_asterisk, assign_forward_slash, assign_percentage, assign_caret, assign_bar, assign_ampersand, assign_exclamation, value_keyword, operator_keyword, identifier, value_intrinsic, shift_left, shift_right, assign_shift_left, assign_shift_right, compare_less, compare_less_equal, compare_greater, compare_greater_equal, compare_equal, dot, double_dot, triple_dot, pointer_dereference, }; enum class TokenIntegerKind { hexadecimal, decimal, octal, binary, }; struct TokenInteger { u64 value; TokenIntegerKind kind; }; enum class ValueKeyword { undefined, unreachable, zero, count, }; enum class OperatorKeyword { and_op, or_op, and_op_shortcircuit, or_op_shortcircuit, count, }; struct Token { union { TokenInteger integer; ValueKeyword value_keyword; String identifier; OperatorKeyword operator_keyword; ValueIntrinsic value_intrinsic; }; TokenId id; }; enum class Precedence { none, assignment, boolean_or, boolean_and, comparison, bitwise, shifting, add_like, div_like, prefix, aggregate_initialization, postfix, }; struct ValueBuilder { Token token; Value* left; Precedence precedence; ValueKind kind; bool allow_assignment_operators; inline ValueBuilder with_precedence(Precedence precedence) { auto result = *this; result.precedence = precedence; return result; } inline ValueBuilder with_token(Token token) { auto result = *this; result.token = token; return result; } inline ValueBuilder with_left(Value* value) { auto result = *this; result.left = value; return result; } inline ValueBuilder with_kind(ValueKind kind) { auto result = *this; result.kind = kind; return result; } }; global_variable constexpr u8 left_bracket = '['; global_variable constexpr u8 right_bracket = ']'; global_variable constexpr u8 left_brace = '{'; global_variable constexpr u8 right_brace = '}'; global_variable constexpr u8 left_parenthesis = '('; global_variable constexpr u8 right_parenthesis = ')'; fn bool is_space(u8 ch) { return ((ch == ' ') | (ch == '\n')) | ((ch == '\t') | (ch == '\r')); } fn bool is_lower(u8 ch) { return ((ch >= 'a') & (ch <= 'z')); } fn bool is_upper(u8 ch) { return ((ch >= 'A') & (ch <= 'Z')); } fn bool is_decimal(u8 ch) { return ((ch >= '0') & (ch <= '9')); } fn bool is_hexadecimal_alpha_lower(u8 ch) { return ((ch >= 'a') & (ch <= 'f')); } fn bool is_hexadecimal_alpha_upper(u8 ch) { return ((ch >= 'A') & (ch <= 'F')); } fn bool is_hexadecimal_alpha(u8 ch) { return is_hexadecimal_alpha_lower(ch) | is_hexadecimal_alpha_upper(ch); } fn bool is_hexadecimal(u8 ch) { return is_decimal(ch) | is_hexadecimal_alpha(ch); } fn bool is_identifier_start(u8 ch) { return (is_lower(ch) | is_upper(ch)) | (ch == '_'); } fn bool is_identifier(u8 ch) { return is_identifier_start(ch) | is_decimal(ch); } fn u32 get_line(Module* module) { auto line = module->line_offset + 1; assert(line < ~(u32)0); return (u32)line; } fn u32 get_column(Module* module) { auto column = module->offset - module->line_character_offset + 1; assert(column < ~(u32)0); return (u32)column; } struct Checkpoint { u64 offset; u64 line_offset; u64 line_character_offset; }; fn Checkpoint get_checkpoint(Module* module) { return { .offset = module->offset, .line_offset = module->line_offset, .line_character_offset = module->line_character_offset, }; } fn void set_checkpoint(Module* module, Checkpoint checkpoint) { module->offset = checkpoint.offset; module->line_offset = checkpoint.line_offset; module->line_character_offset = checkpoint.line_character_offset; } fn bool consume_character_if_match(Module* module, u8 expected_ch) { bool is_ch = false; auto i = module->offset; if (i < module->content.length) { auto ch = module->content[i]; is_ch = expected_ch == ch; module->offset = i + is_ch; } return is_ch; } fn void expect_character(Module* module, u8 expected_ch) { if (!consume_character_if_match(module, expected_ch)) { report_error(); } } fn void skip_space(Module* module) { while (1) { auto iteration_offset = module->offset; while (module->offset < module->content.length) { auto ch = module->content[module->offset]; if (!is_space(ch)) { break; } module->line_offset += ch == '\n'; module->line_character_offset = ch == '\n' ? module->offset : module->line_character_offset; module->offset += 1; } if (module->offset + 1 < module->content.length) { auto i = module->offset; auto first_ch = module->content[i]; auto second_ch = module->content[i + 1]; auto is_comment = first_ch == '/' && second_ch == '/'; if (is_comment) { while (module->offset < module->content.length) { auto ch = module->content[module->offset]; if (ch == '\n') { break; } module->offset += 1; } if (module->offset < module->content.length) { module->line_offset += 1; module->line_character_offset = module->offset; module->offset += 1; } } } if (module->offset - iteration_offset == 0) { break; } } } fn String parse_identifier(Module* module) { auto start = module->offset; if (is_identifier_start(module->content[start])) { module->offset = start + 1; while (module->offset < module->content.length) { auto i = module->offset; if (is_identifier(module->content[i])) { module->offset = i + 1; } else { break; } } } auto end = module->offset; if (end - start == 0) { report_error(); } return module->content(start, end); } fn u64 accumulate_decimal(u64 accumulator, u8 ch) { return (accumulator * 10) + (ch - '0'); } fn u64 accumulate_hexadecimal(u64 accumulator, u8 ch) { u64 value; if (is_decimal(ch)) { value = ch - '0'; } else if (is_hexadecimal_alpha_upper(ch)) { value = ch - 'A' + 10; } else if (is_hexadecimal_alpha_lower(ch)) { value = ch - 'a' + 10; } else { unreachable(); } auto result = (accumulator * 16) + value; return result; } fn u64 parse_integer_decimal_assume_valid(String string) { u64 value = 0; for (u8 ch: string) { assert(is_decimal(ch)); value = accumulate_decimal(value, ch); } return value; } fn Type* parse_type(Module* module) { auto start_character = module->content[module->offset]; if (is_identifier_start(start_character)) { auto identifier = parse_identifier(module); if (identifier.equal(str("void"))) { return void_type(module); } else if (identifier.equal(str("noreturn"))) { return noreturn_type(module); } else { auto is_int_type = identifier.length > 1 && (identifier[0] == 's' || identifier[0] == 'u'); if (is_int_type) { for (auto ch : identifier(1)) { is_int_type = is_int_type && is_decimal(ch); } } if (is_int_type) { bool is_signed; switch (identifier[0]) { case 's': is_signed = true; break; case 'u': is_signed = false; break; default: unreachable(); } auto bit_count = parse_integer_decimal_assume_valid(identifier(1)); if (bit_count == 0) { report_error(); } if (bit_count > 64) { if (bit_count != 128) { report_error(); } } auto result = integer_type(module, { .bit_count = (u32)bit_count, .is_signed = is_signed }); return result; } else { trap_raw(); } } } else if (start_character == '&') { trap_raw(); } else if (start_character == left_bracket) { trap_raw(); } else if (start_character == '#') { trap_raw(); } else { report_error(); } } fn u64 parse_hexadecimal(Module* module) { u64 value = 0; while (true) { auto ch = module->content[module->offset]; if (!is_hexadecimal(ch)) { break; } module->offset += 1; value = accumulate_hexadecimal(value, ch); } return value; } fn u64 parse_decimal(Module* module) { u64 value = 0; while (true) { auto ch = module->content[module->offset]; if (!is_decimal(ch)) { break; } module->offset += 1; value = accumulate_decimal(value, ch); } return value; } fn Token tokenize(Module* module) { skip_space(module); auto start_index = module->offset; if (start_index == module->content.length) { report_error(); } auto start_character = module->content[start_index]; Token token; switch (start_character) { case ';': { module->offset += 1; token = { .id = TokenId::end_of_statement, }; } break; case right_brace: { module->offset += 1; token = { .id = TokenId::right_brace, }; } break; case right_parenthesis: { module->offset += 1; token = { .id = TokenId::right_parenthesis, }; } break; case right_bracket: { module->offset += 1; token = { .id = TokenId::right_bracket, }; } break; case '#': { module->offset += 1; if (is_identifier_start(module->content[module->offset])) { auto identifier = parse_identifier(module); String value_intrinsics[] = { str("align_of"), str("byte_size"), str("enum_name"), str("extend"), str("integer_max"), str("int_from_enum"), str("int_from_pointer"), str("pointer_cast"), str("select"), str("string_to_enum"), str("trap"), str("truncate"), str("va_start"), str("va_end"), str("va_arg"), str("va_copy"), }; backing_type(ValueIntrinsic) i; for (i = 0; i < (backing_type(ValueIntrinsic))(ValueIntrinsic::count); i += 1) { String candidate = value_intrinsics[i]; if (identifier.equal(candidate)) { break; } } auto intrinsic = (ValueIntrinsic)i; if (intrinsic == ValueIntrinsic::count) { report_error(); } token = { .value_intrinsic = intrinsic, .id = TokenId::value_intrinsic, }; } else { trap_raw(); } } break; case '<': { auto next_ch = module->content[start_index + 1]; TokenId id; switch (next_ch) { case '<': switch (module->content[start_index + 2]) { case '=': id = TokenId::assign_shift_left; break; default: id = TokenId::shift_left; break; } break; case '=': id = TokenId::compare_less_equal; break; default: id = TokenId::compare_less; break; } u64 add; switch (id) { case TokenId::assign_shift_left: add = 3; break; case TokenId::shift_left: case TokenId::compare_less_equal: add = 2; break; case TokenId::compare_less: add = 1; break; default: unreachable(); } module->offset += add; token = { .id = id, }; } break; case '>': { auto next_ch = module->content[start_index + 1]; TokenId id; switch (next_ch) { case '>': switch (module->content[start_index + 2]) { case '=': id = TokenId::assign_shift_right; break; default: id = TokenId::shift_right; break; } break; case '=': id = TokenId::compare_greater_equal; break; default: id = TokenId::compare_greater; break; } u64 add; switch (id) { case TokenId::assign_shift_right: add = 3; break; case TokenId::shift_right: case TokenId::compare_greater_equal: add = 2; break; case TokenId::compare_greater: add = 1; break; default: unreachable(); } module->offset += add; token = { .id = id, }; } break; case '=': { trap_raw(); } break; case '.': { auto next_ch = module->content[start_index + 1]; TokenId id; switch (next_ch) { default: id = TokenId::dot; break; case '&': id = TokenId::pointer_dereference; break; case '.': switch (module->content[start_index + 2]) { case '.': id = TokenId::triple_dot; break; default: id = TokenId::double_dot; break; } break; } u64 add; switch (id) { case TokenId::dot: add = 1; break; case TokenId::double_dot: add = 2; break; case TokenId::triple_dot: add = 3; break; case TokenId::pointer_dereference: add = 2; break; default: unreachable(); } module->offset += add; token = { .id = id, }; } break; case '"': { trap_raw(); } break; case '\'': { trap_raw(); } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { auto next_ch = module->content[start_index + 1]; TokenIntegerKind token_integer_kind = TokenIntegerKind::decimal; if (start_character == '0') { switch (next_ch) { case 'x': token_integer_kind = TokenIntegerKind::hexadecimal; break; case 'd': token_integer_kind = TokenIntegerKind::decimal; break; case 'o': token_integer_kind = TokenIntegerKind::octal; break; case 'b': token_integer_kind = TokenIntegerKind::octal; break; default: token_integer_kind = TokenIntegerKind::decimal; break; } auto inferred_decimal = token_integer_kind == TokenIntegerKind::decimal && next_ch != 'd'; module->offset += 2 * (token_integer_kind != TokenIntegerKind::decimal || !inferred_decimal); } u64 value; switch (token_integer_kind) { case TokenIntegerKind::hexadecimal: value = parse_hexadecimal(module); break; case TokenIntegerKind::decimal: value = parse_decimal(module); break; case TokenIntegerKind::octal: { trap_raw(); } break; case TokenIntegerKind::binary: { trap_raw(); } break; } token = { .integer = { .value = value, .kind = token_integer_kind, }, .id = TokenId::integer, }; } break; case '+': case '-': case '*': case '/': case '%': case '&': case '|': case '^': case '!': { auto next_ch = module->content[start_index + 1]; TokenId id; if (next_ch == '=') { switch (start_character) { case '+': id = TokenId::assign_plus; break; case '-': id = TokenId::assign_dash; break; case '*': id = TokenId::assign_asterisk; break; case '/': id = TokenId::assign_forward_slash; break; case '%': id = TokenId::assign_percentage; break; case '&': id = TokenId::assign_ampersand; break; case '|': id = TokenId::assign_bar; break; case '^': id = TokenId::assign_caret; break; case '!': id = TokenId::assign_exclamation; break; default: unreachable(); } } else { switch (start_character) { case '+': id = TokenId::plus; break; case '-': id = TokenId::dash; break; case '*': id = TokenId::asterisk; break; case '/': id = TokenId::forward_slash; break; case '%': id = TokenId::percentage; break; case '&': id = TokenId::ampersand; break; case '|': id = TokenId::bar; break; case '^': id = TokenId::caret; break; case '!': id = TokenId::exclamation; break; default: unreachable(); } } token.id = id; module->offset += 1 + (next_ch == '='); } break; default: { if (is_identifier_start(start_character)) { auto identifier = parse_identifier(module); String value_keywords[] = { str("undefined"), str("unreachable"), str("zero"), }; static_assert(array_length(value_keywords) == (u64)ValueKeyword::count); backing_type(ValueKeyword) i; for (i = 0; i < (backing_type(ValueKeyword))ValueKeyword::count; i += 1) { String candidate = value_keywords[i]; if (candidate.equal(identifier)) { break; } } auto value_keyword = (ValueKeyword)i; if (value_keyword == ValueKeyword::count) { String operators[] = { str("and"), str("or"), str("and?"), str("or?"), }; static_assert(array_length(operators) == (u64)OperatorKeyword::count); backing_type(OperatorKeyword) i; for (i = 0; i < (backing_type(OperatorKeyword))OperatorKeyword::count; i += 1) { auto candidate = operators[i]; if (candidate.equal(identifier)) { break; } } auto operator_keyword = (OperatorKeyword)i; if (operator_keyword == OperatorKeyword::count) { token = { .identifier = identifier, .id = TokenId::identifier, }; } else { token = { .operator_keyword = operator_keyword, .id = TokenId::operator_keyword, }; } } else { token = { .value_keyword = value_keyword, .id = TokenId::value_keyword, }; } } else { report_error(); } } break; } assert(start_index != module->offset); return token; } fn Value* reference_identifier(Module* module, Scope* current_scope, String identifier, ValueKind kind) { assert(!identifier.equal(str(""))); assert(!identifier.equal(str("_"))); Variable* variable = 0; for (Scope* scope = current_scope; scope; scope = scope->parent) { switch (scope->kind) { case ScopeKind::global: { assert(module == scope_to_module(scope)); trap_raw(); } break; case ScopeKind::function: { assert(scope->parent); auto function = scope_to_function(scope); for (auto argument: function->arguments) { if (identifier.equal(argument->variable.name)) { variable = &argument->variable; break; } } } break; case ScopeKind::local: { assert(scope->parent); assert(scope->parent->kind != ScopeKind::global); auto block = scope_to_block(scope); for (Local* local = block->first_local; local; local = local->next) { if (identifier.equal(local->variable.name)) { variable = &local->variable; break; } } } break; case ScopeKind::for_each: { trap_raw(); } break; case ScopeKind::macro_declaration: { trap_raw(); } break; case ScopeKind::macro_instantiation: { trap_raw(); } break; } if (variable) { break; } } if (variable) { auto result = new_value(module); *result = { .variable_reference = variable, .id = ValueId::variable_reference, .kind = kind, }; return result; } else { report_error(); } } fn Value* parse_value(Module* module, Scope* scope, ValueBuilder builder); fn Value* parse_precedence(Module* module, Scope* scope, ValueBuilder builder); fn Value* parse_left(Module* module, Scope* scope, ValueBuilder builder) { Token token = builder.token; Value* result; switch (token.id) { case TokenId::integer: { auto integer_value = token.integer.value; result = new_value(module); *result = { .constant_integer = { .value = integer_value, .is_signed = false, }, .id = ValueId::constant_integer, .kind = ValueKind::right, }; } break; case TokenId::dash: case TokenId::ampersand: // Unary { assert(!builder.left); UnaryId id; switch (token.id) { case TokenId::dash: id = UnaryId::minus; break; case TokenId::ampersand: id = UnaryId::ampersand; break; default: unreachable(); } auto unary_value = parse_precedence(module, scope, builder.with_precedence(Precedence::prefix).with_token({}).with_kind(token.id == TokenId::ampersand ? ValueKind::left : builder.kind)); result = new_value(module); *result = { .unary = { .value = unary_value, .id = id, }, .id = ValueId::unary, .kind = ValueKind::right, }; } break; case TokenId::identifier: { result = reference_identifier(module, scope, token.identifier, builder.kind); } break; case TokenId::value_intrinsic: { ValueIntrinsic intrinsic = token.value_intrinsic; switch (intrinsic) { case ValueIntrinsic::align_of: { trap_raw(); } break; case ValueIntrinsic::byte_size: { trap_raw(); } break; case ValueIntrinsic::enum_name: { trap_raw(); } break; case ValueIntrinsic::extend: case ValueIntrinsic::truncate: { skip_space(module); expect_character(module, left_parenthesis); skip_space(module); auto argument = parse_value(module, scope, {}); expect_character(module, right_parenthesis); result = new_value(module); UnaryId id; switch (intrinsic) { case ValueIntrinsic::extend: id = UnaryId::extend; break; case ValueIntrinsic::truncate: id = UnaryId::truncate; break; default: unreachable(); } *result = { .unary = { .value = argument, .id = id, }, .id = ValueId::unary, }; } break; case ValueIntrinsic::integer_max: { skip_space(module); expect_character(module, left_parenthesis); skip_space(module); auto type = parse_type(module); expect_character(module, right_parenthesis); result = new_value(module); *result = { .unary_type = { .type = type, .id = UnaryTypeId::integer_max, }, .id = ValueId::unary_type, }; } break; case ValueIntrinsic::int_from_enum: { trap_raw(); } break; case ValueIntrinsic::int_from_pointer: { trap_raw(); } break; case ValueIntrinsic::pointer_cast: { trap_raw(); } break; case ValueIntrinsic::select: { trap_raw(); } break; case ValueIntrinsic::string_to_enum: { trap_raw(); } break; case ValueIntrinsic::trap: { trap_raw(); } break; case ValueIntrinsic::va_start: { trap_raw(); } break; case ValueIntrinsic::va_end: { trap_raw(); } break; case ValueIntrinsic::va_arg: { trap_raw(); } break; case ValueIntrinsic::va_copy: { trap_raw(); } break; case ValueIntrinsic::count: unreachable(); } } break; default: report_error(); } return result; } fn Precedence get_token_precedence(TokenId id) { Precedence precedence; switch (id) { case TokenId::none: unreachable(); case TokenId::end_of_statement: case TokenId::right_brace: case TokenId::right_bracket: case TokenId::right_parenthesis: precedence = Precedence::none; break; case TokenId::ampersand: case TokenId::bar: case TokenId::caret: precedence = Precedence::bitwise; break; case TokenId::shift_left: case TokenId::shift_right: precedence = Precedence::shifting; break; case TokenId::plus: case TokenId::dash: precedence = Precedence::add_like; break; case TokenId::asterisk: case TokenId::forward_slash: case TokenId::percentage: precedence = Precedence::div_like; break; case TokenId::pointer_dereference: precedence = Precedence::postfix; break; default: trap_raw(); } return precedence; } fn Value* parse_right(Module* module, Scope* scope, ValueBuilder builder) { auto* left = builder.left; assert(left); Token token = builder.token; Value* result = 0; switch (token.id) { case TokenId::plus: case TokenId::dash: case TokenId::asterisk: case TokenId::forward_slash: case TokenId::percentage: case TokenId::ampersand: case TokenId::bar: case TokenId::caret: case TokenId::shift_left: case TokenId::shift_right: // Binary { auto precedence = get_token_precedence(token.id); assert(precedence != Precedence::assignment); BinaryId id; switch (token.id) { case TokenId::plus: id = BinaryId::add; break; case TokenId::dash: id = BinaryId::sub; break; case TokenId::asterisk: id = BinaryId::mul; break; case TokenId::forward_slash: id = BinaryId::div; break; case TokenId::percentage: id = BinaryId::rem; break; case TokenId::ampersand: id = BinaryId::bitwise_and; break; case TokenId::bar: id = BinaryId::bitwise_or; break; case TokenId::caret: id = BinaryId::bitwise_xor; break; case TokenId::shift_left: id = BinaryId::shift_left; break; case TokenId::shift_right: id = BinaryId::shift_right; break; default: unreachable(); } auto right_precedence = (Precedence)((backing_type(Precedence))precedence + (precedence != Precedence::assignment)); auto right = parse_precedence(module, scope, builder.with_precedence(right_precedence).with_token({}).with_left(0)); result = new_value(module); *result = { .binary = { .left = left, .right = right, .id = id, }, .id = ValueId::binary, .kind = ValueKind::right, }; } break; case TokenId::pointer_dereference: { result = new_value(module); *result = { .dereference = left, .id = ValueId::dereference, .kind = ValueKind::right, }; } break; default: report_error(); } return result; } fn Value* parse_precedence_left(Module* module, Scope* scope, ValueBuilder builder) { auto result = builder.left; auto precedence = builder.precedence; while (1) { auto checkpoint = get_checkpoint(module); auto token = tokenize(module); auto token_precedence = get_token_precedence(token.id); if (token_precedence == Precedence::assignment) { token_precedence = builder.allow_assignment_operators ? Precedence::assignment : Precedence::none; } if ((backing_type(Precedence))precedence > (backing_type(Precedence))token_precedence) { set_checkpoint(module, checkpoint); break; } auto left = result; auto right = parse_right(module, scope, builder.with_token(token).with_precedence(Precedence::none).with_left(left)); result = right; } return result; } fn Value* parse_precedence(Module* module, Scope* scope, ValueBuilder builder) { assert(builder.token.id == TokenId::none); auto token = tokenize(module); auto left = parse_left(module, scope, builder.with_token(token)); auto result = parse_precedence_left(module, scope, builder.with_left(left)); return result; } fn Value* parse_value(Module* module, Scope* scope, ValueBuilder builder) { assert(builder.precedence == Precedence::none); assert(!builder.left); auto value = parse_precedence(module, scope, builder.with_precedence(Precedence::assignment)); return value; } global_variable constexpr auto invalid_argument_index = ~(u32)0; fn Statement* parse_statement(Module* module, Scope* scope) { bool require_semicolon = true; auto statement_line = get_line(module); auto statement_column = get_column(module); auto* statement = &arena_allocate(module->arena, 1)[0]; *statement = Statement{ .line = statement_line, .column = statement_column, }; auto statement_start_character = module->content[module->offset]; switch (statement_start_character) { case '>': { module->offset += 1; skip_space(module); auto local_name = parse_identifier(module); skip_space(module); Type* local_type = 0; if (consume_character_if_match(module, ':')) { skip_space(module); local_type = parse_type(module); skip_space(module); } expect_character(module, '='); auto initial_value = parse_value(module, scope, {}); auto local = new_local(module, scope); *local = { .variable = { .storage = 0, .initial_value = initial_value, .type = local_type, .scope = scope, .name = local_name, .line = statement_line, .column = statement_column, }, .argument_index = invalid_argument_index, }; statement->local = local; statement->id = StatementId::local; } break; case '#': { trap_raw(); } break; case left_brace: { trap_raw(); } break; default: { if (is_identifier_start(statement_start_character)) { auto checkpoint = get_checkpoint(module); auto statement_start_identifier = parse_identifier(module); skip_space(module); enum class StatementStartKeyword { underscore_st, return_st, if_st, // TODO: make `unreachable` a statement start keyword? for_st, while_st, switch_st, break_st, continue_st, count, }; String statement_start_keywords[] = { str("_"), str("return"), str("if"), str("for"), str("while"), str("switch"), str("break"), str("continue"), }; static_assert(array_length(statement_start_keywords) == (u64)StatementStartKeyword::count); backing_type(StatementStartKeyword) i; for (i = 0; i < (backing_type(StatementStartKeyword))StatementStartKeyword::count; i += 1) { auto statement_start_keyword = statement_start_keywords[i]; if (statement_start_keyword.equal(statement_start_identifier)) { break; } } auto statement_start_keyword = (StatementStartKeyword)i; switch (statement_start_keyword) { case StatementStartKeyword::underscore_st: { trap_raw(); } break; case StatementStartKeyword::return_st: { auto return_value = parse_value(module, scope, {}); statement->return_st = return_value; statement->id = StatementId::return_st; } break; case StatementStartKeyword::if_st: { trap_raw(); } break; case StatementStartKeyword::for_st: { trap_raw(); } break; case StatementStartKeyword::while_st: { trap_raw(); } break; case StatementStartKeyword::switch_st: { trap_raw(); } break; case StatementStartKeyword::break_st: { trap_raw(); } break; case StatementStartKeyword::continue_st: { trap_raw(); } break; case StatementStartKeyword::count: { trap_raw(); } break; } } else { trap_raw(); } } break; } if (require_semicolon) { expect_character(module, ';'); } return statement; } fn Block* parse_block(Module* module, Scope* parent_scope) { auto* block = &arena_allocate(module->arena, 1)[0]; *block = { .scope = { .parent = parent_scope, .line = get_line(module), .column = get_column(module), .kind = ScopeKind::local, }, }; auto* scope = &block->scope; expect_character(module, left_brace); Statement* current_statement = 0; while (true) { skip_space(module); if (module->offset == module->content.length) { break; } if (consume_character_if_match(module, right_brace)) { break; } auto* statement = parse_statement(module, scope); if (current_statement) { current_statement->next = statement; } current_statement = statement; } return block; } void parse(Module* module) { while (1) { skip_space(module); if (module->offset == module->content.length) { break; } bool is_export = false; bool is_extern = false; auto global_line = get_line(module); auto global_column = get_column(module); if (consume_character_if_match(module, left_bracket)) { while (module->offset < module->content.length) { auto global_keyword_string = parse_identifier(module); enum class GlobalKeyword { export_keyword, extern_keyword, count, }; String global_keyword_strings[] = { str("export"), str("extern"), }; static_assert(array_length(global_keyword_strings) == (u64)GlobalKeyword::count); u32 i; for (i = 0; i < array_length(global_keyword_strings); i += 1) { String keyword = global_keyword_strings[i]; if (keyword.equal(global_keyword_string)) { break; } } auto global_keyword = (GlobalKeyword)i; switch (global_keyword) { case GlobalKeyword::export_keyword: { is_export = true; } break; case GlobalKeyword::extern_keyword: { is_extern = true; } break; case GlobalKeyword::count: { report_error(); } } if (consume_character_if_match(module, right_bracket)) { break; } else { report_error(); } } skip_space(module); } auto global_name = parse_identifier(module); Global* last_global = module->first_global; while (last_global) { if (global_name.equal(last_global->variable.name)) { report_error(); } if (!last_global->next) { break; } last_global = last_global->next; } Type* type_it = module->first_type; Type* forward_declaration = 0; while (type_it) { if (global_name.equal(type_it->name)) { if (type_it->id == TypeId::forward_declaration) { forward_declaration = type_it; break; } else { report_error(); } } if (!type_it->next) { break; } type_it = type_it->next; } skip_space(module); Type* global_type = 0; if (consume_character_if_match(module, ':')) { skip_space(module); global_type = parse_type(module); skip_space(module); } expect_character(module, '='); skip_space(module); bool is_global_keyword = false; enum class GlobalKeyword { bits, enumerator, function, macro, structure, typealias, union_type, count, }; auto i = (backing_type(GlobalKeyword))GlobalKeyword::count; if (is_identifier_start(module->content[module->offset])) { auto checkpoint = get_checkpoint(module); auto global_string = parse_identifier(module); skip_space(module); String global_keywords[] = { str("bits"), str("enum"), str("fn"), str("macro"), str("struct"), str("typealias"), str("union"), }; static_assert(array_length(global_keywords) == (u64)GlobalKeyword::count); for (i = 0; i < (backing_type(GlobalKeyword))GlobalKeyword::count; i += 1) { String global_keyword = global_keywords[i]; if (global_string.equal(global_keyword)) { break; } } auto global_keyword = (GlobalKeyword)i; switch (global_keyword) { case GlobalKeyword::bits: { trap_raw(); } break; case GlobalKeyword::enumerator: { trap_raw(); } break; case GlobalKeyword::function: { auto calling_convention = CallingConvention::c; auto function_attributes = FunctionAttributes{}; bool is_variable_arguments = false; if (consume_character_if_match(module, left_bracket)) { while (module->offset < module->content.length) { auto function_identifier = parse_identifier(module); enum class FunctionKeyword { cc, count, }; String function_keywords[] = { str("cc"), }; static_assert(array_length(function_keywords) == (u64)FunctionKeyword::count); backing_type(FunctionKeyword) i; for (i = 0; i < (backing_type(FunctionKeyword))(FunctionKeyword::count); i += 1) { auto function_keyword = function_keywords[i]; if (function_keyword.equal(function_identifier)) { break; } } auto function_keyword = (FunctionKeyword)i; skip_space(module); switch (function_keyword) { case FunctionKeyword::cc: { expect_character(module, left_parenthesis); skip_space(module); auto calling_convention_string = parse_identifier(module); String calling_conventions[] = { str("c"), }; static_assert(array_length(calling_conventions) == (u64)CallingConvention::count); backing_type(CallingConvention) i; for (i = 0; i < (backing_type(CallingConvention))CallingConvention::count; i += 1) { auto calling_convention = calling_conventions[i]; if (calling_convention.equal(calling_convention_string)) { break; } } auto candidate_calling_convention = (CallingConvention)i; if (candidate_calling_convention == CallingConvention::count) { report_error(); } calling_convention = candidate_calling_convention; skip_space(module); expect_character(module, right_parenthesis); } break; case FunctionKeyword::count: { report_error(); } break; } skip_space(module); if (consume_character_if_match(module, right_bracket)) { break; } else { report_error(); } } } skip_space(module); expect_character(module, left_parenthesis); Type* semantic_argument_type_buffer[64]; String semantic_argument_name_buffer[64]; u64 semantic_argument_count = 0; while (module->offset < module->content.length) { skip_space(module); if (consume_character_if_match(module, '.')) { expect_character(module, '.'); expect_character(module, '.'); skip_space(module); expect_character(module, right_parenthesis); is_variable_arguments = true; break; } if (consume_character_if_match(module, right_parenthesis)) { break; } auto argument_name = parse_identifier(module); semantic_argument_name_buffer[semantic_argument_count] = argument_name; skip_space(module); expect_character(module, ':'); skip_space(module); auto argument_type = parse_type(module); semantic_argument_type_buffer[semantic_argument_count] = argument_type; skip_space(module); unused(consume_character_if_match(module, ',')); semantic_argument_count += 1; } skip_space(module); auto return_type = parse_type(module); skip_space(module); Slice argument_types = {}; if (semantic_argument_count != 0) { trap_raw(); } auto is_declaration = consume_character_if_match(module, ';'); auto function_type = type_allocate_init(module, { .function = { .semantic_return_type = return_type, .semantic_argument_types = argument_types, .calling_convention = calling_convention, .is_variable_arguments = is_variable_arguments, }, .id = TypeId::function, .name = str(""), }); auto storage = new_value(module); *storage = { .type = get_pointer_type(module, function_type), .id = ValueId::external_function, // TODO? .kind = ValueKind::left, }; auto global = new_global(module); *global = { .variable = { .storage = storage, .initial_value = 0, .type = function_type, .scope = &module->scope, .name = global_name, .line = global_line, .column = global_column, }, .linkage = (is_export | is_extern) ? Linkage::external : Linkage::internal, }; module->current_function = global; if (!is_declaration) { storage->function = { .arguments = {}, .scope = { .parent = &module->scope, .line = global_line, .column = global_column, .kind = ScopeKind::function, }, .block = 0, .attributes = function_attributes, }; storage->id = ValueId::function; if (semantic_argument_count != 0) { trap_raw(); } storage->function.block = parse_block(module, &storage->function.scope); } } break; case GlobalKeyword::macro: { trap_raw(); } break; case GlobalKeyword::structure: { trap_raw(); } break; case GlobalKeyword::typealias: { trap_raw(); } break; case GlobalKeyword::union_type: { trap_raw(); } break; case GlobalKeyword::count: { set_checkpoint(module, checkpoint); } break; } } if (i == (backing_type(GlobalKeyword))GlobalKeyword::count) { trap_raw(); } } }