From 0166e74e6aac02f2cf8b01a917c707dd735dddfb Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Thu, 1 May 2025 08:33:46 -0600 Subject: [PATCH] Implement parser --- CMakeLists.txt | 23 + build.sh | 63 +- generate.sh | 7 + old_build.sh | 61 + src/compiler.cpp | 423 +++++ src/compiler.h | 996 +++++++++++ src/emitter.cpp | 5 + src/entry_point.cpp | 13 + src/entry_point.h | 2 + src/lib.h | 681 ++++++++ src/parser.cpp | 3817 +++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 6031 insertions(+), 60 deletions(-) create mode 100644 CMakeLists.txt create mode 100755 generate.sh create mode 100755 old_build.sh create mode 100644 src/compiler.cpp create mode 100644 src/compiler.h create mode 100644 src/emitter.cpp create mode 100644 src/entry_point.cpp create mode 100644 src/entry_point.h create mode 100644 src/lib.h create mode 100644 src/parser.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..0bc13c3 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.15) +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE) +endif() +# Set C++ standard +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD_REQUIRED YES) +project(bb) + +add_executable(bb + src/compiler.cpp + src/entry_point.cpp + src/parser.cpp + src/emitter.cpp +) + +target_include_directories(bb PUBLIC src) +target_compile_definitions(bb PUBLIC + $<$:BB_DEBUG=1> + $<$>:BB_DEBUG=0> +) +#target_compile_options(bb PRIVATE -fsanitize=address) +#target_link_options(bb PRIVATE -fsanitize=address) diff --git a/build.sh b/build.sh index 09b3c32..eead4ed 100755 --- a/build.sh +++ b/build.sh @@ -1,61 +1,4 @@ -#!/usr/bin/env bash set -eu - -MY_CWD=$PWD - -if [[ -z "${BB_CI-}" ]]; then - BB_CI=0 -fi - -if [[ -z "${BB_BUILD_TYPE-}" ]]; then - BB_BUILD_TYPE=debug -fi - -if [[ -z "${BB_ERROR_ON_WARNINGS-}" ]]; then - BB_ERROR_ON_WARNINGS=$BB_CI -fi - -if [[ -z "${BB_ERROR_LIMIT-}" ]]; then - BB_ERROR_LIMIT=$((1 - 
BB_CI)) -fi - -BB_COMPILE_SHADERS=0 - -BUILD_DIR=cache -LARGE_ASSET_BASE_URL=https://github.com/birth-software/bloat-buster/releases/download/large-assets -mkdir -p $BUILD_DIR - -if [[ ! -f "$BUILD_DIR/large_assembly.s" ]]; then - cd $BUILD_DIR - wget $LARGE_ASSET_BASE_URL/large_assembly.s -o large_assembly.s - cd $MY_CWD -fi - -if [[ "${BB_COMPILE_SHADERS}" == "1" ]]; then - glslangValidator -V bootstrap/std/shaders/rect.vert -o $BUILD_DIR/rect.vert.spv --quiet - glslangValidator -V bootstrap/std/shaders/rect.frag -o $BUILD_DIR/rect.frag.spv --quiet -fi - -BUILD_OUT=$BUILD_DIR/build -C_COMPILER=clang -TIME_TRACE=1 -BB_TIMETRACE=0 -GCC_ARGS= -CLANG_ARGS= -TIME_TRACE_ARG= - -if [[ $C_COMPILER == "clang"* ]]; then - CLANG_ARGS=-ferror-limit=1 - if [[ "$TIME_TRACE" == "1" ]]; then - CLANG_ARGS="$CLANG_ARGS -ftime-trace" - BB_TIMETRACE=1 - else - CLANG_ARGS="$CLANG_ARGS -ftime-trace" - fi -elif [[ $C_COMPILER == "gcc"* ]]; then - GCC_ARGS=-fmax-errors=1 -fi - -$C_COMPILER build.c -g -o $BUILD_OUT -Ibootstrap -std=gnu2x $CLANG_ARGS $GCC_ARGS -DBB_TIMETRACE=$BB_TIMETRACE -DBB_CI=$BB_CI -DBB_BUILD_TYPE=\"$BB_BUILD_TYPE\" -DBB_ERROR_ON_WARNINGS=$BB_ERROR_ON_WARNINGS -DBB_ERROR_LIMIT=$BB_ERROR_LIMIT -$BUILD_OUT $@ -exit 0 +cd build +ninja --quiet +cd .. diff --git a/generate.sh b/generate.sh new file mode 100755 index 0000000..6c7264a --- /dev/null +++ b/generate.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -eu +rm -rf build +mkdir build +cd build +cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_LINKER_TYPE=MOLD -DCMAKE_EXPORT_COMPILE_COMMANDS=ON +cd .. 
diff --git a/old_build.sh b/old_build.sh new file mode 100755 index 0000000..09b3c32 --- /dev/null +++ b/old_build.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +set -eu + +MY_CWD=$PWD + +if [[ -z "${BB_CI-}" ]]; then + BB_CI=0 +fi + +if [[ -z "${BB_BUILD_TYPE-}" ]]; then + BB_BUILD_TYPE=debug +fi + +if [[ -z "${BB_ERROR_ON_WARNINGS-}" ]]; then + BB_ERROR_ON_WARNINGS=$BB_CI +fi + +if [[ -z "${BB_ERROR_LIMIT-}" ]]; then + BB_ERROR_LIMIT=$((1 - BB_CI)) +fi + +BB_COMPILE_SHADERS=0 + +BUILD_DIR=cache +LARGE_ASSET_BASE_URL=https://github.com/birth-software/bloat-buster/releases/download/large-assets +mkdir -p $BUILD_DIR + +if [[ ! -f "$BUILD_DIR/large_assembly.s" ]]; then + cd $BUILD_DIR + wget $LARGE_ASSET_BASE_URL/large_assembly.s -o large_assembly.s + cd $MY_CWD +fi + +if [[ "${BB_COMPILE_SHADERS}" == "1" ]]; then + glslangValidator -V bootstrap/std/shaders/rect.vert -o $BUILD_DIR/rect.vert.spv --quiet + glslangValidator -V bootstrap/std/shaders/rect.frag -o $BUILD_DIR/rect.frag.spv --quiet +fi + +BUILD_OUT=$BUILD_DIR/build +C_COMPILER=clang +TIME_TRACE=1 +BB_TIMETRACE=0 +GCC_ARGS= +CLANG_ARGS= +TIME_TRACE_ARG= + +if [[ $C_COMPILER == "clang"* ]]; then + CLANG_ARGS=-ferror-limit=1 + if [[ "$TIME_TRACE" == "1" ]]; then + CLANG_ARGS="$CLANG_ARGS -ftime-trace" + BB_TIMETRACE=1 + else + CLANG_ARGS="$CLANG_ARGS -ftime-trace" + fi +elif [[ $C_COMPILER == "gcc"* ]]; then + GCC_ARGS=-fmax-errors=1 +fi + +$C_COMPILER build.c -g -o $BUILD_OUT -Ibootstrap -std=gnu2x $CLANG_ARGS $GCC_ARGS -DBB_TIMETRACE=$BB_TIMETRACE -DBB_CI=$BB_CI -DBB_BUILD_TYPE=\"$BB_BUILD_TYPE\" -DBB_ERROR_ON_WARNINGS=$BB_ERROR_ON_WARNINGS -DBB_ERROR_LIMIT=$BB_ERROR_LIMIT +$BUILD_OUT $@ +exit 0 diff --git a/src/compiler.cpp b/src/compiler.cpp new file mode 100644 index 0000000..a1728d4 --- /dev/null +++ b/src/compiler.cpp @@ -0,0 +1,423 @@ +#include + +fn void compile(Arena* arena, Options options) +{ + auto base_allocation_type_count = i128_offset + // 64 * 2 for basic integer types + 2 + // u128, s128 + 2; // 
void, noreturn + auto base_type_allocation = arena_allocate(arena, base_allocation_type_count); + + auto* type_it = base_type_allocation.pointer; + + bool signs[] = {false, true}; + Type* previous = 0; + + for (bool sign: signs) + { + for (u32 bit_index = 0; bit_index < 64; bit_index += 1) + { + auto bit_count = bit_index + 1; + auto first_digit = (u8)(bit_count < 10 ? bit_count % 10 + '0' : bit_count / 10 + '0'); + auto second_digit = (u8)(bit_count > 9 ? bit_count % 10 + '0' : 0); + u8 name_buffer[] = { u8(sign ? 's' : 'u'), first_digit, second_digit }; + u64 name_length = 2 + (bit_count > 9); + auto name_stack = String{name_buffer, name_length}; + + auto name = arena_duplicate_string(arena, name_stack); + + *type_it = { + .integer = { + .bit_count = bit_count, + .is_signed = sign, + }, + .id = TypeId::integer, + .name = name, + }; + if (previous) previous->next = type_it; + previous = type_it; + type_it += 1; + } + } + + for (bool sign: signs) + { + auto name = sign ? str("s128") : str("u128"); + *type_it = { + .integer = { + .bit_count = 128, + .is_signed = sign, + }, + .id = TypeId::integer, + .name = name, + .next = previous, + }; + if (previous) previous->next = type_it; + previous = type_it; + type_it += 1; + } + + auto void_type = type_it; + type_it += 1; + auto noreturn_type = type_it; + type_it += 1; + assert(type_it - base_type_allocation.pointer == base_allocation_type_count); + + previous->next = void_type; + *void_type = { + .id = TypeId::void_type, + .name = str("void"), + .next = noreturn_type, + }; + *noreturn_type = { + .id = TypeId::noreturn, + .name = str("noreturn"), + }; + + auto module = Module{ + .arena = arena, + .content = options.content, + .first_type = base_type_allocation.pointer, + .last_type = noreturn_type, + .scope = { + .kind = ScopeKind::global, + }, + .name = options.name, + .path = options.path, + .executable = options.executable, + .objects = options.objects, + .target = options.target, + .build_mode = options.build_mode, + 
.has_debug_info = options.has_debug_info, + .silent = options.silent, + }; + module.void_value = new_value(&module); + *module.void_value = { + .type = void_type, + .id = ValueId::infer_or_ignore, + }; + + parse(&module); + emit(&module); +} + +fn void compile_file(Arena* arena, Compile options) +{ + auto relative_file_path = options.relative_file_path; + if (relative_file_path.length < 5) + { + fail(); + } + + auto extension_start = string_last_character(relative_file_path, '.'); + if (extension_start == string_no_match) + { + fail(); + } + + if (!relative_file_path(extension_start).equal(str(".bbb"))) + { + fail(); + } + + auto separator_index = string_last_character(relative_file_path, '/'); + separator_index = separator_index == string_no_match ? 0 : separator_index; + + auto base_start = separator_index + (separator_index != 0 || relative_file_path[separator_index] == '/'); + auto base_name = relative_file_path(base_start, extension_start); + + auto is_compiler = relative_file_path.equal(str("src/compiler.bbb")); + + String output_path_dir_parts[] = { + str(base_cache_dir), + is_compiler ? str("/compiler") : str("/"), + build_mode_to_string(options.build_mode), + str("_"), + options.has_debug_info ? 
str("di") : str("nodi"), + }; + auto output_path_dir = arena_join_string(arena, array_to_slice(output_path_dir_parts)); + + make_directory(base_cache_dir); + + if (is_compiler) + { + make_directory(base_cache_dir "/compiler"); + } + + make_directory(cstr(output_path_dir)); + + String output_path_base_parts[] = { + output_path_dir, + str("/"), + base_name, + }; + auto output_path_base = arena_join_string(arena, array_to_slice(output_path_base_parts)); + String output_object_path_parts[] = { + output_path_base, + str(".o"), + }; + auto output_object_path = arena_join_string(arena, array_to_slice(output_object_path_parts)); + auto output_executable_path = output_path_base; + + auto file_content = file_read(arena, relative_file_path); + auto file_path = path_absolute(arena, relative_file_path); + auto c_abi_object_path = str("build/c_abi.o"); + + String objects[] = { + c_abi_object_path, + output_object_path, + }; + Slice object_slice = array_to_slice(objects); + object_slice = object_slice(!base_name.equal(str("c_abi"))); + + compile(arena, { + .content = file_content, + .path = file_path, + .executable = output_executable_path, + .name = base_name, + .objects = object_slice, + .target = { + .cpu = CPUArchitecture::x86_64, + .os = OperatingSystem::linux_, + }, + .build_mode = options.build_mode, + .has_debug_info = options.has_debug_info, + .silent = options.silent, + }); +} + +global_variable String names[] = { + str("minimal"), + str("comments"), + str("constant_add"), + str("constant_and"), + str("constant_div"), + str("constant_mul"), + str("constant_rem"), + str("constant_or"), + str("constant_sub"), + str("constant_xor"), + str("constant_shift_left"), + str("constant_shift_right"), + str("minimal_stack"), + str("minimal_stack_arithmetic"), + str("minimal_stack_arithmetic2"), + str("minimal_stack_arithmetic3"), + str("stack_negation"), + str("stack_add"), + str("stack_sub"), + str("extend"), + str("integer_max"), + str("integer_hex"), + str("basic_pointer"), + 
str("basic_call"),
+    str("basic_branch"),
+    str("basic_array"),
+    str("basic_enum"),
+    str("basic_slice"),
+    str("basic_string"),
+    str("basic_varargs"),
+    str("basic_while"),
+    str("pointer"),
+    str("pointer_cast"),
+    str("u1_return"),
+    str("local_type_inference"),
+    str("global"),
+    str("function_pointer"),
+    str("extern"),
+    str("byte_size"),
+    str("argv"),
+    str("assignment_operators"),
+    str("not_pointer"),
+    str("bits"),
+    str("bits_no_backing_type"),
+    str("bits_return_u1"),
+    str("bits_zero"),
+    str("comparison"),
+    str("global_struct"),
+    str("if_no_else"),
+    str("if_no_else_void"),
+    str("indirect"),
+    str("indirect_struct"),
+    str("indirect_varargs"),
+    str("ret_c_bool"),
+    str("return_type_builtin"),
+    str("return_u64_u64"),
+    str("select"),
+    str("slice"),
+    str("small_struct_ints"),
+    str("struct_assignment"),
+    str("struct"),
+    str("struct_u64_u64"),
+    str("struct_varargs"),
+    str("struct_zero"),
+    str("unreachable"),
+    str("varargs"),
+    str("c_abi0"),
+    str("c_abi1"),
+    str("c_med_struct_ints"),
+    str("c_ret_struct_array"),
+    str("c_split_struct_ints"),
+    str("c_string_to_slice"),
+    str("c_struct_with_array"),
+    str("c_function_pointer"),
+    str("c_abi"),
+    str("string_to_enum"),
+    str("abi_enum_bool"),
+    str("empty_if"),
+    str("else_if"),
+    str("else_if_complicated"),
+    str("shortcircuiting_if"),
+    str("field_access_left_assign"),
+    str("for_each"),
+    str("pointer_decay"),
+    str("enum_name"),
+    str("slice_of_slices"),
+    str("type_alias"),
+    str("integer_formats"),
+    str("return_small_struct"),
+    str("for_each_int"),
+    str("bool_array"),
+    str("basic_union"),
+    str("break_continue"),
+    str("constant_global_reference"),
+    str("self_referential_struct"),
+    str("forward_declared_type"),
+    str("basic_macro"),
+    str("generic_macro"),
+    str("generic_pointer_macro"),
+    str("noreturn_macro"),
+    str("generic_pointer_array"),
+};
+
+void entry_point(Slice arguments, Slice environment) // Dispatches on arguments[1]: "compile <file> [build_mode] [true|false]" or "test".
+{
+    Arena* arena = arena_initialize_default(8 * mb);
+
+    if (arguments.length < 2)
+    {
+        fail_with_message(str("error: Not enough arguments\n"));
+    }
+
+    String command_string = c_string_to_slice(arguments[1]);
+    String command_strings[] = {
+        str("compile"),
+        str("test"),
+    };
+    static_assert(array_length(command_strings) == (u64)Command::count);
+
+    backing_type(Command) i;
+    for (i = 0; i < (backing_type(Command))Command::count; i += 1)
+    {
+        String candidate = command_strings[i];
+        if (candidate.equal(command_string))
+        {
+            break;
+        }
+    }
+
+    auto command = (Command)i; // equals Command::count when no command string matched
+
+    switch (command)
+    {
+    case Command::compile:
+        {
+            if (arguments.length < 3)
+            {
+                fail_with_message(str("Not enough arguments for command 'compile'\n"));
+            }
+
+            auto build_mode = BuildMode::debug_none;
+            auto has_debug_info = true;
+
+            if (arguments.length >= 4)
+            {
+                auto build_mode_string = c_string_to_slice(arguments[3]);
+                String build_mode_strings[] = {
+                    str("debug_none"),
+                    str("debug"),
+                    str("soft_optimize"),
+                    str("optimize_for_speed"),
+                    str("optimize_for_size"),
+                    str("aggressively_optimize_for_speed"),
+                    str("aggressively_optimize_for_size"),
+                };
+
+                backing_type(BuildMode) i;
+                for (i = 0; i < (backing_type(BuildMode))BuildMode::count; i += 1)
+                {
+                    String candidate = build_mode_strings[i];
+                    if (build_mode_string.equal(candidate))
+                    {
+                        break;
+                    }
+                }
+
+                build_mode = (BuildMode)i;
+                if (build_mode == BuildMode::count)
+                {
+                    fail_with_message(str("Invalid build mode\n"));
+                }
+            }
+
+            if (arguments.length >= 5)
+            {
+                auto has_debug_info_string = c_string_to_slice(arguments[4]); // fifth argument; index 3 is the build mode parsed above
+                if (has_debug_info_string.equal(str("true")))
+                {
+                    has_debug_info = true;
+                }
+                else if (has_debug_info_string.equal(str("false")))
+                {
+                    has_debug_info = false;
+                }
+                else
+                {
+                    fail_with_message(str("Wrong value for has_debug_info\n"));
+                }
+            }
+
+            auto relative_file_path = c_string_to_slice(arguments[2]);
+
+            compile_file(arena, {
+                .relative_file_path = relative_file_path,
+                .build_mode = build_mode,
+                .has_debug_info = has_debug_info,
+                .silent = false,
+            });
+        } break;
+    case Command::test:
+        {
+            // TODO: provide more arguments
+            if (arguments.length != 2)
+            {
+                fail_with_message(str("error: 'test' command takes no arguments\n"));
+            }
+
+            // TODO: introduce build mode, debug info switch, etc
+
+            for (auto name: names)
+            {
+                auto position = arena->position;
+
+                String relative_file_path_parts[] = { str("tests/"), name, str(".bbb") };
+                auto relative_file_path = arena_join_string(arena, array_to_slice(relative_file_path_parts));
+                auto build_mode = BuildMode::debug_none;
+                bool has_debug_info = true;
+                compile_file(arena, {
+                    .relative_file_path = relative_file_path,
+                    .build_mode = build_mode,
+                    .has_debug_info = has_debug_info,
+                    .silent = false,
+                });
+
+                // TODO: introduce test
+
+                arena_restore(arena, position);
+            }
+        } break;
+    case Command::count:
+        {
+            fail_with_message(str("error: Invalid command\n"));
+        } break;
+    }
+}
diff --git a/src/compiler.h b/src/compiler.h
new file mode 100644
index 0000000..e85608e
--- /dev/null
+++ b/src/compiler.h
@@ -0,0 +1,996 @@
+#pragma once
+
+#include 
+
+#define report_error() trap_raw()
+
+enum class Command
+{
+    compile,
+    test,
+    count,
+};
+
+enum class BuildMode
+{
+    debug_none,
+    debug,
+    soft_optimize,
+    optimize_for_speed,
+    optimize_for_size,
+    aggressively_optimize_for_speed,
+    aggressively_optimize_for_size,
+    count,
+};
+
+fn String build_mode_to_string(BuildMode build_mode)
+{
+    switch (build_mode)
+    {
+        case_to_name(BuildMode, debug_none);
+        case_to_name(BuildMode, debug);
+        case_to_name(BuildMode, soft_optimize);
+        case_to_name(BuildMode, optimize_for_speed);
+        case_to_name(BuildMode, optimize_for_size);
+        case_to_name(BuildMode, aggressively_optimize_for_speed);
+        case_to_name(BuildMode, aggressively_optimize_for_size);
+        case BuildMode::count: unreachable();
+    }
+}
+
+enum class ValueKind
+{
+    right,
+    left,
+};
+
+enum class CPUArchitecture
+{
+    x86_64,
+};
+
+enum class OperatingSystem
+{
+    linux_,
+};
+
+struct Type;
+struct Value;
+struct Local;
+struct Global; +struct Block; +struct Statement; +struct Variable; +struct Argument; +struct Scope; +struct MacroDeclaration; + +struct Target +{ + CPUArchitecture cpu; + OperatingSystem os; +}; + +struct Compile +{ + String relative_file_path; + BuildMode build_mode; + bool has_debug_info; + bool silent; +}; + +#define base_cache_dir "bb-cache" + +enum class CallingConvention +{ + c, + count, +}; + +enum class InlineBehavior +{ + normal, + always_inline, + no_inline, + inline_hint, +}; + +struct FunctionAttributes +{ + InlineBehavior inline_behavior; + bool naked; +}; + +enum class TypeId +{ + void_type, + noreturn, + forward_declaration, + integer, + function, + pointer, + array, + enumerator, + structure, + bits, + alias, + union_type, + unresolved, +}; + +struct TypeInteger +{ + u32 bit_count; + bool is_signed; +}; + +struct TypeFunction +{ + Type* semantic_return_type; + Slice semantic_argument_types; + CallingConvention calling_convention; + bool is_variable_arguments; +}; + +struct TypePointer +{ + Type* element_type; + Type* next; +}; + +struct TypeArray +{ + u64 element_count; + Type* element_type; +}; + +struct UnresolvedEnumField +{ + String name; + Value* value; +}; + +struct EnumField +{ + String name; + u64 value; +}; + +struct UnresolvedTypeEnum +{ + Slice fields; + Type* backing_type; + u32 line; + bool implicit_backing_type; + Type* resolved_type; +}; + +struct TypeEnum +{ + Slice fields; + Type* backing_type; + u32 line; +}; + +struct Field +{ + String name; + Type* type; + u64 offset; + u32 line; +}; + +struct TypeStruct +{ + Slice fields; + u64 byte_size; + u32 byte_alignment; + u32 line; + bool is_slice; + Type* next; +}; + +struct TypeBits +{ + Slice fields; + Type* backing_type; + u32 line; + bool is_implicit_backing_type; +}; + +struct TypeAlias +{ + Type* type; + Scope* scope; + u32 line; +}; + +struct UnionField +{ + Type* type; + String name; + u32 line; +}; + +struct TypeUnion +{ + Slice fields; + u64 byte_size; + u32 byte_alignment; + 
u32 line; + u32 biggest_field; +}; + +struct Type +{ + union + { + TypeInteger integer; + TypeFunction function; + TypePointer pointer; + TypeArray array; + TypeEnum enumerator; + TypeStruct structure; + TypeBits bits; + TypeAlias alias; + TypeUnion union_type; + }; + TypeId id; + String name; + Type* next; +}; + +fn u32 align_bit_count(u32 bit_count) +{ + auto aligned_bit_count = MAX(8, next_power_of_two(bit_count)); + assert(aligned_bit_count % 8 == 0); + return aligned_bit_count; +} + +fn u32 aligned_byte_count_from_bit_count(u32 bit_count) +{ + auto aligned_bit_count = align_bit_count(bit_count); + return aligned_bit_count / 8; +} + +fn u64 get_byte_size(Type* type) +{ + switch (type->id) + { + case TypeId::integer: + { + auto byte_count = aligned_byte_count_from_bit_count(type->integer.bit_count); + assert(byte_count == 1 || byte_count == 2 || byte_count == 4 || byte_count == 8 || byte_count == 16); + return byte_count; + } break; + case TypeId::array: + { + auto element_type = type->array.element_type; + auto element_size = get_byte_size(element_type); + auto element_count = type->array.element_count; + auto result = element_size * element_count; + return result; + } break; + case TypeId::structure: + { + auto result = type->structure.byte_size; + return result; + } break; + case TypeId::enumerator: + { + auto result = get_byte_size(type->enumerator.backing_type); + return result; + } break; + default: trap_raw(); + } +} + +fn u32 get_byte_alignment(Type* type) +{ + switch (type->id) + { + case TypeId::integer: + { + auto aligned_byte_count = aligned_byte_count_from_bit_count(type->integer.bit_count); + assert(aligned_byte_count == 1 || aligned_byte_count == 2 || aligned_byte_count == 4 || aligned_byte_count == 8 || aligned_byte_count == 16); + return aligned_byte_count; + } break; + case TypeId::array: + { + auto element_type = type->array.element_type; + auto result = get_byte_alignment(element_type); + return result; + } break; + case TypeId::structure: + 
{ + auto result = type->structure.byte_alignment; + return result; + } break; + case TypeId::enumerator: + { + auto result = get_byte_alignment(type->enumerator.backing_type); + return result; + } break; + default: trap_raw(); + } +} + +fn u64 get_bit_size(Type* type) +{ + switch (type->id) + { + case TypeId::integer: return type->integer.bit_count; + default: trap_raw(); + } +} + +enum class ScopeKind +{ + global, + function, + local, + for_each, + macro_declaration, + macro_instantiation, +}; + +struct Scope +{ + Scope* parent; + u32 line; + u32 column; + ScopeKind kind; +}; + +enum class StatementId +{ + local, + expression, + return_st, + assignment, + if_st, + block, + while_st, + switch_st, + for_each, + break_st, + continue_st, +}; + +enum class StatementAssignmentId +{ + assign, + assign_add, + assign_sub, + assign_mul, + assign_div, + assign_rem, + assign_shift_left, + assign_shift_right, + assign_and, + assign_or, + assign_xor, +}; + +struct StatementAssignment +{ + Value* left; + Value* right; + StatementAssignmentId id; +}; + +struct StatementIf +{ + Value* condition; + Statement* if_statement; + Statement* else_statement; +}; + +struct StatementWhile +{ + Value* condition; + Block* block; +}; + +struct StatementSwitchClause +{ + Slice values; + Block* block; +}; + +struct StatementSwitch +{ + Value* discriminant; + Slice clauses; +}; + +enum class ForEachKind +{ + slice, + range, +}; + +struct StatementForEach +{ + Local* first_local; + Local* last_local; + Slice left_values; + Slice right_values; + Statement* predicate; + Scope scope; + ForEachKind kind; +}; + +struct Statement +{ + union + { + Local* local; + Value* expression; + Value* return_st; + StatementAssignment assignment; + StatementIf if_st; + Block* block; + StatementWhile while_st; + StatementSwitch switch_st; + StatementForEach for_each; + }; + Statement* next; + StatementId id; + u32 line; + u32 column; +}; + +struct Block +{ + Local* first_local; + Local* last_local; + Statement* 
first_statement; + Scope scope; +}; + +enum class ValueId +{ + infer_or_ignore, + external_function, + function, + constant_integer, + unary, + binary, + unary_type, + variable_reference, + macro_reference, + macro_instantiation, + dereference, + call, + global, + array_initialization, + array_expression, + slice_expression, + enum_literal, + trap, + field_access, + string_literal, + va_start, + va_arg, + aggregate_initialization, + undefined, + unreachable, + zero, + select, + string_to_enum, +}; + +struct ValueConstantInteger +{ + u64 value; + bool is_signed; +}; + +struct ValueFunction +{ + Slice arguments; + Scope scope; + Block* block; + FunctionAttributes attributes; +}; + +enum class UnaryId +{ + minus, + plus, + ampersand, + exclamation, + tilde, + enum_name, + extend, + truncate, + pointer_cast, + int_from_enum, + int_from_pointer, + va_end, + bitwise_not, +}; + +struct ValueUnary +{ + Value* value; + UnaryId id; +}; + +enum class UnaryTypeId +{ + byte_size, + integer_max, +}; + +struct ValueUnaryType +{ + Type* type; + UnaryTypeId id; +}; + +enum class BinaryId +{ + add, + sub, + mul, + div, + rem, + bitwise_and, + bitwise_or, + bitwise_xor, + shift_left, + shift_right, + compare_equal, + compare_not_equal, + compare_greater, + compare_less, + compare_greater_equal, + compare_less_equal, + logical_and, + logical_or, + logical_and_shortcircuit, + logical_or_shortcircuit, +}; + +struct ValueBinary +{ + Value* left; + Value* right; + BinaryId id; +}; + +struct ValueCall +{ + Value* callable; + Slice arguments; +}; + +struct ValueArrayInitialization +{ + Slice values; + bool is_constant; +}; + +struct ValueArrayExpression +{ + Value* array_like; + Value* index; +}; + +struct ValueFieldAccess +{ + Value* aggregate; + String field_name; +}; + +struct ValueSliceExpression +{ + Value* array_like; + Value* start; + Value* end; +}; + +struct ValueVaArg +{ + Value* va_list; + Type* type; +}; + +struct ValueAggregateInitialization +{ + Slice names; + Slice values; + 
bool is_constant; + bool zero; +}; + +struct ValueSelect +{ + Value* condition; + Value* true_value; + Value* false_value; +}; + +struct ValueStringToEnum +{ + Type* type; + Value* string; +}; + +enum class ConstantArgumentId +{ + value, + type, +}; + +struct ConstantArgument +{ + String name; + union + { + Type* type; + Value* value; + }; + ConstantArgumentId id; +}; + +struct MacroDeclaration +{ + Slice arguments; + Slice constant_arguments; + Type* return_type; + Block* block; + String name; + Scope scope; + MacroDeclaration* next; + + bool is_generic() + { + return constant_arguments.length != 0; + } +}; + +struct MacroInstantiation +{ + MacroDeclaration* declaration; + Global* instantiation_function; + Slice declaration_arguments; + Slice instantiation_arguments; + Slice constant_arguments; + Type* return_type; + Block* block; + Scope scope; + u32 line; + u32 column; +}; + +struct Value +{ + union + { + ValueConstantInteger constant_integer; + ValueFunction function; + ValueUnary unary; + ValueBinary binary; + Variable* variable_reference; + ValueUnaryType unary_type; + Value* dereference; + ValueCall call; + ValueArrayInitialization array_initialization; + ValueArrayExpression array_expression; + String enum_literal; + ValueFieldAccess field_access; + ValueSliceExpression slice_expression; + String string_literal; + ValueVaArg va_arg; + ValueAggregateInitialization aggregate_initialization; + ValueSelect select; + ValueStringToEnum string_to_enum; + MacroDeclaration* macro_reference; + MacroInstantiation macro_instantiation; + }; + Type* type; + ValueId id; + ValueKind kind; + + bool is_constant() + { + switch (id) + { + case ValueId::constant_integer: + return true; + default: trap_raw(); + } + } +}; + +struct Variable +{ + Value* storage; + Value* initial_value; + Type* type; + Scope* scope; + String name; + u32 line; + u32 column; +}; + +enum class Linkage +{ + internal, + external, +}; + +struct Global +{ + Variable variable; + Linkage linkage; + Global* 
next; +}; + +struct Local +{ + Variable variable; + Local* next; +}; + +struct Argument +{ + Variable variable; + u32 index; +}; + +struct Module +{ + Arena* arena; + String content; + u64 offset; + u64 line_offset; + u64 line_character_offset; + + Type* first_pointer_type; + Type* first_slice_type; + Type* first_pair_struct_type; + Type* first_array_type; + + Type* first_type; + Type* last_type; + Type* va_list_type; + + Value* void_value; + Global* first_global; + Global* last_global; + Global* current_function; + MacroDeclaration* first_macro_declaration; + MacroDeclaration* last_macro_declaration; + MacroDeclaration* current_macro_declaration; + MacroInstantiation* current_macro_instantiation; + + Scope scope; + + String name; + String path; + String executable; + Sliceobjects; + + Target target; + BuildMode build_mode; + bool has_debug_info; + bool silent; +}; + +constexpr u64 i128_offset = 64 * 2; +constexpr u64 void_offset = i128_offset + 2; + +fn Type* integer_type(Module* module, TypeInteger integer) +{ + assert(integer.bit_count); + assert(integer.bit_count <= 64 || integer.bit_count == 128); + auto index = integer.bit_count == 128 ? 
(i128_offset + integer.is_signed) : (integer.bit_count - 1 + (64 * integer.is_signed));
+    auto* result_type = module->first_type + index;
+    assert(result_type->id == TypeId::integer);
+    assert(result_type->integer.bit_count == integer.bit_count);
+    assert(result_type->integer.is_signed == integer.is_signed);
+    return result_type;
+}
+
+fn Type* void_type(Module* module)
+{
+    return module->first_type + void_offset;
+}
+
+fn Type* noreturn_type(Module* module)
+{
+    return void_type(module) + 1;
+}
+
+struct Options
+{
+    String content;
+    String path;
+    String executable;
+    String name;
+    Slice objects;
+    Target target;
+    BuildMode build_mode;
+    bool has_debug_info;
+    bool silent;
+};
+
+fn Type* type_allocate_init(Module* module, Type type)
+{
+    auto* result = &arena_allocate(module->arena, 1)[0];
+    *result = type;
+
+    if (module->last_type)
+    {
+        module->last_type->next = result;
+        module->last_type = result;
+    }
+    else
+    {
+        assert(!module->first_type);
+        module->first_type = result;
+        module->last_type = result;
+    }
+
+    return result;
+}
+
+fn Value* new_value(Module* module)
+{
+    auto* result = &arena_allocate(module->arena, 1)[0];
+    return result;
+}
+
+fn Slice new_value_array(Module* module, u64 count)
+{
+    auto result = arena_allocate(module->arena, count);
+    return result;
+}
+
+fn Slice new_type_array(Module* module, u64 count)
+{
+    auto result = arena_allocate(module->arena, count);
+    return result;
+}
+
+fn Global* new_global(Module* module)
+{
+    auto* result = &arena_allocate(module->arena, 1)[0];
+
+    if (module->last_global)
+    {
+        module->last_global->next = result;
+        module->last_global = result;
+    }
+    else
+    {
+        assert(!module->first_global);
+        module->first_global = result;
+        module->last_global = result;
+    }
+
+    return result;
+}
+
+fn Type* get_pointer_type(Module* module, Type* element_type) // Returns the pointer type for element_type, reusing one already on the module's pointer-type list.
+{
+    auto last_pointer_type = module->first_pointer_type;
+    while (last_pointer_type)
+    {
+        assert(last_pointer_type->id == TypeId::pointer);
+        if (last_pointer_type->pointer.element_type == element_type)
+        {
+            return last_pointer_type;
+        }
+
+        if (!last_pointer_type->pointer.next)
+        {
+            break; // keep last_pointer_type on the tail so the new type can be appended below
+        }
+
+        last_pointer_type = last_pointer_type->pointer.next;
+    }
+
+    String name_parts[] = {
+        str("&"),
+        element_type->name,
+    };
+    auto result = type_allocate_init(module, {
+        .pointer = {
+            .element_type = element_type,
+        },
+        .id = TypeId::pointer,
+        .name = arena_join_string(module->arena, array_to_slice(name_parts)),
+    });
+    if (last_pointer_type) last_pointer_type->pointer.next = result; else module->first_pointer_type = result; // register the new type; without this the lookup loop above never finds it
+    return result;
+}
+
+fn bool is_slice(Type* type)
+{
+    switch (type->id)
+    {
+        case TypeId::structure:
+            {
+                return type->structure.is_slice;
+            }
+        default: return false;
+    }
+}
+
+fn Type* get_slice_type(Module* module, Type* element_type) // Returns the { pointer, length } slice struct for element_type, reusing one already on the module's slice-type list.
+{
+    Type* slice_type = module->first_slice_type;
+
+    if (slice_type)
+    {
+        while (1)
+        {
+            assert(is_slice(slice_type));
+            assert(slice_type->structure.fields.length == 2);
+            auto* pointer_type = slice_type->structure.fields[0].type;
+            assert(pointer_type->id == TypeId::pointer);
+            auto* candidate_element_type = pointer_type->pointer.element_type;
+            if (candidate_element_type == element_type)
+            {
+                return slice_type;
+            }
+
+            if (!slice_type->structure.next)
+            {
+                break;
+            }
+
+            slice_type = slice_type->structure.next;
+        }
+    }
+
+    Type* last_slice_type = slice_type;
+    auto fields = arena_allocate(module->arena, 2);
+    fields[0] = {
+        .name = str("pointer"),
+        .type = get_pointer_type(module, element_type),
+        .offset = 0,
+        .line = 0,
+    };
+    fields[1] = {
+        .name = str("length"),
+        .type = integer_type(module, { .bit_count = 64, .is_signed = false }),
+        .offset = 8,
+        .line = 0,
+    };
+    String name_parts[] = {
+        str("[]"),
+        element_type->name,
+    };
+
+    auto result = type_allocate_init(module, {
+        .structure = {
+            .fields = fields,
+            .byte_size = 16,
+            .byte_alignment = 8,
+            .line = 0,
+            .is_slice = true,
+        },
+        .id = TypeId::structure,
+        .name = arena_join_string(module->arena, array_to_slice(name_parts)),
+    });
+
+    if (last_slice_type)
+    {
+        last_slice_type->structure.next = result; // chain via structure.next (walked by the loop above); Type::next is the global type list set by type_allocate_init
+    }
+    else
+    {
+        module->first_slice_type = result;
+    }
+
+    return result;
+}
+
+void parse(Module* module);
+void emit(Module* module);
diff --git a/src/emitter.cpp b/src/emitter.cpp
new file mode 100644
index 0000000..105e6d8
--- /dev/null
+++ b/src/emitter.cpp
@@ -0,0 +1,5 @@
+#include 
+void emit(Module* module)
+{
+
+}
diff --git a/src/entry_point.cpp b/src/entry_point.cpp
new file mode 100644
index 0000000..8644096
--- /dev/null
+++ b/src/entry_point.cpp
@@ -0,0 +1,13 @@
+#include 
+void entry_point(Slice arguments, Slice environment);
+int main(int argc, const char* argv[], const char* envp[])
+{
+    auto* envp_end = envp;
+    while (*envp_end)
+    {
+        envp_end += 1;
+    }
+
+    entry_point({argv, (u64)argc}, {envp, (u64)(envp_end - envp)});
+    return 0;
+}
diff --git a/src/entry_point.h b/src/entry_point.h
new file mode 100644
index 0000000..8ef2889
--- /dev/null
+++ b/src/entry_point.h
@@ -0,0 +1,2 @@
+#include 
+
diff --git a/src/lib.h b/src/lib.h
new file mode 100644
index 0000000..f8a7c5b
--- /dev/null
+++ b/src/lib.h
@@ -0,0 +1,681 @@
+#pragma once
+
+#define global_variable static
+
+#define fn static
+#define unused(x) (void)(x)
+#define breakpoint() __builtin_debugtrap()
+#define string_literal_length(s) (sizeof(s) - 1)
+#define string_literal(s) ((String){ .pointer = (u8*)(s), .length = string_literal_length(s), })
+#define str(x) string_literal(x)
+#define offsetof(S, f) __builtin_offsetof(S, f)
+
+#define array_length(arr) (sizeof(arr) / sizeof((arr)[0])) // parenthesized so the expansion stays one operand inside larger expressions
+#define array_to_slice(arr) { .pointer = (arr), .length = array_length(arr) }
+#define array_to_bytes(arr) { .pointer = (u8*)(arr), .length = sizeof(arr) }
+#define backing_type(E) __underlying_type(E)
+
+#define unreachable_raw() __builtin_unreachable()
+#define trap_raw() __builtin_trap()
+#if BB_DEBUG
+#define unreachable() trap_raw()
+#else
+#define unreachable() unreachable_raw()
+#endif
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MAX(a, b) (((a) > (b)) ? 
(a) : (b)) + +#define expect(x, b) __builtin_expect(!!(x), b) +#define likely(x) expect(x, 1) +#define unlikely(x) expect(x, 0) + +#define assert(x) (unlikely(!(x)) ? unreachable() : unused(0)) + +#define clz(x) __builtin_clzg(x) +#define ctz(x) __builtin_ctzg(x) + +#define case_to_name(E,n) case E::n: return str(#n) + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long u64; + +typedef signed char s8; +typedef signed short s16; +typedef signed int s32; +typedef signed long s64; + +typedef float f32; +typedef double f64; + +fn u64 align_forward(u64 value, u64 alignment) +{ + assert(alignment != 0); + auto mask = alignment - 1; + auto result = (value + mask) & ~mask; + return result; +} + +constexpr u64 kb = 1024; +constexpr u64 mb = 1024 * 1024; +constexpr u64 gb = 1024 * 1024 * 1024; + +extern "C" [[noreturn]] void exit(s32 status); +extern "C" void *memcpy (void* __restrict destination, const void *__restrict source, u64 byte_count); +extern "C" s32 memcmp (const void* a, const void *b, u64 __n); /* C memcmp returns int, not a pointer; callers compare the result against 0 */ +extern "C" const char* realpath(const char* __restrict path, char* resolved_path); + +struct RawSlice +{ + void* pointer; + u64 length; +}; + +fn bool raw_slice_equal(RawSlice a, RawSlice b, u64 size_of_T) +{ + bool result = a.length == b.length; + if (result) + { + if (a.pointer != b.pointer) + { + result = memcmp(a.pointer, b.pointer, a.length * size_of_T) == 0; + } + } + + return result; +} + +fn RawSlice raw_slice_slice(RawSlice s, u64 start, u64 end, u64 size_of_T) +{ + return {(u8*)s.pointer + (size_of_T * start), end - start}; +} + +template +struct Slice +{ + T* pointer; + u64 length; + + T* begin() + { + return pointer; + } + + T* end() { + return pointer + length; + } + + T& operator[](u64 index) + { + assert(index < length); + return pointer[index]; + } + + bool equal(Slice other) + { + return raw_slice_equal(*(RawSlice*)this, *(RawSlice*)&other, sizeof(T)); + } + + Slice operator()(u64 start, u64 end) + 
{ + return {pointer + start, end - start}; + } + + Slice operator()(u64 start) + { + return {pointer + start, length - start}; + } +}; + +using String = Slice; +fn const char* cstr(String string) +{ + assert(string.pointer[string.length] == 0); + return (const char*) string.pointer; +} + +fn String c_string_to_slice(const char* cstr) +{ + const auto* end = cstr; + while (*end) + { + end += 1; + } + + return { (u8*)cstr, u64(end - cstr) }; +} + +constexpr auto string_no_match = ~(u64)0; + +fn u64 string_first_character(String string, u8 ch) +{ + u64 result = string_no_match; + + for (u64 i = 0; i < string.length; i += 1) + { + if (string[i] == ch) + { + result = i; + break; + } + } + + return result; +} + +fn u64 string_last_character(String string, u8 ch) +{ + u64 result = string_no_match; + u64 i = string.length; + + while (i > 0) + { + i -= 1; + + if (string[i] == ch) + { + result = i; + break; + } + } + + return result; +} + +struct ProtectionFlags +{ + u8 read:1; + u8 write:1; + u8 execute:1; +}; + +struct MapFlags +{ + u8 priv:1; + u8 anonymous:1; + u8 no_reserve:1; + u8 populate:1; +}; + +struct PROT +{ + u32 read:1; + u32 write:1; + u32 execute:1; + u32 sem:1; + u32 _:28; +}; +static_assert(sizeof(PROT) == sizeof(u32)); + +struct MAP +{ + enum class Type : u32 + { + shared = 0, + priv = 1, + shared_validate = 2, + }; + + Type type:4; + u32 fixed:1; + u32 anonymous:1; + u32 bit32:1; + u32 _0: 1; + u32 grows_down:1; + u32 _1: 2; + u32 deny_write:1; + u32 executable:1; + u32 locked:1; + u32 no_reserve:1; + u32 populate:1; + u32 non_block:1; + u32 stack:1; + u32 huge_tlb:1; + u32 sync:1; + u32 fixed_no_replace:1; + u32 _2:5; + u32 uninitialized:1; + u32 _3:5; +}; +static_assert(sizeof(MAP) == sizeof(u32)); + +struct OPEN +{ + enum class AccessMode : u32 + { + read_only = 0, + write_only = 1, + read_write = 2, + }; + + AccessMode access_mode:2; + u32 _0:4; + u32 creat:1; + u32 excl:1; + u32 no_ctty:1; + u32 trunc:1; + u32 append:1; + u32 non_block:1; + u32 
d_sync:1; + u32 a_sync:1; + u32 direct:1; + u32 _1:1; + u32 directory:1; + u32 no_follow:1; + u32 no_a_time:1; + u32 cloexec:1; + u32 sync:1; + u32 path:1; + u32 tmp_file:1; + u32 _2:9; +}; +static_assert(sizeof(OPEN) == sizeof(u32)); + +using uid_t = u32; +using gid_t = u32; +using off_t = s64; +using ino_t = u64; +using dev_t = u64; + +struct timespec +{ + s64 seconds; + s64 nanoseconds; +}; + +struct Stat +{ + dev_t dev; + ino_t ino; + u64 nlink; + + u32 mode; + uid_t uid; + gid_t gid; + u32 _0; + dev_t rdev; + off_t size; + s64 blksize; + s64 blocks; + + timespec atim; + timespec mtim; + timespec ctim; + s64 _1[3]; +}; + +extern "C" s32* __errno_location(); +extern "C" void* mmap(void*, u64, PROT, MAP, s32, s64); +extern "C" s32 mprotect(void*, u64, PROT); +extern "C" s64 ptrace(s32, s32, u64, u64); +extern "C" s32 open(const char*, OPEN, ...); +extern "C" s32 close(s32); +extern "C" s32 fstat(s32, Stat*); +extern "C" s64 write(s32, u8*, u64); +extern "C" s64 read(s32, u8*, u64); +extern "C" s32 mkdir(const char*, u64); + +enum class Error : u32 +{ + success = 0, + perm = 1, +}; + +fn Error errno() +{ + return (Error)*__errno_location(); +} + +fn void* os_reserve(void* base, u64 size, ProtectionFlags protection, MapFlags map) +{ + auto protection_flags = PROT + { + .read = protection.read, + .write = protection.write, + .execute = protection.execute, + }; + + auto map_flags = MAP + { + .type = map.priv ? 
MAP::Type::priv : MAP::Type::shared, + .anonymous = map.anonymous, + .no_reserve = map.no_reserve, + .populate = map.populate, + }; + + auto* address = mmap(base, size, protection_flags, map_flags, -1, 0); + assert((u64)address != ~(u64)0); + + return address; +} + +fn void os_commit(void* address, u64 size, ProtectionFlags protection) +{ + auto protection_flags = PROT + { + .read = protection.read, + .write = protection.write, + .execute = protection.execute, + }; + auto result = mprotect(address, size, protection_flags); + assert(!result); +} + +struct OpenFlags +{ + u32 truncate:1; + u32 execute:1; + u32 write:1; + u32 read:1; + u32 create:1; + u32 directory:1; +}; + +struct Permissions +{ + u32 read:1; + u32 write:1; + u32 execute:1; +}; + +fn s32 os_open(String path, OpenFlags flags, Permissions permissions) +{ + OPEN::AccessMode access_mode; + if (flags.read && flags.write) + { + access_mode = OPEN::AccessMode::read_write; + } + else if (flags.read) + { + access_mode = OPEN::AccessMode::read_only; + } + else if (flags.write) + { + access_mode = OPEN::AccessMode::write_only; /* write without read must open write-only, not read-only */ + } + else + { + unreachable(); + } + + auto o = OPEN { + .access_mode = access_mode, + .creat = flags.create, + .trunc = flags.truncate, + .directory = flags.directory, + }; + + // TODO: + auto mode = permissions.execute ? 
0755 : 0644; + + auto fd = open(cstr(path), o, mode); + return fd; +} + +fn bool is_file_valid(s32 fd) +{ + return fd >= 0; +} + +fn void os_close(s32 fd) +{ + assert(is_file_valid(fd)); + + auto result = close(fd); + assert(result == 0); +} + +fn u64 os_file_size(s32 fd) +{ + Stat stat; + auto result = fstat(fd, &stat); + assert(result == 0); + return (u64)stat.size; +} + +fn u64 os_read_partially(s32 fd, u8* buffer, u64 byte_count) +{ + auto result = read(fd, buffer, byte_count); + assert(result > 0); + return (u64)result; +} + +fn void os_read(s32 fd, String buffer, u64 byte_count) +{ + assert(byte_count <= buffer.length); + u64 it_byte_count = 0; + while (it_byte_count < byte_count) + { + auto read_byte_count = os_read_partially(fd, buffer.pointer + it_byte_count, byte_count - it_byte_count); + it_byte_count += read_byte_count; + } + assert(it_byte_count == byte_count); +} + +fn u64 os_write_partially(s32 fd, u8* buffer, u64 byte_count) +{ + auto result = write(fd, buffer, byte_count); + assert(result > 0); + return (u64)result; +} + +fn void os_write(s32 fd, String content) +{ + u64 it_byte_count = 0; + while (it_byte_count < content.length) + { + auto written_byte_count = os_write_partially(fd, content.pointer + it_byte_count, content.length - it_byte_count); + it_byte_count += written_byte_count; + } + assert(it_byte_count == content.length); +} + +fn String path_absolute_stack(String buffer, String relative_path) +{ + const char* absolute_path = realpath(cstr(relative_path), (char*)buffer.pointer); + if (absolute_path) + { + auto slice = c_string_to_slice(absolute_path); + assert(slice.length < buffer.length); + return slice; + } + return {}; +} + +fn bool os_is_debugger_present() +{ + bool result = false; + if (ptrace(0, 0, 0, 0) == -1) + { + auto errno_error = errno(); + result = errno_error == Error::perm; + } + + return result; +} + +fn void make_directory(const char* path) +{ + auto result = mkdir(path, 0755); + unused(result); +} + +fn void 
print(String string) +{ + os_write(1, string); +} + +struct ArenaInitialization +{ + u64 reserved_size; + u64 granularity; + u64 initial_size; +}; + +struct Arena +{ + u64 reserved_size; + u64 position; + u64 os_position; + u64 granularity; + u8 reserved[32]; +}; + +constexpr u64 arena_minimum_position = sizeof(Arena); + +fn Arena* arena_initialize(ArenaInitialization i) +{ + ProtectionFlags protection_flags = { + .read = 1, + .write = 1, + }; + MapFlags map_flags = { + .priv = 1, + .anonymous = 1, + .no_reserve = 1, + }; + + auto* arena = (Arena*)os_reserve(0, i.reserved_size, protection_flags, map_flags); + os_commit(arena, i.initial_size, { .read = 1, .write = 1 }); + + *arena = { + .reserved_size = i.reserved_size, + .position = arena_minimum_position, + .os_position = i.initial_size, + .granularity = i.granularity, + }; + + return arena; +} + +fn inline Arena* arena_initialize_default(u64 initial_size) +{ + ArenaInitialization i = { + .reserved_size = 4 * gb, + .granularity = 4 * kb, + .initial_size = initial_size, + }; + return arena_initialize(i); +} + +fn void* arena_allocate_bytes(Arena* arena, u64 size, u64 alignment) +{ + void* result = 0; + + if (size) + { + auto aligned_offset = align_forward(arena->position, alignment); + auto aligned_size_after = aligned_offset + size; + + if (aligned_size_after > arena->os_position) + { + unreachable(); + } + + result = (u8*)arena + aligned_offset; + arena->position = aligned_size_after; + assert(arena->position <= arena->os_position); + } + + return result; +} + +template +fn Slice arena_allocate(Arena* arena, u64 count) +{ + return { (T*)arena_allocate_bytes(arena, sizeof(T) * count, alignof(T)), count }; +} + +fn String arena_join_string(Arena* arena, Slice pieces) +{ + u64 size = 0; + for (auto piece : pieces) + { + size += piece.length; + } + + auto* pointer = (u8*)arena_allocate_bytes(arena, size + 1, 1); + u64 i = 0; + for (auto piece : pieces) + { + memcpy(pointer + i, piece.pointer, piece.length); + i += 
piece.length; + } + + assert(i == size); + pointer[i] = 0; + + return { pointer, size }; +} + +fn String arena_duplicate_string(Arena* arena, String string) +{ + auto memory = (u8*)arena_allocate_bytes(arena, string.length + 1, 1); + memcpy(memory, string.pointer, string.length); + memory[string.length] = 0; + return { memory, string.length}; +} + +fn void arena_restore(Arena* arena, u64 position) +{ + assert(position <= arena->position); + arena->position = position; +} + +fn void arena_reset(Arena* arena) +{ + arena->position = arena_minimum_position; +} + +fn String path_absolute(Arena* arena, String relative_path) +{ + u8 buffer[4096]; + auto stack = path_absolute_stack(array_to_slice(buffer), relative_path); + auto result = arena_duplicate_string(arena, stack); + return result; +} + +fn String file_read(Arena* arena, String file_path) +{ + auto fd = os_open(file_path, { .read = 1 }, { .read = 1 }); + String result = {}; + + if (is_file_valid(fd)) + { + auto file_size = os_file_size(fd); + result = arena_allocate(arena, file_size); + os_read(fd, result, file_size); + os_close(fd); + } + + return result; +} + +[[noreturn]] fn void fail() +{ + if (os_is_debugger_present()) + { + trap_raw(); + } + exit(1); +} + +[[noreturn]] fn void fail_with_message(String string) +{ + print(string); + fail(); +} + +fn u64 next_power_of_two(u64 n) +{ + n -= 1; + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n |= n >> 32; + n += 1; + return n; +} diff --git a/src/parser.cpp b/src/parser.cpp new file mode 100644 index 0000000..56a1acd --- /dev/null +++ b/src/parser.cpp @@ -0,0 +1,3817 @@ +#include + +enum class ValueIntrinsic +{ + align_of, + byte_size, + enum_name, + extend, + integer_max, + int_from_enum, + int_from_pointer, + pointer_cast, + select, + string_to_enum, + trap, + truncate, + va_start, + va_end, + va_arg, + va_copy, + count, +}; + +fn Block* scope_to_block(Scope* scope) +{ + assert(scope->kind == ScopeKind::local); + auto byte_offset = 
offsetof(Block, scope); + auto result = (Block*)((u8*)scope - byte_offset); + return result; +} + +fn StatementForEach* scope_to_for_each(Scope* scope) +{ + assert(scope->kind == ScopeKind::for_each); + auto byte_offset = offsetof(StatementForEach, scope); + auto result = (StatementForEach*)((u8*)scope - byte_offset); + return result; +} + +fn MacroDeclaration* scope_to_macro_declaration(Scope* scope) +{ + assert(scope->kind == ScopeKind::macro_declaration); + auto byte_offset = offsetof(MacroDeclaration, scope); + auto result = (MacroDeclaration*)((u8*)scope - byte_offset); + return result; +} + +fn ValueFunction* scope_to_function(Scope* scope) +{ + assert(scope->kind == ScopeKind::function); + auto byte_offset = offsetof(ValueFunction, scope); + auto result = (ValueFunction*)((u8*)scope - byte_offset); + return result; +} + +fn Module* scope_to_module(Scope* scope) +{ + assert(scope->kind == ScopeKind::global); + auto byte_offset = offsetof(Module, scope); + auto result = (Module*)((u8*)scope - byte_offset); + return result; +} + +fn Local* new_local(Module* module, Scope* scope) +{ + auto* result = &arena_allocate(module->arena, 1)[0]; + + switch (scope->kind) + { + case ScopeKind::local: + { + auto block = scope_to_block(scope); + if (block->last_local) + { + block->last_local->next = result; + block->last_local = result; + } + else + { + block->first_local = result; + block->last_local = result; + } + } break; + case ScopeKind::for_each: + { + auto for_each = scope_to_for_each(scope); + if (for_each->last_local) + { + for_each->last_local->next = result; + for_each->last_local = result; + } + else + { + for_each->first_local = result; + for_each->last_local = result; + } + } break; + default: report_error(); + } + + return result; +} + +enum class TokenId +{ + none, + comma, + end_of_statement, + integer, + left_brace, + left_bracket, + left_parenthesis, + right_brace, + right_bracket, + right_parenthesis, + + plus, + dash, + asterisk, + forward_slash, + 
percentage, + caret, + bar, + ampersand, + exclamation, + + assign_plus, + assign_dash, + assign_asterisk, + assign_forward_slash, + assign_percentage, + assign_caret, + assign_bar, + assign_ampersand, + + value_keyword, + operator_keyword, + identifier, + string_literal, + value_intrinsic, + + shift_left, + shift_right, + assign_shift_left, + assign_shift_right, + + compare_less, + compare_less_equal, + compare_greater, + compare_greater_equal, + compare_equal, + compare_not_equal, + + dot, + double_dot, + triple_dot, + + pointer_dereference, + + assign, + tilde, +}; + +enum class TokenIntegerKind +{ + hexadecimal, + decimal, + octal, + binary, + character_literal, +}; + +struct TokenInteger +{ + u64 value; + TokenIntegerKind kind; +}; + +enum class ValueKeyword +{ + undefined, + unreachable, + zero, + count, +}; + +enum class OperatorKeyword +{ + and_op, + or_op, + and_op_shortcircuit, + or_op_shortcircuit, + count, +}; + +struct Token +{ + union + { + TokenInteger integer; + ValueKeyword value_keyword; + String identifier; + OperatorKeyword operator_keyword; + ValueIntrinsic value_intrinsic; + String string_literal; + }; + TokenId id; +}; + +enum class Precedence +{ + none, + assignment, + boolean_or, + boolean_and, + comparison, + bitwise, + shifting, + add_like, + div_like, + prefix, + aggregate_initialization, + postfix, +}; + +struct ValueBuilder +{ + Token token; + Value* left; + Precedence precedence; + ValueKind kind; + bool allow_assignment_operators; + + inline ValueBuilder with_precedence(Precedence precedence) + { + auto result = *this; + result.precedence = precedence; + return result; + } + + inline ValueBuilder with_token(Token token) + { + auto result = *this; + result.token = token; + return result; + } + + inline ValueBuilder with_left(Value* value) + { + auto result = *this; + result.left = value; + return result; + } + + inline ValueBuilder with_kind(ValueKind kind) + { + auto result = *this; + result.kind = kind; + return result; + } +}; + 
+global_variable constexpr u8 left_bracket = '['; +global_variable constexpr u8 right_bracket = ']'; +global_variable constexpr u8 left_brace = '{'; +global_variable constexpr u8 right_brace = '}'; +global_variable constexpr u8 left_parenthesis = '('; +global_variable constexpr u8 right_parenthesis = ')'; + +fn bool is_space(u8 ch) +{ + return ((ch == ' ') | (ch == '\n')) | ((ch == '\t') | (ch == '\r')); +} + +fn bool is_lower(u8 ch) +{ + return ((ch >= 'a') & (ch <= 'z')); +} + +fn bool is_upper(u8 ch) +{ + return ((ch >= 'A') & (ch <= 'Z')); +} + +fn bool is_decimal(u8 ch) +{ + return ((ch >= '0') & (ch <= '9')); +} + +fn bool is_octal(u8 ch) +{ + return ((ch >= '0') & (ch <= '7')); +} + +fn bool is_binary(u8 ch) +{ + return ((ch == '0') | (ch == '1')); +} + +fn bool is_hexadecimal_alpha_lower(u8 ch) +{ + return ((ch >= 'a') & (ch <= 'f')); +} + +fn bool is_hexadecimal_alpha_upper(u8 ch) +{ + return ((ch >= 'A') & (ch <= 'F')); +} + +fn bool is_hexadecimal_alpha(u8 ch) +{ + return is_hexadecimal_alpha_lower(ch) | is_hexadecimal_alpha_upper(ch); +} + +fn bool is_hexadecimal(u8 ch) +{ + return is_decimal(ch) | is_hexadecimal_alpha(ch); +} + +fn bool is_identifier_start(u8 ch) +{ + return (is_lower(ch) | is_upper(ch)) | (ch == '_'); +} + +fn bool is_identifier(u8 ch) +{ + return is_identifier_start(ch) | is_decimal(ch); +} + +fn u32 get_line(Module* module) +{ + auto line = module->line_offset + 1; + assert(line < ~(u32)0); + return (u32)line; +} + +fn u32 get_column(Module* module) +{ + auto column = module->offset - module->line_character_offset + 1; + assert(column < ~(u32)0); + return (u32)column; +} + +struct Checkpoint +{ + u64 offset; + u64 line_offset; + u64 line_character_offset; +}; + +fn Checkpoint get_checkpoint(Module* module) +{ + return { + .offset = module->offset, + .line_offset = module->line_offset, + .line_character_offset = module->line_character_offset, + }; +} + +fn void set_checkpoint(Module* module, Checkpoint checkpoint) +{ + module->offset 
= checkpoint.offset; + module->line_offset = checkpoint.line_offset; + module->line_character_offset = checkpoint.line_character_offset; +} + +fn bool consume_character_if_match(Module* module, u8 expected_ch) +{ + bool is_ch = false; + auto i = module->offset; + if (i < module->content.length) + { + auto ch = module->content[i]; + is_ch = expected_ch == ch; + module->offset = i + is_ch; + } + + return is_ch; +} + +fn void expect_character(Module* module, u8 expected_ch) +{ + if (!consume_character_if_match(module, expected_ch)) + { + report_error(); + } +} + +fn void skip_space(Module* module) +{ + while (1) + { + auto iteration_offset = module->offset; + + while (module->offset < module->content.length) + { + auto ch = module->content[module->offset]; + if (!is_space(ch)) + { + break; + } + + module->line_offset += ch == '\n'; + module->line_character_offset = ch == '\n' ? module->offset : module->line_character_offset; + module->offset += 1; + } + + if (module->offset + 1 < module->content.length) + { + auto i = module->offset; + auto first_ch = module->content[i]; + auto second_ch = module->content[i + 1]; + auto is_comment = first_ch == '/' && second_ch == '/'; + + if (is_comment) + { + while (module->offset < module->content.length) + { + auto ch = module->content[module->offset]; + if (ch == '\n') + { + break; + } + module->offset += 1; + } + + if (module->offset < module->content.length) + { + module->line_offset += 1; + module->line_character_offset = module->offset; + module->offset += 1; + } + } + } + + if (module->offset - iteration_offset == 0) + { + break; + } + } +} + +fn String parse_identifier(Module* module) +{ + auto start = module->offset; + + if (is_identifier_start(module->content[start])) + { + module->offset = start + 1; + + while (module->offset < module->content.length) + { + auto i = module->offset; + if (is_identifier(module->content[i])) + { + module->offset = i + 1; + } + else + { + break; + } + } + } + + auto end = module->offset; + 
if (end - start == 0) + { + report_error(); + } + + return module->content(start, end); +} + +fn u64 accumulate_hexadecimal(u64 accumulator, u8 ch) +{ + u64 value; + + if (is_decimal(ch)) + { + value = ch - '0'; + } + else if (is_hexadecimal_alpha_upper(ch)) + { + value = ch - 'A' + 10; + } + else if (is_hexadecimal_alpha_lower(ch)) + { + value = ch - 'a' + 10; + } + else + { + unreachable(); + } + + auto result = (accumulator * 16) + value; + return result; +} + +fn u64 accumulate_decimal(u64 accumulator, u8 ch) +{ + assert(is_decimal(ch)); + return (accumulator * 10) + (ch - '0'); +} + +fn u64 accumulate_octal(u64 accumulator, u8 ch) +{ + assert(is_octal(ch)); + return (accumulator * 8) + (ch - '0'); +} + +fn u64 accumulate_binary(u64 accumulator, u8 ch) +{ + assert(is_binary(ch)); + return (accumulator * 2) + (ch - '0'); +} + +fn u64 parse_integer_decimal_assume_valid(String string) +{ + u64 value = 0; + + for (u8 ch: string) + { + assert(is_decimal(ch)); + value = accumulate_decimal(value, ch); + } + + return value; +} + +fn Value* parse_value(Module* module, Scope* scope, ValueBuilder builder); + + +fn u8 format_integer_decimal(String buffer, u64 v) +{ + u8 byte_count = 0; + auto value = v; + + if (value != 0) + { + u8 reverse_buffer[64]; + u8 reverse_index = 0; + + while (value != 0) + { + auto digit_value = (u8)(value % 10); + auto ascii_character = digit_value + '0'; + value /= 10; + reverse_buffer[reverse_index] = ascii_character; + reverse_index += 1; + } + + while (reverse_index != 0) + { + reverse_index -= 1; + buffer[byte_count] = reverse_buffer[reverse_index]; + byte_count += 1; + } + } + else + { + buffer[0] = '0'; + byte_count = 1; + } + + return byte_count; +} + +fn String array_name(Module* module, Type* element_type, u64 element_count) +{ + u8 buffer[512]; + auto buffer_slice = String{ .pointer = buffer, .length = array_length(buffer) }; + + u64 i = 0; + + buffer[i] = left_bracket; + i += 1; + + i += format_integer_decimal(buffer_slice(i), 
element_count); + + buffer[i] = right_bracket; + i += 1; + + auto element_name = element_type->name; + memcpy(buffer + i, element_name.pointer, element_name.length); + i += element_name.length; + + auto name = arena_duplicate_string(module->arena, buffer_slice(0, i)); + return name; +} + +fn Type* parse_type(Module* module, Scope* scope) +{ + auto start_character = module->content[module->offset]; + if (is_identifier_start(start_character)) + { + auto identifier = parse_identifier(module); + if (identifier.equal(str("void"))) + { + return void_type(module); + } + else if (identifier.equal(str("noreturn"))) + { + return noreturn_type(module); + } + else + { + auto is_int_type = identifier.length > 1 && (identifier[0] == 's' || identifier[0] == 'u'); + + if (is_int_type) + { + for (auto ch : identifier(1)) + { + is_int_type = is_int_type && is_decimal(ch); + } + } + + if (is_int_type) + { + bool is_signed; + switch (identifier[0]) + { + case 's': is_signed = true; break; + case 'u': is_signed = false; break; + default: unreachable(); + } + + auto bit_count = parse_integer_decimal_assume_valid(identifier(1)); + if (bit_count == 0) + { + report_error(); + } + if (bit_count > 64) + { + if (bit_count != 128) + { + report_error(); + } + } + + auto result = integer_type(module, { .bit_count = (u32)bit_count, .is_signed = is_signed }); + return result; + } + else + { + for (Type* type = module->first_type; type; type = type->next) + { + if (identifier.equal(type->name)) + { + return type; + } + } + + report_error(); + } + } + } + else if (start_character == '&') + { + module->offset += 1; + skip_space(module); + auto element_type = parse_type(module, scope); + auto pointer_type = get_pointer_type(module, element_type); + return pointer_type; + } + else if (start_character == left_bracket) + { + module->offset += 1; + skip_space(module); + + auto is_slice = consume_character_if_match(module, right_bracket); + if (is_slice) + { + skip_space(module); + auto element_type = 
parse_type(module, scope); + auto slice_type = get_slice_type(module, element_type); + return slice_type; + } + else + { + bool length_inferred = false; + auto checkpoint = get_checkpoint(module); + if (consume_character_if_match(module, '_')) + { + skip_space(module); + + length_inferred = consume_character_if_match(module, ']'); + } + + Value* length_value = 0; + u64 element_count = 0; + bool resolved = false; + if (!length_inferred) + { + set_checkpoint(module, checkpoint); + + length_value = parse_value(module, scope, {}); + assert(length_value); + + if (length_value->is_constant()) + { + switch (length_value->id) + { + case ValueId::constant_integer: + { + element_count = length_value->constant_integer.value; + if (element_count == 0) + { + report_error(); + } + resolved = true; + } break; + default: + { + report_error(); + } break; + } + } + + skip_space(module); + expect_character(module, right_bracket); + } + + skip_space(module); + + auto element_type = parse_type(module, scope); + + if (length_inferred) + { + assert(!length_value); + auto result = type_allocate_init(module, { + .array = { + .element_count = 0, + .element_type = element_type, + }, + .id = TypeId::array, + .name = str(""), + }); + + return result; + } + else + { + if (!resolved) + { + report_error(); + } + + assert(element_count != 0); + + auto result = type_allocate_init(module, { + .array = { + .element_count = element_count, + .element_type = element_type, + }, + .id = TypeId::array, + .name = array_name(module, element_type, element_count), + }); + return result; + } + } + } + else if (start_character == '#') + { + module->offset += 1; + auto identifier = parse_identifier(module); + enum class TypeIntrinsic + { + return_type, + count, + }; + + String type_intrinsics[] = { + str("ReturnType"), + }; + + static_assert(array_length(type_intrinsics) == (u64)TypeIntrinsic::count); + + backing_type(TypeIntrinsic) i; + for (i = 0; i < (backing_type(TypeIntrinsic))TypeIntrinsic::count; i += 1) + 
{ + String type_intrinsic = type_intrinsics[i]; + if (identifier.equal(type_intrinsic)) + { + break; + } + } + + auto intrinsic = (TypeIntrinsic)i; + switch (intrinsic) + { + case TypeIntrinsic::return_type: + { + auto return_type = module->current_function->variable.type->function.semantic_return_type; + return return_type; + } break; + case TypeIntrinsic::count: report_error(); + } + } + else + { + report_error(); + } +} + +fn u64 parse_hexadecimal(Module* module) +{ + u64 value = 0; + + while (true) + { + auto ch = module->content[module->offset]; + + if (!is_hexadecimal(ch)) + { + break; + } + + module->offset += 1; + value = accumulate_hexadecimal(value, ch); + } + + return value; +} + +fn u64 parse_decimal(Module* module) +{ + u64 value = 0; + + while (true) + { + auto ch = module->content[module->offset]; + + if (!is_decimal(ch)) + { + break; + } + + module->offset += 1; + value = accumulate_decimal(value, ch); + } + + return value; +} + +fn u64 parse_octal(Module* module) +{ + u64 value = 0; + + while (true) + { + auto ch = module->content[module->offset]; + + if (!is_octal(ch)) + { + break; + } + + module->offset += 1; + value = accumulate_octal(value, ch); + } + + return value; +} + +fn u64 parse_binary(Module* module) +{ + u64 value = 0; + + while (true) + { + auto ch = module->content[module->offset]; + + if (!is_binary(ch)) + { + break; + } + + module->offset += 1; + value = accumulate_binary(value, ch); + } + + return value; +} + +fn u8 escape_character(u8 ch) +{ + switch (ch) + { + default: trap_raw(); + } +} + +fn Token tokenize(Module* module) +{ + skip_space(module); + + auto start_index = module->offset; + if (start_index == module->content.length) + { + report_error(); + } + + auto start_character = module->content[start_index]; + + Token token; + + switch (start_character) + { + case ',': + case ';': + case '~': + case left_brace: + case left_parenthesis: + case left_bracket: + case right_brace: + case right_parenthesis: + case right_bracket: + 
{ + module->offset += 1; + TokenId id; + switch (start_character) + { + case ',': id = TokenId::comma; break; + case ';': id = TokenId::end_of_statement; break; + case '~': id = TokenId::tilde; break; + case left_brace: id = TokenId::left_brace; break; + case left_parenthesis: id = TokenId::left_parenthesis; break; + case left_bracket: id = TokenId::left_bracket; break; + case right_brace: id = TokenId::right_brace; break; + case right_parenthesis: id = TokenId::right_parenthesis; break; + case right_bracket: id = TokenId::right_bracket; break; + default: unreachable(); + } + token = { + .id = id, + }; + } break; + case '#': + { + module->offset += 1; + if (is_identifier_start(module->content[module->offset])) + { + auto identifier = parse_identifier(module); + + String value_intrinsics[] = { + str("align_of"), + str("byte_size"), + str("enum_name"), + str("extend"), + str("integer_max"), + str("int_from_enum"), + str("int_from_pointer"), + str("pointer_cast"), + str("select"), + str("string_to_enum"), + str("trap"), + str("truncate"), + str("va_start"), + str("va_end"), + str("va_arg"), + str("va_copy"), + }; + + backing_type(ValueIntrinsic) i; + for (i = 0; i < (backing_type(ValueIntrinsic))(ValueIntrinsic::count); i += 1) + { + String candidate = value_intrinsics[i]; + if (identifier.equal(candidate)) + { + break; + } + } + + auto intrinsic = (ValueIntrinsic)i; + if (intrinsic == ValueIntrinsic::count) + { + report_error(); + } + + token = { + .value_intrinsic = intrinsic, + .id = TokenId::value_intrinsic, + }; + } + else + { + trap_raw(); + } + } break; + case '<': + { + auto next_ch = module->content[start_index + 1]; + TokenId id; + switch (next_ch) + { + case '<': + switch (module->content[start_index + 2]) + { + case '=': id = TokenId::assign_shift_left; break; + default: id = TokenId::shift_left; break; + } break; + case '=': id = TokenId::compare_less_equal; break; + default: id = TokenId::compare_less; break; + } + + u64 add; + switch (id) + { + case 
TokenId::assign_shift_left: add = 3; break; + case TokenId::shift_left: + case TokenId::compare_less_equal: add = 2; break; + case TokenId::compare_less: add = 1; break; + default: unreachable(); + } + + module->offset += add; + token = { + .id = id, + }; + } break; + case '>': + { + auto next_ch = module->content[start_index + 1]; + TokenId id; + switch (next_ch) + { + case '>': + switch (module->content[start_index + 2]) + { + case '=': id = TokenId::assign_shift_right; break; + default: id = TokenId::shift_right; break; + } break; + case '=': id = TokenId::compare_greater_equal; break; + default: id = TokenId::compare_greater; break; + } + + u64 add; + switch (id) + { + case TokenId::assign_shift_right: add = 3; break; + case TokenId::shift_right: + case TokenId::compare_greater_equal: add = 2; break; + case TokenId::compare_greater: add = 1; break; + default: unreachable(); + } + + module->offset += add; + token = { + .id = id, + }; + } break; + case '=': + { + auto next_ch = module->content[start_index + 1]; + auto is_compare_equal = next_ch == '='; + TokenId id = is_compare_equal ? 
TokenId::compare_equal : TokenId::assign; + module->offset += is_compare_equal + 1; + token = { + .id = id, + }; + } break; + case '.': + { + auto next_ch = module->content[start_index + 1]; + TokenId id; + switch (next_ch) + { + default: id = TokenId::dot; break; + case '&': id = TokenId::pointer_dereference; break; + case '.': + switch (module->content[start_index + 2]) + { + case '.': id = TokenId::triple_dot; break; + default: id = TokenId::double_dot; break; + } break; + } + + u64 add; + switch (id) + { + case TokenId::dot: add = 1; break; + case TokenId::double_dot: add = 2; break; + case TokenId::triple_dot: add = 3; break; + case TokenId::pointer_dereference: add = 2; break; + default: unreachable(); + } + module->offset += add; + + token = { + .id = id, + }; + } break; + case '"': + { + module->offset += 1; + auto start = module->offset; + + while (1) + { + auto ch = module->content[module->offset]; + if (ch == '"') + { + module->offset += 1; + break; + } + else if (ch == '\\') + { + trap_raw(); + } + else + { + module->offset += 1; + } + } + + auto end = module->offset - 1; /* offset is already past the closing quote; keep it out of the literal text */ + auto string_literal = module->content(start, end); + + token = { + .string_literal = string_literal, + .id = TokenId::string_literal, + }; + } break; + case '\'': + { + module->offset += 1; + + u8 ch; + if (module->content[module->offset] == '\\') + { + module->offset += 1; + ch = escape_character(module->content[module->offset]); + } + else + { + ch = module->content[module->offset]; + if (ch == '\'') + { + report_error(); + } + } + + module->offset += 1; + expect_character(module, '\''); + token = Token{ + .integer = { + .value = ch, + .kind = TokenIntegerKind::character_literal, + }, + .id = TokenId::integer, + }; + } break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + auto next_ch = module->content[start_index + 1]; + TokenIntegerKind token_integer_kind = TokenIntegerKind::decimal; + if (start_character 
== '0') + { + switch (next_ch) + { + case 'x': token_integer_kind = TokenIntegerKind::hexadecimal; break; + case 'd': token_integer_kind = TokenIntegerKind::decimal; break; + case 'o': token_integer_kind = TokenIntegerKind::octal; break; + case 'b': token_integer_kind = TokenIntegerKind::octal; break; + default: token_integer_kind = TokenIntegerKind::decimal; break; + } + auto inferred_decimal = token_integer_kind == TokenIntegerKind::decimal && next_ch != 'd'; + module->offset += 2 * (token_integer_kind != TokenIntegerKind::decimal || !inferred_decimal); + } + + u64 value; + switch (token_integer_kind) + { + case TokenIntegerKind::hexadecimal: value = parse_hexadecimal(module); break; + case TokenIntegerKind::decimal: value = parse_decimal(module); break; + case TokenIntegerKind::octal: value = parse_octal(module); break; + case TokenIntegerKind::binary: value = parse_binary(module); break; + case TokenIntegerKind::character_literal: report_error(); break; + } + + token = { + .integer = { + .value = value, + .kind = token_integer_kind, + }, + .id = TokenId::integer, + }; + } break; + case '+': + case '-': + case '*': + case '/': + case '%': + case '&': + case '|': + case '^': + case '!': + { + auto next_ch = module->content[start_index + 1]; + TokenId id; + if (next_ch == '=') + { + switch (start_character) + { + case '+': id = TokenId::assign_plus; break; + case '-': id = TokenId::assign_dash; break; + case '*': id = TokenId::assign_asterisk; break; + case '/': id = TokenId::assign_forward_slash; break; + case '%': id = TokenId::assign_percentage; break; + case '&': id = TokenId::assign_ampersand; break; + case '|': id = TokenId::assign_bar; break; + case '^': id = TokenId::assign_caret; break; + case '!': id = TokenId::compare_not_equal; break; + default: unreachable(); + } + } + else + { + switch (start_character) + { + case '+': id = TokenId::plus; break; + case '-': id = TokenId::dash; break; + case '*': id = TokenId::asterisk; break; + case '/': id = 
TokenId::forward_slash; break; + case '%': id = TokenId::percentage; break; + case '&': id = TokenId::ampersand; break; + case '|': id = TokenId::bar; break; + case '^': id = TokenId::caret; break; + case '!': id = TokenId::exclamation; break; + default: unreachable(); + } + } + + token.id = id; + + module->offset += 1 + (next_ch == '='); + } break; + default: + { + if (is_identifier_start(start_character)) + { + auto identifier = parse_identifier(module); + + String value_keywords[] = { + str("undefined"), + str("unreachable"), + str("zero"), + }; + static_assert(array_length(value_keywords) == (u64)ValueKeyword::count); + + backing_type(ValueKeyword) i; + for (i = 0; i < (backing_type(ValueKeyword))ValueKeyword::count; i += 1) + { + String candidate = value_keywords[i]; + if (candidate.equal(identifier)) + { + break; + } + } + + auto value_keyword = (ValueKeyword)i; + + if (value_keyword == ValueKeyword::count) + { + auto advance = identifier.pointer[identifier.length] == '?'; + identifier.length += advance; + module->offset += advance; + + String operators[] = { + str("and"), + str("or"), + str("and?"), + str("or?"), + }; + static_assert(array_length(operators) == (u64)OperatorKeyword::count); + + backing_type(OperatorKeyword) i; + for (i = 0; i < (backing_type(OperatorKeyword))OperatorKeyword::count; i += 1) + { + auto candidate = operators[i]; + if (candidate.equal(identifier)) + { + break; + } + } + + auto operator_keyword = (OperatorKeyword)i; + if (operator_keyword == OperatorKeyword::count) + { + token = { + .identifier = identifier, + .id = TokenId::identifier, + }; + } + else + { + token = { + .operator_keyword = operator_keyword, + .id = TokenId::operator_keyword, + }; + } + } + else + { + token = { + .value_keyword = value_keyword, + .id = TokenId::value_keyword, + }; + } + } + else + { + report_error(); + } + } break; + } + + assert(start_index != module->offset); + return token; +} + +fn Value* reference_identifier(Module* module, Scope* 
current_scope, String identifier, ValueKind kind) +{ + assert(!identifier.equal(str(""))); + assert(!identifier.equal(str("_"))); + + Variable* variable = 0; + + for (Scope* scope = current_scope; scope; scope = scope->parent) + { + switch (scope->kind) + { + case ScopeKind::global: + { + assert(module == scope_to_module(scope)); + + for (Global* global = module->first_global; global; global = global->next) + { + if (identifier.equal(global->variable.name)) + { + variable = &global->variable; + break; + } + } + + for (MacroDeclaration* macro_declaration = module->first_macro_declaration; macro_declaration; macro_declaration = macro_declaration->next) + { + if (identifier.equal(macro_declaration->name)) + { + auto result = new_value(module); + *result = { + .macro_reference = macro_declaration, + .id = ValueId::macro_reference, + }; + return result; + } + } + } break; + case ScopeKind::function: + { + assert(scope->parent); + auto function = scope_to_function(scope); + for (auto argument: function->arguments) + { + if (identifier.equal(argument.variable.name)) + { + variable = &argument.variable; + break; + } + } + } break; + case ScopeKind::local: + { + assert(scope->parent); + assert(scope->parent->kind != ScopeKind::global); + + auto block = scope_to_block(scope); + for (Local* local = block->first_local; local; local = local->next) + { + if (identifier.equal(local->variable.name)) + { + variable = &local->variable; + break; + } + } + } break; + case ScopeKind::for_each: + { + assert(scope->parent); + auto for_each = scope_to_for_each(scope); + + for (Local* local = for_each->first_local; local; local = local->next) + { + if (identifier.equal(local->variable.name)) + { + variable = &local->variable; + break; + } + } + } break; + case ScopeKind::macro_declaration: + { + assert(scope->parent); + auto macro_declaration = scope_to_macro_declaration(scope); + + for (auto& constant_argument: macro_declaration->constant_arguments) + { + if 
(identifier.equal(constant_argument.name)) + { + trap_raw(); + } + } + + for (auto& argument: macro_declaration->arguments) + { + if (identifier.equal(argument.variable.name)) + { + variable = &argument.variable; + break; + } + } + } break; + case ScopeKind::macro_instantiation: + { + trap_raw(); + } break; + } + + if (variable) + { + break; + } + } + + if (variable) + { + auto result = new_value(module); + *result = { + .variable_reference = variable, + .id = ValueId::variable_reference, + .kind = kind, + }; + return result; + } + else + { + report_error(); + } +} +fn Value* parse_value(Module* module, Scope* scope, ValueBuilder builder); + +fn Value* parse_precedence(Module* module, Scope* scope, ValueBuilder builder); +fn Value* parse_left(Module* module, Scope* scope, ValueBuilder builder) +{ + Token token = builder.token; + Value* result; + switch (token.id) + { + case TokenId::integer: + { + auto integer_value = token.integer.value; + result = new_value(module); + *result = { + .constant_integer = { + .value = integer_value, + .is_signed = false, + }, + .id = ValueId::constant_integer, + .kind = ValueKind::right, + }; + } break; + case TokenId::dash: + case TokenId::ampersand: + case TokenId::exclamation: + case TokenId::tilde: + // Unary + { + assert(!builder.left); + UnaryId id; + switch (token.id) + { + case TokenId::dash: id = UnaryId::minus; break; + case TokenId::ampersand: id = UnaryId::ampersand; break; + case TokenId::exclamation: id = UnaryId::exclamation; break; + case TokenId::tilde: id = UnaryId::bitwise_not; break; + default: unreachable(); + } + + auto unary_value = parse_precedence(module, scope, builder.with_precedence(Precedence::prefix).with_token({}).with_kind(token.id == TokenId::ampersand ? 
ValueKind::left : builder.kind)); + + result = new_value(module); + *result = { + .unary = { + .value = unary_value, + .id = id, + }, + .id = ValueId::unary, + .kind = ValueKind::right, + }; + } break; + case TokenId::identifier: + { + result = reference_identifier(module, scope, token.identifier, builder.kind); + } break; + case TokenId::value_intrinsic: + { + ValueIntrinsic intrinsic = token.value_intrinsic; + result = new_value(module); + + switch (intrinsic) + { + case ValueIntrinsic::align_of: + { + trap_raw(); + } break; + case ValueIntrinsic::enum_name: + case ValueIntrinsic::extend: + case ValueIntrinsic::int_from_enum: + case ValueIntrinsic::int_from_pointer: + case ValueIntrinsic::truncate: + case ValueIntrinsic::pointer_cast: + case ValueIntrinsic::va_end: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + auto argument = parse_value(module, scope, {}); + expect_character(module, right_parenthesis); + + UnaryId id; + switch (intrinsic) + { + case ValueIntrinsic::enum_name: id = UnaryId::enum_name; break; + case ValueIntrinsic::extend: id = UnaryId::extend; break; + case ValueIntrinsic::int_from_enum: id = UnaryId::int_from_enum; break; + case ValueIntrinsic::int_from_pointer: id = UnaryId::int_from_pointer; break; + case ValueIntrinsic::truncate: id = UnaryId::truncate; break; + case ValueIntrinsic::pointer_cast: id = UnaryId::pointer_cast; break; + case ValueIntrinsic::va_end: id = UnaryId::va_end; break; + default: unreachable(); + } + + *result = { + .unary = { + .value = argument, + .id = id, + }, + .id = ValueId::unary, + }; + } break; + case ValueIntrinsic::byte_size: + case ValueIntrinsic::integer_max: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + + auto type = parse_type(module, scope); + + expect_character(module, right_parenthesis); + + UnaryTypeId id; + switch (intrinsic) + { + case ValueIntrinsic::byte_size: id = UnaryTypeId::byte_size; break; + 
case ValueIntrinsic::integer_max: id = UnaryTypeId::integer_max; break; + default: unreachable(); + } + + *result = { + .unary_type = { + .type = type, + .id = id, + }, + .id = ValueId::unary_type, + }; + } break; + case ValueIntrinsic::select: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + + auto condition = parse_value(module, scope, {}); + + expect_character(module, ','); + skip_space(module); + + auto true_value = parse_value(module, scope, {}); + + expect_character(module, ','); + skip_space(module); + + auto false_value = parse_value(module, scope, {}); + + skip_space(module); + expect_character(module, right_parenthesis); + + *result = { + .select = { + .condition = condition, + .true_value = true_value, + .false_value = false_value, + }, + .id = ValueId::select, + }; + } break; + case ValueIntrinsic::string_to_enum: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + + auto type = parse_type(module, scope); + + skip_space(module); + expect_character(module, ','); + skip_space(module); + + auto string_value = parse_value(module, scope, {}); + + skip_space(module); + expect_character(module, right_parenthesis); + *result = { + .string_to_enum = { + .type = type, + .string = string_value, + }, + .id = ValueId::string_to_enum, + }; + } break; + case ValueIntrinsic::trap: + case ValueIntrinsic::va_start: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + expect_character(module, right_parenthesis); + + ValueId id; + switch (intrinsic) + { + case ValueIntrinsic::trap: id = ValueId::trap; break; + case ValueIntrinsic::va_start: id = ValueId::va_start; break; + default: unreachable(); + } + *result = { + .id = id, + }; + } break; + case ValueIntrinsic::va_arg: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + auto valist = parse_value(module, scope, {}); + skip_space(module); + 
expect_character(module, ','); + skip_space(module); + auto ty = parse_type(module, scope); + skip_space(module); + expect_character(module, right_parenthesis); + *result = { + .va_arg = { + .va_list = valist, + .type = ty, + }, + .id = ValueId::va_arg, + }; + } break; + case ValueIntrinsic::va_copy: + { + trap_raw(); + } break; + case ValueIntrinsic::count: unreachable(); + } + } break; + case TokenId::left_bracket: + { + u64 element_count = 0; + Value* value_buffer[64]; + + while (1) + { + skip_space(module); + + if (consume_character_if_match(module, right_bracket)) + { + break; + } + + auto value = parse_value(module, scope, {}); + value_buffer[element_count] = value; + element_count += 1; + + consume_character_if_match(module, ','); + } + + auto values = new_value_array(module, element_count); + memcpy(values.pointer, value_buffer, element_count * sizeof(Value*)); + + result = new_value(module); + *result = { + .array_initialization = { + .values = values, + .is_constant = false, // This is analyzed later + }, + .id = ValueId::array_initialization, + }; + } break; + case TokenId::dot: + { + auto identifier = parse_identifier(module); + result = new_value(module); + + *result = { + .enum_literal = identifier, + .id = ValueId::enum_literal, + }; + } break; + case TokenId::left_parenthesis: + { + result = parse_value(module, scope, { + .kind = builder.kind, + }); + expect_character(module, right_parenthesis); + } break; + case TokenId::string_literal: + { + result = new_value(module); + *result = { + .string_literal = token.string_literal, + .id = ValueId::string_literal, + }; + } break; + case TokenId::left_brace: + { + skip_space(module); + + u64 field_count = 0; + String name_buffer[64]; + Value* value_buffer[64]; + bool zero = false; + + while (1) + { + skip_space(module); + + if (consume_character_if_match(module, right_brace)) + { + break; + } + + auto field_index = field_count; + if (consume_character_if_match(module, '.')) + { + auto name = 
parse_identifier(module); + name_buffer[field_index] = name; + skip_space(module); + expect_character(module, '='); + skip_space(module); + + auto value = parse_value(module, scope, {}); + value_buffer[field_index] = value; + skip_space(module); + consume_character_if_match(module, ','); + } + else + { + auto token = tokenize(module); + zero = token.id == TokenId::value_keyword && token.value_keyword == ValueKeyword::zero; + if (zero) + { + skip_space(module); + + if (consume_character_if_match(module, ',')) + { + skip_space(module); + } + + expect_character(module, right_brace); + break; + } + else + { + report_error(); + } + } + + field_count += 1; + } + + auto names = arena_allocate(module->arena, field_count); + memcpy(names.pointer, name_buffer, sizeof(String) * field_count); + auto values = new_value_array(module, field_count); + memcpy(values.pointer, value_buffer, sizeof(Value*) * field_count); + + result = new_value(module); + *result = { + .aggregate_initialization = { + .names = names, + .values = values, + .is_constant = false, + .zero = zero, + }, + .id = ValueId::aggregate_initialization, + }; + } break; + case TokenId::value_keyword: + { + result = new_value(module); + Value value; + switch (token.value_keyword) + { + case ValueKeyword::undefined: value = { .id = ValueId::undefined }; break; + case ValueKeyword::unreachable: value = { .id = ValueId::unreachable }; break; + case ValueKeyword::zero: value = { .id = ValueId::zero }; break; + case ValueKeyword::count: unreachable(); + } + *result = value; + } break; + default: report_error(); + } + + return result; +} + +fn Precedence get_token_precedence(TokenId id) +{ + Precedence precedence; + + switch (id) + { + case TokenId::none: unreachable(); + case TokenId::comma: + case TokenId::double_dot: + case TokenId::end_of_statement: + case TokenId::right_brace: + case TokenId::right_bracket: + case TokenId::right_parenthesis: + precedence = Precedence::none; + break; + case TokenId::assign: + case 
TokenId::assign_shift_left: + case TokenId::assign_shift_right: + case TokenId::assign_plus: + case TokenId::assign_dash: + case TokenId::assign_asterisk: + case TokenId::assign_forward_slash: + case TokenId::assign_percentage: + case TokenId::assign_caret: + case TokenId::assign_bar: + case TokenId::assign_ampersand: + precedence = Precedence::assignment; + break; + case TokenId::compare_equal: + case TokenId::compare_not_equal: + case TokenId::compare_less: + case TokenId::compare_less_equal: + case TokenId::compare_greater: + case TokenId::compare_greater_equal: + precedence = Precedence::comparison; + break; + case TokenId::operator_keyword: // TODO: check if any other operator that is not bitwise is added + case TokenId::ampersand: + case TokenId::bar: + case TokenId::caret: + precedence = Precedence::bitwise; + break; + case TokenId::shift_left: + case TokenId::shift_right: + precedence = Precedence::shifting; + break; + case TokenId::plus: + case TokenId::dash: + precedence = Precedence::add_like; + break; + case TokenId::asterisk: + case TokenId::forward_slash: + case TokenId::percentage: + precedence = Precedence::div_like; + break; + case TokenId::pointer_dereference: + case TokenId::left_parenthesis: + case TokenId::left_bracket: + case TokenId::dot: + precedence = Precedence::postfix; + break; + default: trap_raw(); + } + + return precedence; +} + +fn Slice parse_call_arguments(Module* module, Scope* scope) +{ + Slice arguments = {}; + + u32 semantic_argument_count = 0; + Value* semantic_argument_buffer[64]; + + while (1) + { + skip_space(module); + + if (consume_character_if_match(module, right_parenthesis)) + { + break; + } + + auto argument = parse_value(module, scope, {}); + auto argument_index = semantic_argument_count; + semantic_argument_buffer[argument_index] = argument; + + skip_space(module); + + consume_character_if_match(module, ','); + + semantic_argument_count += 1; + } + + if (semantic_argument_count != 0) + { + arguments = 
new_value_array(module, semantic_argument_count); + memcpy(arguments.pointer, semantic_argument_buffer, semantic_argument_count * sizeof(Value*)); + } + + return arguments; +} + +fn Value* parse_right(Module* module, Scope* scope, ValueBuilder builder) +{ + auto* left = builder.left; + assert(left); + + Token token = builder.token; + Value* result = 0; + + switch (token.id) + { + case TokenId::plus: + case TokenId::dash: + case TokenId::asterisk: + case TokenId::forward_slash: + case TokenId::percentage: + case TokenId::ampersand: + case TokenId::bar: + case TokenId::caret: + case TokenId::shift_left: + case TokenId::shift_right: + case TokenId::compare_equal: + case TokenId::compare_not_equal: + case TokenId::compare_less: + case TokenId::compare_less_equal: + case TokenId::compare_greater: + case TokenId::compare_greater_equal: + case TokenId::operator_keyword: + // Binary + { + auto precedence = get_token_precedence(token.id); + assert(precedence != Precedence::assignment); + + BinaryId id; + switch (token.id) + { + case TokenId::plus: id = BinaryId::add; break; + case TokenId::dash: id = BinaryId::sub; break; + case TokenId::asterisk: id = BinaryId::mul; break; + case TokenId::forward_slash: id = BinaryId::div; break; + case TokenId::percentage: id = BinaryId::rem; break; + case TokenId::ampersand: id = BinaryId::bitwise_and; break; + case TokenId::bar: id = BinaryId::bitwise_or; break; + case TokenId::caret: id = BinaryId::bitwise_xor; break; + case TokenId::shift_left: id = BinaryId::shift_left; break; + case TokenId::shift_right: id = BinaryId::shift_right; break; + case TokenId::compare_equal: id = BinaryId::compare_equal; break; + case TokenId::compare_not_equal: id = BinaryId::compare_not_equal; break; + case TokenId::compare_less: id = BinaryId::compare_less; break; + case TokenId::compare_less_equal: id = BinaryId::compare_less_equal; break; + case TokenId::compare_greater: id = BinaryId::compare_greater; break; + case TokenId::compare_greater_equal: id 
= BinaryId::compare_greater_equal; break; + case TokenId::operator_keyword: switch (token.operator_keyword) + { + case OperatorKeyword::and_op: id = BinaryId::logical_and; break; + case OperatorKeyword::or_op: id = BinaryId::logical_or; break; + case OperatorKeyword::and_op_shortcircuit: id = BinaryId::logical_and_shortcircuit; break; + case OperatorKeyword::or_op_shortcircuit: id = BinaryId::logical_or_shortcircuit; break; + case OperatorKeyword::count: unreachable(); + break; + } break; + default: unreachable(); + } + + auto right_precedence = (Precedence)((backing_type(Precedence))precedence + (precedence != Precedence::assignment)); + auto right = parse_precedence(module, scope, builder.with_precedence(right_precedence).with_token({}).with_left(0)); + + result = new_value(module); + *result = { + .binary = { + .left = left, + .right = right, + .id = id, + }, + .id = ValueId::binary, + .kind = ValueKind::right, + }; + } break; + case TokenId::pointer_dereference: + { + result = new_value(module); + *result = { + .dereference = left, + .id = ValueId::dereference, + .kind = ValueKind::right, + }; + } break; + case TokenId::left_parenthesis: + { + result = new_value(module); + // Callable + switch (left->id) + { + case ValueId::macro_reference: + { + auto* declaration = left->macro_reference; + if (declaration->is_generic()) + { + report_error(); + } + + auto instantiation_line = get_line(module); + auto instantiation_column = get_column(module); + + auto arguments = parse_call_arguments(module, scope); + + *result = { + .macro_instantiation = { + .declaration = declaration, + .instantiation_function = module->current_function, + .declaration_arguments = {}, + .instantiation_arguments = arguments, + .constant_arguments = {}, + .return_type = declaration->return_type, + .scope = { + .parent = scope, + .line = declaration->scope.line, + .column = declaration->scope.column, + .kind = ScopeKind::macro_instantiation, + }, + .line = instantiation_line, + .column = 
instantiation_column, + }, + .id = ValueId::macro_instantiation, + }; + } break; + default: + { + auto arguments = parse_call_arguments(module, scope); + *result = { + .call = { + .callable = left, + .arguments = arguments, + }, + .id = ValueId::call, + .kind = ValueKind::right, + }; + } break; + } + } break; + case TokenId::left_bracket: + { + skip_space(module); + result = new_value(module); + + if (left->id == ValueId::macro_reference) + { + auto* declaration = left->macro_reference; + if (!declaration->is_generic()) + { + report_error(); + } + + auto instantiation_line = get_line(module); + auto instantiation_column = get_column(module); + auto original_constant_argument_count = declaration->constant_arguments.length; + auto constant_arguments = arena_allocate(module->arena, original_constant_argument_count); + u64 constant_argument_count = 0; + + while (1) + { + skip_space(module); + + if (consume_character_if_match(module, right_bracket)) + { + break; + } + + auto constant_argument_index = constant_argument_count; + if (constant_argument_index == original_constant_argument_count) + { + report_error(); + } + + auto constant_argument = declaration->constant_arguments[constant_argument_index]; + + switch (constant_argument.id) + { + case ConstantArgumentId::value: + { + trap_raw(); // TODO + } break; + case ConstantArgumentId::type: + { + auto argument_type = parse_type(module, scope); + constant_arguments[constant_argument_index] = { + .name = constant_argument.name, + .type = argument_type, + .id = ConstantArgumentId::type, + }; + } break; + } + + constant_argument_count += 1; + + skip_space(module); + consume_character_if_match(module, ','); + } + + skip_space(module); + + expect_character(module, left_parenthesis); + + auto instantiation_arguments = parse_call_arguments(module, scope); + + *result = { + .macro_instantiation = { + .declaration = declaration, + .instantiation_function = module->current_function, + .declaration_arguments = {}, + 
.instantiation_arguments = instantiation_arguments, + .constant_arguments = constant_arguments, + .return_type = declaration->return_type, + .block = 0, + .scope = { + .parent = scope, + .line = declaration->scope.line, + .column = declaration->scope.column, + .kind = ScopeKind::macro_instantiation, + }, + .line = instantiation_line, + .column = instantiation_column, + }, + .id = ValueId::macro_instantiation, + }; + } + else + { + left->kind = ValueKind::left; + + Value* start_value = 0; + auto start = !(module->content[module->offset] == '.' && module->content[module->offset + 1] == '.'); + if (start) + { + start_value = parse_value(module, scope, {}); + } + + auto is_array = consume_character_if_match(module, right_bracket); + if (is_array) + { + if (!start_value) + { + report_error(); + } + + auto index = start_value; + *result = { + .array_expression = { + .array_like = left, + .index = index, + }, + .id = ValueId::array_expression, + }; + } + else + { + expect_character(module, '.'); + expect_character(module, '.'); + + Value* end_value = 0; + if (!consume_character_if_match(module, right_bracket)) + { + end_value = parse_value(module, scope, {}); + expect_character(module, right_bracket); + } + + *result = { + .slice_expression = { + .array_like = left, + .start = start_value, + .end = end_value, + }, + }; + } + } + } break; + case TokenId::dot: + { + left->kind = ValueKind::left; + + skip_space(module); + + auto identifier = parse_identifier(module); + result = new_value(module); + *result = { + .field_access = { + .aggregate = left, + .field_name = identifier, + }, + .id = ValueId::field_access, + .kind = builder.kind, + }; + } break; + default: report_error(); + } + + return result; +} + +fn Value* parse_precedence_left(Module* module, Scope* scope, ValueBuilder builder) +{ + auto result = builder.left; + auto precedence = builder.precedence; + + while (1) + { + auto checkpoint = get_checkpoint(module); + auto token = tokenize(module); + auto 
token_precedence = get_token_precedence(token.id); + if (token_precedence == Precedence::assignment) + { + token_precedence = builder.allow_assignment_operators ? Precedence::assignment : Precedence::none; + } + + if ((backing_type(Precedence))precedence > (backing_type(Precedence))token_precedence) + { + set_checkpoint(module, checkpoint); + break; + } + + auto left = result; + auto right = parse_right(module, scope, builder.with_token(token).with_precedence(Precedence::none).with_left(left)); + result = right; + } + + return result; +} + +fn Value* parse_precedence(Module* module, Scope* scope, ValueBuilder builder) +{ + assert(builder.token.id == TokenId::none); + auto token = tokenize(module); + auto left = parse_left(module, scope, builder.with_token(token)); + auto result = parse_precedence_left(module, scope, builder.with_left(left)); + return result; +} + +fn Value* parse_value(Module* module, Scope* scope, ValueBuilder builder) +{ + assert(builder.precedence == Precedence::none); + assert(!builder.left); + auto value = parse_precedence(module, scope, builder.with_precedence(Precedence::assignment)); + return value; +} + +fn Block* parse_block(Module* module, Scope* parent_scope); + +fn Statement* parse_statement(Module* module, Scope* scope) +{ + bool require_semicolon = true; + + auto statement_line = get_line(module); + auto statement_column = get_column(module); + + auto* statement = &arena_allocate(module->arena, 1)[0]; + *statement = Statement{ + .line = statement_line, + .column = statement_column, + }; + + auto statement_start_character = module->content[module->offset]; + switch (statement_start_character) + { + case '>': + { + module->offset += 1; + skip_space(module); + + auto local_name = parse_identifier(module); + skip_space(module); + + Type* local_type = 0; + if (consume_character_if_match(module, ':')) + { + skip_space(module); + local_type = parse_type(module, scope); + skip_space(module); + } + expect_character(module, '='); + auto 
initial_value = parse_value(module, scope, {}); + + auto local = new_local(module, scope); + *local = { + .variable = { + .storage = 0, + .initial_value = initial_value, + .type = local_type, + .scope = scope, + .name = local_name, + .line = statement_line, + .column = statement_column, + }, + }; + statement->local = local; + statement->id = StatementId::local; + } break; + case '#': + { + statement->expression = parse_value(module, scope, {}); + statement->id = StatementId::expression; + } break; + case left_brace: + { + auto block = parse_block(module, scope); + statement->block = block; + statement->id = StatementId::block; + require_semicolon = false; + } break; + default: + { + if (is_identifier_start(statement_start_character)) + { + auto checkpoint = get_checkpoint(module); + auto statement_start_identifier = parse_identifier(module); + skip_space(module); + + enum class StatementStartKeyword + { + underscore_st, + return_st, + if_st, + // TODO: make `unreachable` a statement start keyword? 
+ for_st, + while_st, + switch_st, + break_st, + continue_st, + count, + }; + + String statement_start_keywords[] = { + str("_"), + str("return"), + str("if"), + str("for"), + str("while"), + str("switch"), + str("break"), + str("continue"), + }; + + static_assert(array_length(statement_start_keywords) == (u64)StatementStartKeyword::count); + + backing_type(StatementStartKeyword) i; + for (i = 0; i < (backing_type(StatementStartKeyword))StatementStartKeyword::count; i += 1) + { + auto statement_start_keyword = statement_start_keywords[i]; + if (statement_start_keyword.equal(statement_start_identifier)) + { + break; + } + } + + auto statement_start_keyword = (StatementStartKeyword)i; + switch (statement_start_keyword) + { + case StatementStartKeyword::underscore_st: + { + trap_raw(); + } break; + case StatementStartKeyword::return_st: + { + auto return_value = parse_value(module, scope, {}); + statement->return_st = return_value; + statement->id = StatementId::return_st; + } break; + case StatementStartKeyword::if_st: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + + auto condition = parse_value(module, scope, {}); + + skip_space(module); + expect_character(module, right_parenthesis); + skip_space(module); + + auto if_statement = parse_statement(module, scope); + + skip_space(module); + + bool is_else = false; + Statement* else_statement = 0; + if (is_identifier_start(module->content[module->offset])) + { + auto checkpoint = get_checkpoint(module); + auto identifier = parse_identifier(module); + is_else = identifier.equal(str("else")); + + if (is_else) + { + skip_space(module); + else_statement = parse_statement(module, scope); + } + else + { + set_checkpoint(module, checkpoint); + } + } + + require_semicolon = false; + + statement->if_st = { + .condition = condition, + .if_statement = if_statement, + .else_statement = else_statement, + }; + statement->id = StatementId::if_st; + } break; + case 
StatementStartKeyword::for_st: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + + auto parent_scope = scope; + + *statement = Statement{ + .for_each = { + .first_local = 0, + .last_local = 0, + .left_values = {}, + .right_values = {}, + .predicate = 0, + .scope = { + .parent = scope, + .line = statement_line, + .column = statement_column, + .kind = ScopeKind::for_each, + }, + .kind = {}, + }, + .id = StatementId::for_each, + .line = statement_line, + .column = statement_column, + }; + + auto scope = &statement->for_each.scope; + + ValueKind left_value_buffer[64]; + u64 left_value_count = 0; + + while (1) + { + skip_space(module); + + auto is_left = module->content[module->offset] == '&'; + module->offset += is_left; + + auto for_local_line = get_line(module); + auto for_local_column = get_column(module); + + if (is_identifier_start(module->content[module->offset])) + { + auto local_name = parse_identifier(module); + auto local = new_local(module, scope); + *local = { + .variable = { + .storage = 0, + .initial_value = 0, + .type = 0, + .scope = scope, + .name = local_name, + .line = for_local_line, + .column = for_local_column, + }, + }; + + auto kind = is_left ? 
ValueKind::left : ValueKind::right; + left_value_buffer[left_value_count] = kind; + left_value_count += 1; + } + else + { + trap_raw(); + } + + skip_space(module); + + if (!consume_character_if_match(module, ',')) + { + expect_character(module, ':'); + break; + } + } + + skip_space(module); + + Value* right_value_buffer[64]; + u64 right_value_count = 0; + + right_value_buffer[right_value_count] = parse_value(module, scope, { .kind = ValueKind::left }); + right_value_count += 1; + + skip_space(module); + + auto token = tokenize(module); + + ForEachKind kind; + switch (token.id) + { + case TokenId::double_dot: + { + if (left_value_count != 1) + { + report_error(); + } + + right_value_buffer[0]->kind = ValueKind::right; + + right_value_buffer[right_value_count] = parse_value(module, scope, {}); + right_value_count += 1; + + expect_character(module, right_parenthesis); + kind = ForEachKind::range; + } break; + case TokenId::right_parenthesis: kind = ForEachKind::slice; break; + default: report_error(); + } + + statement->for_each.kind = kind; + + if (kind == ForEachKind::slice && left_value_count != right_value_count) + { + report_error(); + } + + auto left_values = arena_allocate(module->arena, left_value_count); + memcpy(left_values.pointer, left_value_buffer, left_value_count * sizeof(left_value_buffer[0])); + auto right_values = arena_allocate(module->arena, right_value_count); + memcpy(right_values.pointer, right_value_buffer, right_value_count * sizeof(right_value_buffer[0])); + + statement->for_each.left_values = left_values; + statement->for_each.right_values = right_values; + + skip_space(module); + + auto predicate = parse_statement(module, scope); + statement->for_each.predicate = predicate; + + skip_space(module); + + require_semicolon = false; + } break; + case StatementStartKeyword::while_st: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + + auto condition = parse_value(module, scope, {}); + + 
skip_space(module); + expect_character(module, right_parenthesis); + skip_space(module); + + auto block = parse_block(module, scope); + + require_semicolon = false; + statement->while_st = { + .condition = condition, + .block = block, + }; + statement->id = StatementId::while_st; + } break; + case StatementStartKeyword::switch_st: + { + skip_space(module); + expect_character(module, left_parenthesis); + skip_space(module); + + auto discriminant = parse_value(module, scope, {}); + + skip_space(module); + expect_character(module, right_parenthesis); + + skip_space(module); + expect_character(module, left_brace); + + StatementSwitchClause clause_buffer[64]; + u64 clause_count = 0; + + while (1) + { + skip_space(module); + + bool is_else = false; + if (is_identifier_start(module->content[module->offset])) + { + auto else_checkpoint = get_checkpoint(module); + auto i = parse_identifier(module); + is_else = i.equal(str("else")); + if (!is_else) + { + set_checkpoint(module, else_checkpoint); + } + } + + + Slice clause_values = {}; + if (is_else) + { + skip_space(module); + + expect_character(module, '='); + expect_character(module, '>'); + } + else + { + Value* case_buffer[64]; + u64 case_count = 0; + + while (1) + { + auto case_value = parse_value(module, scope, {}); + case_buffer[case_count] = case_value; + case_count += 1; + + consume_character_if_match(module, ','); + + skip_space(module); + + if (consume_character_if_match(module, '=')) + { + expect_character(module, '>'); + break; + } + } + + clause_values = new_value_array(module, case_count); + memcpy(clause_values.pointer, case_buffer, case_count * sizeof(Value*)); + } + + skip_space(module); + + auto clause_block = parse_block(module, scope); + + clause_buffer[clause_count] = { + .values = clause_values, + .block = clause_block, + }; + clause_count += 1; + + consume_character_if_match(module, ','); + + skip_space(module); + + if (consume_character_if_match(module, right_brace)) + { + break; + } + } + + auto 
clauses = arena_allocate(module->arena, clause_count); + memcpy(clauses.pointer, clause_buffer, sizeof(clause_buffer[0]) * clause_count); + + require_semicolon = false; + + statement->switch_st = { + .discriminant = discriminant, + .clauses = clauses, + }; + statement->id = StatementId::switch_st; + } break; + case StatementStartKeyword::break_st: + { + statement->id = StatementId::break_st; + } break; + case StatementStartKeyword::continue_st: + { + statement->id = StatementId::continue_st; + } break; + case StatementStartKeyword::count: + { + set_checkpoint(module, checkpoint); + + auto left = parse_value(module, scope, { .kind = ValueKind::left }); + + skip_space(module); + + if (consume_character_if_match(module, ';')) + { + require_semicolon = false; + statement->expression = left; + statement->id = StatementId::expression; + } + else + { + auto token = tokenize(module); + + StatementAssignmentId id; + switch (token.id) + { + case TokenId::assign: id = StatementAssignmentId::assign; break; + case TokenId::assign_plus: id = StatementAssignmentId::assign_add; break; + case TokenId::assign_dash: id = StatementAssignmentId::assign_sub; break; + case TokenId::assign_asterisk: id = StatementAssignmentId::assign_mul; break; + case TokenId::assign_forward_slash: id = StatementAssignmentId::assign_div; break; + case TokenId::assign_percentage: id = StatementAssignmentId::assign_rem; break; + case TokenId::assign_shift_left: id = StatementAssignmentId::assign_shift_left; break; + case TokenId::assign_shift_right: id = StatementAssignmentId::assign_shift_right; break; + case TokenId::assign_ampersand: id = StatementAssignmentId::assign_and; break; + case TokenId::assign_bar: id = StatementAssignmentId::assign_or; break; + case TokenId::assign_caret: id = StatementAssignmentId::assign_xor; break; + default: trap_raw(); + } + + skip_space(module); + + auto right = parse_value(module, scope, {}); + statement->assignment = { + .left = left, + .right = right, + .id = id, + }; 
+ statement->id = StatementId::assignment; + } + } break; + } + } + else + { + trap_raw(); + } + } break; + } + + if (require_semicolon) + { + expect_character(module, ';'); + } + + return statement; +} + +fn Block* parse_block(Module* module, Scope* parent_scope) +{ + auto* block = &arena_allocate(module->arena, 1)[0]; + *block = { + .scope = { + .parent = parent_scope, + .line = get_line(module), + .column = get_column(module), + .kind = ScopeKind::local, + }, + }; + auto* scope = &block->scope; + + expect_character(module, left_brace); + + Statement* current_statement = 0; + + while (true) + { + skip_space(module); + + if (module->offset == module->content.length) + { + break; + } + + if (consume_character_if_match(module, right_brace)) + { + break; + } + + auto* statement = parse_statement(module, scope); + + if (current_statement) + { + current_statement->next = statement; + } + + current_statement = statement; + } + + return block; +} + +void parse(Module* module) +{ + auto scope = &module->scope; + while (1) + { + skip_space(module); + + if (module->offset == module->content.length) + { + break; + } + + bool is_export = false; + bool is_extern = false; + + auto global_line = get_line(module); + auto global_column = get_column(module); + + if (consume_character_if_match(module, left_bracket)) + { + while (module->offset < module->content.length) + { + auto global_keyword_string = parse_identifier(module); + enum class GlobalKeyword + { + export_keyword, + extern_keyword, + count, + }; + String global_keyword_strings[] = { + str("export"), + str("extern"), + }; + static_assert(array_length(global_keyword_strings) == (u64)GlobalKeyword::count); + + u32 i; + for (i = 0; i < array_length(global_keyword_strings); i += 1) + { + String keyword = global_keyword_strings[i]; + if (keyword.equal(global_keyword_string)) + { + break; + } + } + + auto global_keyword = (GlobalKeyword)i; + switch (global_keyword) + { + case GlobalKeyword::export_keyword: + { + is_export = 
true; + } break; + case GlobalKeyword::extern_keyword: + { + is_extern = true; + } break; + case GlobalKeyword::count: + { + report_error(); + } + } + + if (consume_character_if_match(module, right_bracket)) + { + break; + } + else + { + report_error(); + } + } + + skip_space(module); + } + + auto global_name = parse_identifier(module); + + Global* last_global = module->first_global; + while (last_global) + { + if (global_name.equal(last_global->variable.name)) + { + report_error(); + } + + if (!last_global->next) + { + break; + } + + last_global = last_global->next; + } + + Type* type_it = module->first_type; + Type* forward_declaration = 0; + while (type_it) + { + if (global_name.equal(type_it->name)) + { + if (type_it->id == TypeId::forward_declaration) + { + forward_declaration = type_it; + break; + } + else + { + report_error(); + } + } + + if (!type_it->next) + { + break; + } + + type_it = type_it->next; + } + + skip_space(module); + + Type* global_type = 0; + + if (consume_character_if_match(module, ':')) + { + skip_space(module); + + global_type = parse_type(module, scope); + + skip_space(module); + } + + expect_character(module, '='); + + skip_space(module); + + bool is_global_keyword = false; + + enum class GlobalKeyword + { + bits, + enumerator, + function, + macro, + structure, + typealias, + union_type, + count, + }; + + auto i = (backing_type(GlobalKeyword))GlobalKeyword::count; + + if (is_identifier_start(module->content[module->offset])) + { + auto checkpoint = get_checkpoint(module); + auto global_string = parse_identifier(module); + skip_space(module); + + String global_keywords[] = { + str("bits"), + str("enum"), + str("fn"), + str("macro"), + str("struct"), + str("typealias"), + str("union"), + }; + static_assert(array_length(global_keywords) == (u64)GlobalKeyword::count); + + for (i = 0; i < (backing_type(GlobalKeyword))GlobalKeyword::count; i += 1) + { + String global_keyword = global_keywords[i]; + if (global_string.equal(global_keyword)) + { 
+ break; + } + } + + auto global_keyword = (GlobalKeyword)i; + switch (global_keyword) + { + case GlobalKeyword::bits: + { + auto is_implicit_type = module->content[module->offset] == left_brace; + Type* backing_type = 0; + if (!is_implicit_type) + { + backing_type = parse_type(module, scope); + } + + skip_space(module); + expect_character(module, left_brace); + + u64 field_bit_offset = 0; + u64 field_count = 0; + Field field_buffer[64]; + + while (1) + { + skip_space(module); + + if (consume_character_if_match(module, right_brace)) { + break; + } + + auto field_line = get_line(module); + auto field_name = parse_identifier(module); + + skip_space(module); + expect_character(module, ':'); + skip_space(module); + + auto field_type = parse_type(module, scope); + + auto field_bit_count = get_bit_size(field_type); + + skip_space(module); + + consume_character_if_match(module, ','); + + field_buffer[field_count] = { + .name = field_name, + .type = field_type, + .offset = field_bit_offset, + .line = field_line, + }; + + field_bit_offset += field_bit_count; + field_count += 1; + } + + consume_character_if_match(module, ';'); + + auto fields = arena_allocate(module->arena, field_count); + memcpy(fields.pointer, field_buffer, sizeof(Field) * field_count); + + auto needed_bit_count = MAX(next_power_of_two(field_bit_offset), 8); + if (needed_bit_count > ~(u32)0) + { + report_error(); + } + + auto bit_count = (u32)needed_bit_count; + + if (!backing_type) + { + backing_type = integer_type(module, { .bit_count = bit_count, .is_signed = false }); + } + + if (backing_type->id != TypeId::integer) + { + report_error(); + } + + auto backing_type_bit_size = get_bit_size(backing_type); + if (backing_type_bit_size > 64) + { + report_error(); + } + + auto bits_type = type_allocate_init(module, { + .bits = { + .fields = fields, + .backing_type = backing_type, + .line = global_line, + .is_implicit_backing_type = is_implicit_type, + }, + .id = TypeId::bits, + .name = global_name, + }); + 
unused(bits_type); + } break; + case GlobalKeyword::enumerator: + { + auto is_implicit_type = module->content[module->offset] == left_brace; + Type* backing_type = 0; + if (!is_implicit_type) + { + backing_type = parse_type(module, scope); + } + + skip_space(module); + expect_character(module, left_brace); + + u64 highest_value = 0; + u64 lowest_value = ~(u64)0; + + u64 field_count = 0; + String name_buffer[64]; + Value* value_buffer[64]; + u64 int_value_buffer[64]; + + bool is_resolved = true; + bool implicit_value = false; + + while (1) + { + skip_space(module); + + if (consume_character_if_match(module, right_brace)) { + break; + } + + auto field_index = field_count; + field_count += 1; + + auto field_name = parse_identifier(module); + + skip_space(module); + + u64 field_integer_value = field_index; + Value* field_value = 0; + + if (consume_character_if_match(module, '=')) + { + skip_space(module); + auto field_value = parse_value(module, scope, {}); + if (is_resolved) + { + if (field_value->is_constant()) + { + switch (field_value->id) + { + case ValueId::constant_integer: + { + field_integer_value = field_value->constant_integer.value; + } break; + default: trap_raw(); + } + } + else + { + trap_raw(); + } + } + else + { + trap_raw(); + } + } + else + { + if (!is_resolved) + { + report_error(); + } + } + + skip_space(module); + consume_character_if_match(module, ','); + } + + if (is_resolved) + { + auto fields = arena_allocate(module->arena, field_count); + u64 highest_value = 0; + auto lowest_value = ~(u64)0; + + for (u64 i = 0; i < field_count; i += 1) + { + auto value = int_value_buffer[i]; + fields[i] = { + .name = name_buffer[i], + .value = value, + }; + } + + auto needed_bit_count = 64 - (u32)clz(highest_value); + needed_bit_count = needed_bit_count ? 
needed_bit_count : 1; + + if (!backing_type) + { + backing_type = integer_type(module, { .bit_count = needed_bit_count, .is_signed = false }); + } + + auto enum_type = type_allocate_init(module, { + .enumerator = { + .fields = fields, + .backing_type = backing_type, + .line = global_line, + }, + .id = TypeId::enumerator, + .name = global_name, + }); + + unused(enum_type); + } + else + { + trap_raw(); + } + } break; + case GlobalKeyword::function: + { + auto calling_convention = CallingConvention::c; + auto function_attributes = FunctionAttributes{}; + bool is_variable_arguments = false; + + if (consume_character_if_match(module, left_bracket)) + { + while (module->offset < module->content.length) + { + auto function_identifier = parse_identifier(module); + + enum class FunctionKeyword + { + cc, + count, + }; + + String function_keywords[] = { + str("cc"), + }; + static_assert(array_length(function_keywords) == (u64)FunctionKeyword::count); + + backing_type(FunctionKeyword) i; + for (i = 0; i < (backing_type(FunctionKeyword))(FunctionKeyword::count); i += 1) + { + auto function_keyword = function_keywords[i]; + if (function_keyword.equal(function_identifier)) + { + break; + } + } + + auto function_keyword = (FunctionKeyword)i; + skip_space(module); + + switch (function_keyword) + { + case FunctionKeyword::cc: + { + expect_character(module, left_parenthesis); + skip_space(module); + auto calling_convention_string = parse_identifier(module); + String calling_conventions[] = { + str("c"), + }; + static_assert(array_length(calling_conventions) == (u64)CallingConvention::count); + + backing_type(CallingConvention) i; + for (i = 0; i < (backing_type(CallingConvention))CallingConvention::count; i += 1) + { + auto calling_convention = calling_conventions[i]; + if (calling_convention.equal(calling_convention_string)) + { + break; + } + } + + auto candidate_calling_convention = (CallingConvention)i; + if (candidate_calling_convention == CallingConvention::count) + { + 
report_error(); + } + + calling_convention = candidate_calling_convention; + + skip_space(module); + expect_character(module, right_parenthesis); + } break; + case FunctionKeyword::count: + { + report_error(); + } break; + } + + skip_space(module); + + if (consume_character_if_match(module, right_bracket)) + { + break; + } + else + { + report_error(); + } + } + } + + skip_space(module); + + expect_character(module, left_parenthesis); + + Type* semantic_argument_type_buffer[64]; + String semantic_argument_name_buffer[64]; + u32 argument_line_buffer[64]; + u32 semantic_argument_count = 0; + + while (module->offset < module->content.length) + { + skip_space(module); + + if (consume_character_if_match(module, '.')) + { + expect_character(module, '.'); + expect_character(module, '.'); + skip_space(module); + expect_character(module, right_parenthesis); + is_variable_arguments = true; + break; + } + + if (consume_character_if_match(module, right_parenthesis)) + { + break; + } + + auto line = get_line(module); + argument_line_buffer[semantic_argument_count] = line; + + auto argument_name = parse_identifier(module); + semantic_argument_name_buffer[semantic_argument_count] = argument_name; + + skip_space(module); + + expect_character(module, ':'); + + skip_space(module); + + auto argument_type = parse_type(module, scope); + semantic_argument_type_buffer[semantic_argument_count] = argument_type; + + skip_space(module); + + unused(consume_character_if_match(module, ',')); + + semantic_argument_count += 1; + } + + skip_space(module); + + auto return_type = parse_type(module, scope); + + skip_space(module); + + Slice argument_types = {}; + if (semantic_argument_count != 0) + { + argument_types = new_type_array(module, semantic_argument_count); + memcpy(argument_types.pointer, semantic_argument_type_buffer, semantic_argument_count * sizeof(Type*)); + } + + auto is_declaration = consume_character_if_match(module, ';'); + + auto function_type = type_allocate_init(module, { + 
.function = { + .semantic_return_type = return_type, + .semantic_argument_types = argument_types, + .calling_convention = calling_convention, + .is_variable_arguments = is_variable_arguments, + }, + .id = TypeId::function, + .name = str(""), + }); + + auto storage = new_value(module); + *storage = { + .type = get_pointer_type(module, function_type), + .id = ValueId::external_function, + // TODO? .kind = ValueKind::left, + }; + auto global = new_global(module); + *global = { + .variable = { + .storage = storage, + .initial_value = 0, + .type = function_type, + .scope = scope, + .name = global_name, + .line = global_line, + .column = global_column, + }, + .linkage = (is_export | is_extern) ? Linkage::external : Linkage::internal, + }; + module->current_function = global; + + if (!is_declaration) + { + Slice arguments = arena_allocate(module->arena, semantic_argument_count); + for (u32 i = 0; i < semantic_argument_count; i += 1) + { + Argument* argument = &arguments[i]; + auto name = semantic_argument_name_buffer[i]; + auto* type = semantic_argument_type_buffer[i]; + auto line = argument_line_buffer[i]; + + *argument = { + .variable = { + .storage = 0, + .initial_value = 0, + .type = type, + .scope = &storage->function.scope, + .name = name, + .line = line, + .column = 0, + }, + .index = i, + }; + } + + storage->function = { + .arguments = arguments, + .scope = { + .parent = scope, + .line = global_line, + .column = global_column, + .kind = ScopeKind::function, + }, + .block = 0, + .attributes = function_attributes, + }; + storage->id = ValueId::function; + + storage->function.block = parse_block(module, &storage->function.scope); + } + } break; + case GlobalKeyword::macro: + { + Type* type_argument_buffer[64]; + u64 type_argument_count = 0; + + ConstantArgument constant_argument_buffer[64]; + u64 constant_argument_count = 0; + + auto is_generic = consume_character_if_match(module, left_bracket); + + if (is_generic) + { + while (1) + { + skip_space(module); + + if 
(consume_character_if_match(module, right_bracket)) + { + break; + } + + auto argument_name = parse_identifier(module); + + skip_space(module); + + auto has_value = consume_character_if_match(module, ':'); + + auto constant_argument_index = constant_argument_count; + + if (has_value) + { + trap_raw(); // TODO + } + else + { + auto ty = type_allocate_init(module, { + .id = TypeId::unresolved, + .name = argument_name, + }); + + constant_argument_buffer[constant_argument_index] = { + .name = argument_name, + .type = ty, + .id = ConstantArgumentId::type, + }; + } + + constant_argument_count += 1; + } + + skip_space(module); + } + + expect_character(module, left_parenthesis); + + if (is_generic) + { + if (constant_argument_count == 0) + { + report_error(); + } + } + else + { + assert(constant_argument_count == 0); + } + + auto constant_arguments = arena_allocate(module->arena, constant_argument_count); + memcpy(constant_arguments.pointer, constant_argument_buffer, sizeof(constant_argument_buffer[0]) * constant_argument_count); + + auto macro_declaration = &arena_allocate(module->arena, 1)[0]; + *macro_declaration = { + .arguments = {}, + .constant_arguments = constant_arguments, + .return_type = 0, + .block = 0, + .name = global_name, + .scope = { + .parent = scope, + .line = global_line, + .column = global_column, + .kind = ScopeKind::macro_declaration, + }, + }; + + if (module->last_macro_declaration) + { + assert(module->first_macro_declaration); + trap_raw(); + } + else + { + assert(!module->first_macro_declaration); + module->first_macro_declaration = macro_declaration; + module->last_macro_declaration = macro_declaration; + } + + module->current_macro_declaration = macro_declaration; + + auto scope = ¯o_declaration->scope; + + Argument argument_buffer[64]; + u32 argument_count = 0; + + while (1) + { + skip_space(module); + + if (consume_character_if_match(module, right_parenthesis)) + { + break; + } + + auto argument_index = argument_count; + auto argument_line = 
get_line(module); + auto argument_column = get_column(module); + + auto argument_name = parse_identifier(module); + + skip_space(module); + expect_character(module, ':'); + skip_space(module); + + auto argument_type = parse_type(module, scope); + + auto argument = &argument_buffer[argument_count]; + *argument = { + .variable = { + .storage = 0, + .initial_value = 0, + .type = argument_type, + .scope = scope, + .name = argument_name, + .line = argument_line, + .column = argument_column, + }, + .index = argument_index, + }; + argument_count += 1; + + skip_space(module); + + consume_character_if_match(module, ','); + } + + skip_space(module); + + auto return_type = parse_type(module, scope); + macro_declaration->return_type = return_type; + + auto arguments = arena_allocate(module->arena, argument_count); + memcpy(arguments.pointer, argument_buffer, sizeof(argument_buffer[0]) * argument_count); + macro_declaration->arguments = arguments; + + skip_space(module); + + auto block = parse_block(module, scope); + macro_declaration->block = block; + + // END OF SCOPE + module->current_macro_declaration = 0; + } break; + case GlobalKeyword::structure: + { + skip_space(module); + + Type* struct_type; + if (forward_declaration) + { + trap_raw(); + } + else + { + struct_type = type_allocate_init(module, { + .id = TypeId::forward_declaration, + .name = global_name, + }); + } + + if (consume_character_if_match(module, left_brace)) + { + Field field_buffer[256]; + + u64 byte_size = 0; + u32 byte_alignment = 1; + + u32 field_count = 0; + + while (1) + { + skip_space(module); + + if (consume_character_if_match(module, right_brace)) + { + break; + } + + auto field_index = field_count; + auto field_line = get_line(module); + auto field_name = parse_identifier(module); + + skip_space(module); + expect_character(module, ':'); + skip_space(module); + + auto field_type = parse_type(module, scope); + + auto field_byte_alignment = get_byte_alignment(field_type); + auto field_byte_size = 
get_byte_size(field_type); + // Align struct size by field alignment + auto field_byte_offset = align_forward(byte_size, field_byte_alignment); + + field_buffer[field_index] = { + .name = field_name, + .type = field_type, + .offset = byte_size, + .line = field_line, + }; + + byte_size = field_byte_offset + field_byte_size; + byte_alignment = MAX(byte_alignment, field_byte_alignment); + + skip_space(module); + + consume_character_if_match(module, ','); + + field_count += 1; + } + + skip_space(module); + consume_character_if_match(module, ';'); + + auto fields = arena_allocate(module->arena, field_count); + memcpy(fields.pointer, field_buffer, sizeof(Field) * field_count); + + struct_type->structure = { + .fields = fields, + .byte_size = byte_size, + .byte_alignment = byte_alignment, + .line = global_line, + .is_slice = false, + .next = 0, + }; + struct_type->id = TypeId::structure; + } + else + { + expect_character(module, ';'); + } + } break; + case GlobalKeyword::typealias: + { + auto aliased_type = parse_type(module, scope); + + if (!consume_character_if_match(module, ';')) + { + report_error(); + } + + auto alias_type = type_allocate_init(module, { + .alias = { + .type = aliased_type, + .scope = scope, + .line = global_line, + }, + .id = TypeId::alias, + .name = global_name, + }); + unused(alias_type); + } break; + case GlobalKeyword::union_type: + { + skip_space(module); + expect_character(module, left_brace); + + Type* union_type; + if (forward_declaration) + { + union_type = forward_declaration; + } + else + { + union_type = type_allocate_init(module, { + .id = TypeId::forward_declaration, + .name = global_name, + }); + } + + u32 field_count = 0; + u32 biggest_field = 0; + u32 byte_alignment = 1; + u32 byte_size = 0; + + UnionField field_buffer[64]; + + while (1) + { + skip_space(module); + + if (consume_character_if_match(module, right_brace)) + { + break; + } + + auto field_index = field_count; + field_count += 1; + + auto field_line = get_line(module); + 
auto field_name = parse_identifier(module); + + skip_space(module); + expect_character(module, ':'); + skip_space(module); + + auto field_type = parse_type(module, scope); + + auto field_byte_alignment = get_byte_alignment(field_type); + auto field_byte_size = get_byte_size(field_type); + + field_buffer[field_index] = UnionField{ + .type = field_type, + .name = field_name, + .line = field_line, + }; + + biggest_field = byte_size > field_byte_size ? field_index : biggest_field; + byte_alignment = MAX(byte_alignment, field_byte_alignment); + byte_size = MAX(byte_size, field_byte_size); + + skip_space(module); + + consume_character_if_match(module, ','); + } + + skip_space(module); + consume_character_if_match(module, ';'); + + auto fields = arena_allocate(module->arena, field_count); + memcpy(fields.pointer, field_buffer, sizeof(field_buffer[0]) * field_count); + + union_type->union_type = { + .fields = fields, + .byte_size = byte_size, + .byte_alignment = byte_alignment, + .line = global_line, + .biggest_field = biggest_field, + }; + union_type->id = TypeId::union_type; + } break; + case GlobalKeyword::count: + { + set_checkpoint(module, checkpoint); + } break; + } + } + + if (i == (backing_type(GlobalKeyword))GlobalKeyword::count) + { + auto initial_value = parse_value(module, scope, {}); + skip_space(module); + expect_character(module, ';'); + + auto global_storage = new_value(module); + *global_storage = { + .id = ValueId::global, + }; + + auto global = new_global(module); + *global = { + .variable = { + .storage = global_storage, + .initial_value = initial_value, + .type = global_type, + .scope = scope, + .name = global_name, + .line = global_line, + .column = global_column, + }, + .linkage = Linkage::internal, // TODO: linkage + }; + } + } +}