diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..0bc13c3
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.15)
+if(NOT CMAKE_BUILD_TYPE) # default to a Debug build when the caller did not pick one
+    set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)
+endif()
+# Set C++ standard
+set(CMAKE_CXX_STANDARD 23)
+set(CMAKE_CXX_STANDARD_REQUIRED YES)
+project(bb)
+
+add_executable(bb
+    src/compiler.cpp
+    src/entry_point.cpp
+    src/parser.cpp
+    src/emitter.cpp
+)
+
+target_include_directories(bb PUBLIC src)
+target_compile_definitions(bb PUBLIC
+    $<$<CONFIG:Debug>:BB_DEBUG=1>
+    $<$<NOT:$<CONFIG:Debug>>:BB_DEBUG=0>
+) # BB_DEBUG is always defined: 1 for the Debug configuration, 0 for every other one
+#target_compile_options(bb PRIVATE -fsanitize=address)
+#target_link_options(bb PRIVATE -fsanitize=address)
diff --git a/build.sh b/build.sh
index 09b3c32..eead4ed 100755
--- a/build.sh
+++ b/build.sh
@@ -1,61 +1,4 @@
-#!/usr/bin/env bash
 set -eu
-
-MY_CWD=$PWD
-
-if [[ -z "${BB_CI-}" ]]; then
-    BB_CI=0
-fi
-
-if [[ -z "${BB_BUILD_TYPE-}" ]]; then
-    BB_BUILD_TYPE=debug
-fi
-
-if [[ -z "${BB_ERROR_ON_WARNINGS-}" ]]; then
-    BB_ERROR_ON_WARNINGS=$BB_CI
-fi
-
-if [[ -z "${BB_ERROR_LIMIT-}" ]]; then
-    BB_ERROR_LIMIT=$((1 - BB_CI))
-fi
-
-BB_COMPILE_SHADERS=0
-
-BUILD_DIR=cache
-LARGE_ASSET_BASE_URL=https://github.com/birth-software/bloat-buster/releases/download/large-assets
-mkdir -p $BUILD_DIR
-
-if [[ ! -f "$BUILD_DIR/large_assembly.s" ]]; then
-    cd $BUILD_DIR
-    wget $LARGE_ASSET_BASE_URL/large_assembly.s -o large_assembly.s
-    cd $MY_CWD
-fi
-
-if [[ "${BB_COMPILE_SHADERS}" == "1" ]]; then
-    glslangValidator -V bootstrap/std/shaders/rect.vert -o $BUILD_DIR/rect.vert.spv --quiet
-    glslangValidator -V bootstrap/std/shaders/rect.frag -o $BUILD_DIR/rect.frag.spv --quiet
-fi
-
-BUILD_OUT=$BUILD_DIR/build
-C_COMPILER=clang
-TIME_TRACE=1
-BB_TIMETRACE=0
-GCC_ARGS=
-CLANG_ARGS=
-TIME_TRACE_ARG=
-
-if [[ $C_COMPILER == "clang"* ]]; then
-    CLANG_ARGS=-ferror-limit=1
-    if [[ "$TIME_TRACE" == "1" ]]; then
-        CLANG_ARGS="$CLANG_ARGS -ftime-trace"
-        BB_TIMETRACE=1
-    else
-        CLANG_ARGS="$CLANG_ARGS -ftime-trace"
-    fi
-elif [[ $C_COMPILER == "gcc"* ]]; then
-    GCC_ARGS=-fmax-errors=1
-fi
-
-$C_COMPILER build.c -g -o $BUILD_OUT -Ibootstrap -std=gnu2x $CLANG_ARGS $GCC_ARGS -DBB_TIMETRACE=$BB_TIMETRACE -DBB_CI=$BB_CI -DBB_BUILD_TYPE=\"$BB_BUILD_TYPE\" -DBB_ERROR_ON_WARNINGS=$BB_ERROR_ON_WARNINGS -DBB_ERROR_LIMIT=$BB_ERROR_LIMIT
-$BUILD_OUT $@
-exit 0
+cd build # the build/ tree is created and configured by generate.sh
+ninja --quiet
+cd ..
diff --git a/generate.sh b/generate.sh
new file mode 100755
index 0000000..6c7264a
--- /dev/null
+++ b/generate.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -eu
+rm -rf build # always reconfigure from a clean tree
+mkdir build
+cd build
+cmake .. -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_LINKER_TYPE=MOLD -DCMAKE_EXPORT_COMPILE_COMMANDS=ON # -G Ninja: build.sh drives this tree with ninja
+cd ..
diff --git a/old_build.sh b/old_build.sh
new file mode 100755
index 0000000..09b3c32
--- /dev/null
+++ b/old_build.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+set -eu
+
+MY_CWD=$PWD
+
+if [[ -z "${BB_CI-}" ]]; then
+    BB_CI=0
+fi
+
+if [[ -z "${BB_BUILD_TYPE-}" ]]; then
+    BB_BUILD_TYPE=debug
+fi
+
+if [[ -z "${BB_ERROR_ON_WARNINGS-}" ]]; then
+    BB_ERROR_ON_WARNINGS=$BB_CI
+fi
+
+if [[ -z "${BB_ERROR_LIMIT-}" ]]; then
+    BB_ERROR_LIMIT=$((1 - BB_CI))
+fi
+
+BB_COMPILE_SHADERS=0
+
+BUILD_DIR=cache
+LARGE_ASSET_BASE_URL=https://github.com/birth-software/bloat-buster/releases/download/large-assets
+mkdir -p $BUILD_DIR
+
+if [[ ! -f "$BUILD_DIR/large_assembly.s" ]]; then
+    cd $BUILD_DIR
+    wget $LARGE_ASSET_BASE_URL/large_assembly.s -o large_assembly.s
+    cd $MY_CWD
+fi
+
+if [[ "${BB_COMPILE_SHADERS}" == "1" ]]; then
+    glslangValidator -V bootstrap/std/shaders/rect.vert -o $BUILD_DIR/rect.vert.spv --quiet
+    glslangValidator -V bootstrap/std/shaders/rect.frag -o $BUILD_DIR/rect.frag.spv --quiet
+fi
+
+BUILD_OUT=$BUILD_DIR/build
+C_COMPILER=clang
+TIME_TRACE=1
+BB_TIMETRACE=0
+GCC_ARGS=
+CLANG_ARGS=
+TIME_TRACE_ARG=
+
+if [[ $C_COMPILER == "clang"* ]]; then
+    CLANG_ARGS=-ferror-limit=1
+    if [[ "$TIME_TRACE" == "1" ]]; then
+        CLANG_ARGS="$CLANG_ARGS -ftime-trace"
+        BB_TIMETRACE=1
+    else
+        CLANG_ARGS="$CLANG_ARGS -ftime-trace"
+    fi
+elif [[ $C_COMPILER == "gcc"* ]]; then
+    GCC_ARGS=-fmax-errors=1
+fi
+
+$C_COMPILER build.c -g -o $BUILD_OUT -Ibootstrap -std=gnu2x $CLANG_ARGS $GCC_ARGS -DBB_TIMETRACE=$BB_TIMETRACE -DBB_CI=$BB_CI -DBB_BUILD_TYPE=\"$BB_BUILD_TYPE\" -DBB_ERROR_ON_WARNINGS=$BB_ERROR_ON_WARNINGS -DBB_ERROR_LIMIT=$BB_ERROR_LIMIT
+$BUILD_OUT $@
+exit 0
diff --git a/src/compiler.cpp b/src/compiler.cpp
new file mode 100644
index 0000000..e704998
--- /dev/null
+++ b/src/compiler.cpp
@@ -0,0 +1,323 @@
+#include
+
+fn void compile(Arena& arena, Options options) // Builds the base type table and a Module from `options`, then runs parse() and emit()
+{
+    auto base_allocation_type_count = i128_offset + // 64 * 2 for basic integer types
+        2 + // u128, s128
+        2; // void, noreturn
+    auto base_type_allocation = arena_allocate<Type>(arena, base_allocation_type_count); // one contiguous array so types can be found by index
+
+    auto* type_it = base_type_allocation.pointer;
+
+    bool signs[] = {false, true};
+    Type* previous = 0;
+
+    for (bool sign: signs) // unsigned block first, then signed: index = bit_count - 1 + 64 * is_signed
+    {
+        for (u32 bit_index = 0; bit_index < 64; bit_index += 1)
+        {
+            auto bit_count = bit_index + 1;
+            auto first_digit = (u8)(bit_count < 10 ? bit_count % 10 + '0' : bit_count / 10 + '0');
+            auto second_digit = (u8)(bit_count > 9 ? bit_count % 10 + '0' : 0);
+            u8 name_buffer[] = { u8(sign ? 's' : 'u'), first_digit, second_digit }; // e.g. "u8", "s32"
+            u64 name_length = 2 + (bit_count > 9); // prefix letter plus one or two digits
+            auto name_stack = String{name_buffer, name_length};
+
+            auto name = arena_duplicate_string(arena, name_stack); // name_buffer lives on the stack, so copy it into the arena
+
+            *type_it = {
+                .integer = {
+                    .bit_count = bit_count,
+                    .is_signed = sign,
+                },
+                .id = TypeId::integer,
+                .name = name,
+            };
+            if (previous) previous->next = type_it; // chain the types in allocation order
+            previous = type_it;
+            type_it += 1;
+        }
+    }
+
+    for (bool sign: signs) // u128 then s128, stored at i128_offset + is_signed
+    {
+        auto name = sign ? str("s128") : str("u128");
+        *type_it = {
+            .integer = {
+                .bit_count = 128,
+                .is_signed = sign,
+            },
+            .id = TypeId::integer,
+            .name = name,
+            .next = previous, // temporary value: overwritten by the next iteration or by the void link below
+        };
+        if (previous) previous->next = type_it;
+        previous = type_it;
+        type_it += 1;
+    }
+
+    auto void_type = type_it;
+    type_it += 1;
+    auto noreturn_type = type_it;
+    type_it += 1;
+    assert(type_it - base_type_allocation.pointer == base_allocation_type_count); // every reserved slot was handed out
+
+    previous->next = void_type; // s128 -> void -> noreturn closes the list
+    *void_type = {
+        .id = TypeId::void_type,
+        .name = str("void"),
+        .next = noreturn_type,
+    };
+    *noreturn_type = {
+        .id = TypeId::noreturn,
+        .name = str("noreturn"),
+    };
+
+    auto module = Module{
+        .arena = arena,
+        .content = options.content,
+        .first_type = base_type_allocation.pointer,
+        .last_type = noreturn_type,
+        .scope = {
+            .kind = ScopeKind::global,
+        },
+        .name = options.name,
+        .path = options.path,
+        .executable = options.executable,
+        .objects = options.objects,
+        .target = options.target,
+        .build_mode = options.build_mode,
+        .has_debug_info = options.has_debug_info,
+        .silent = options.silent,
+    };
+    module.void_value = new_value(module);
+    *module.void_value = {
+        .type = void_type,
+        .id = ValueId::infer_or_ignore,
+    };
+
+    parse(module);
+    emit(module);
+}
+
+fn void compile_file(Arena& arena, Compile options) // Validates a "*.bbb" path, prepares the cache directories, reads the file and calls compile()
+{
+    auto relative_file_path = options.relative_file_path;
+    if (relative_file_path.length < 5) // shortest accepted path is one character plus ".bbb"
+    {
+        fail();
+    }
+
+    auto extension_start = string_last_character(relative_file_path, '.');
+    if (extension_start == string_no_match)
+    {
+        fail();
+    }
+
+    if (!relative_file_path(extension_start).equal(str(".bbb")))
+    {
+        fail();
+    }
+
+    auto separator_index = string_last_character(relative_file_path, '/');
+    separator_index = separator_index == string_no_match ? 0 : separator_index;
+
+    auto base_start = separator_index + (separator_index != 0 || relative_file_path[separator_index] == '/'); // skip the '/' itself when one was found
+    auto base_name = relative_file_path(base_start, extension_start); // file name without directory or extension
+
+    auto is_compiler = relative_file_path.equal(str("src/compiler.bbb"));
+
+    String output_path_dir_parts[] = {
+        str(base_cache_dir),
+        is_compiler ? str("/compiler/") : str("/"), // trailing '/' so the output lands inside the "compiler" directory created below
+        build_mode_to_string(options.build_mode),
+        str("_"),
+        options.has_debug_info ?
str("di") : str("nodi"),
+    };
+    auto output_path_dir = arena_join_string(arena, array_to_slice(output_path_dir_parts));
+
+    make_directory(base_cache_dir); // mkdir results are ignored, so directories that already exist are fine
+
+    if (is_compiler)
+    {
+        make_directory(base_cache_dir "/compiler");
+    }
+
+    make_directory(cstr(output_path_dir));
+
+    String output_path_base_parts[] = {
+        output_path_dir,
+        str("/"),
+        base_name,
+    };
+    auto output_path_base = arena_join_string(arena, array_to_slice(output_path_base_parts));
+    String output_object_path_parts[] = {
+        output_path_base,
+        str(".o"),
+    };
+    auto output_object_path = arena_join_string(arena, array_to_slice(output_object_path_parts));
+    auto output_executable_path = output_path_base; // the executable has no extension
+
+    auto file_content = file_read(arena, relative_file_path);
+    auto file_path = path_absolute(arena, relative_file_path);
+    auto c_abi_object_path = str("build/c_abi.o");
+
+    String objects[] = {
+        c_abi_object_path,
+        output_object_path,
+    };
+    Slice<String> object_slice = array_to_slice(objects);
+    object_slice = object_slice(!base_name.equal(str("c_abi"))); // start at index 1 (dropping c_abi.o) unless the module itself is named "c_abi"
+
+    compile(arena, {
+        .content = file_content,
+        .path = file_path,
+        .executable = output_executable_path,
+        .name = base_name,
+        .objects = object_slice,
+        .target = {
+            .cpu = CPUArchitecture::x86_64,
+            .os = OperatingSystem::linux_,
+        },
+        .build_mode = options.build_mode,
+        .has_debug_info = options.has_debug_info,
+        .silent = options.silent,
+    });
+}
+
+global_variable String names[] = { // test names; the 'test' command compiles tests/<name>.bbb for each
+    str("minimal"),
+};
+
+void entry_point(Slice<const char*> arguments, Slice<const char*> environment) // Usage: bb compile <file.bbb> [build_mode] [true|false]  or  bb test
+{
+    Arena& arena = arena_initialize_default(8 * mb);
+
+    if (arguments.length < 2)
+    {
+        fail_with_message(str("error: Not enough arguments\n"));
+    }
+
+    String command_string = c_string_to_slice(arguments[1]);
+    String command_strings[] = { // must stay in the same order as enum class Command
+        str("compile"),
+        str("test"),
+    };
+    static_assert(array_length(command_strings) == (u64)Command::count);
+
+    backing_type(Command) i;
+    for (i = 0; i < (backing_type(Command))Command::count; i += 1)
+    {
+        String candidate = command_strings[i];
+        if (candidate.equal(command_string))
+        {
+            break;
+        }
+    }
+
+    auto command = (Command)i; // Command::count when nothing matched
+
+    switch (command)
+    {
+        case Command::compile:
+            {
+                if (arguments.length < 3)
+                {
+                    fail_with_message(str("Not enough arguments for command 'compile'\n"));
+                }
+
+                auto build_mode = BuildMode::debug_none;
+                auto has_debug_info = true;
+
+                if (arguments.length >= 4)
+                {
+                    auto build_mode_string = c_string_to_slice(arguments[3]);
+                    String build_mode_strings[] = { // must stay in the same order as enum class BuildMode
+                        str("debug_none"),
+                        str("debug"),
+                        str("soft_optimize"),
+                        str("optimize_for_speed"),
+                        str("optimize_for_size"),
+                        str("aggressively_optimize_for_speed"),
+                        str("aggressively_optimize_for_size"),
+                    };
+
+                    backing_type(BuildMode) i;
+                    for (i = 0; i < (backing_type(BuildMode))BuildMode::count; i += 1)
+                    {
+                        String candidate = build_mode_strings[i];
+                        if (build_mode_string.equal(candidate))
+                        {
+                            break;
+                        }
+                    }
+
+                    build_mode = (BuildMode)i;
+                    if (build_mode == BuildMode::count)
+                    {
+                        fail_with_message(str("Invalid build mode\n"));
+                    }
+                }
+
+                if (arguments.length >= 5)
+                {
+                    auto has_debug_info_string = c_string_to_slice(arguments[4]); // fifth argument; index 3 is the build mode parsed above
+                    if (has_debug_info_string.equal(str("true")))
+                    {
+                        has_debug_info = true;
+                    }
+                    else if (has_debug_info_string.equal(str("false")))
+                    {
+                        has_debug_info = false;
+                    }
+                    else
+                    {
+                        fail_with_message(str("Wrong value for has_debug_info\n"));
+                    }
+                }
+
+                auto relative_file_path = c_string_to_slice(arguments[2]);
+
+                compile_file(arena, {
+                    .relative_file_path = relative_file_path,
+                    .build_mode = build_mode,
+                    .has_debug_info = has_debug_info,
+                    .silent = false,
+                });
+            } break;
+        case Command::test:
+            {
+                // TODO: provide more arguments
+                if (arguments.length != 2)
+                {
+                    fail_with_message(str("error: 'test' command takes no arguments"));
+                }
+
+                // TODO: introduce build mode, debug info switch, etc
+
+                for (auto name: names)
+                {
+                    auto position = arena.position; // remember the arena mark so each test's allocations are released
+
+                    String relative_file_path_parts[] = { str("tests/"), name, str(".bbb") };
+                    auto relative_file_path = arena_join_string(arena, array_to_slice(relative_file_path_parts));
+                    auto build_mode = BuildMode::debug_none;
+                    bool has_debug_info = true;
+                    compile_file(arena, {
+                        .relative_file_path = relative_file_path,
+                        .build_mode = build_mode,
+                        .has_debug_info = has_debug_info,
+                        .silent = false,
+                    });
+
+                    // TODO: introduce test
+
+                    arena_restore(arena, position);
+                }
+            } break;
+        case Command::count:
+            {
+                fail_with_message(str("error: Invalid command\n"));
+            } break;
+    }
+}
diff --git a/src/compiler.h b/src/compiler.h
new file mode 100644
index 0000000..8f6ac99
--- /dev/null
+++ b/src/compiler.h
@@ -0,0 +1,411 @@
+#pragma once
+
+#include
+
+#define report_error() trap_raw()
+
+enum class Command
+{
+    compile,
+    test,
+    count, // number of commands; also the "no match" result in entry_point
+};
+
+enum class BuildMode
+{
+    debug_none,
+    debug,
+    soft_optimize,
+    optimize_for_speed,
+    optimize_for_size,
+    aggressively_optimize_for_speed,
+    aggressively_optimize_for_size,
+    count,
+};
+
+fn String build_mode_to_string(BuildMode build_mode) // Returns the enumerator's identifier as a String
+{
+    switch (build_mode)
+    {
+        case_to_name(BuildMode, debug_none);
+        case_to_name(BuildMode, debug);
+        case_to_name(BuildMode, soft_optimize);
+        case_to_name(BuildMode, optimize_for_speed);
+        case_to_name(BuildMode, optimize_for_size);
+        case_to_name(BuildMode, aggressively_optimize_for_speed);
+        case_to_name(BuildMode, aggressively_optimize_for_size);
+        case BuildMode::count: unreachable();
+    }
+}
+
+enum class CPUArchitecture
+{
+    x86_64,
+};
+
+enum class OperatingSystem
+{
+    linux_,
+};
+
+struct Type;
+struct Value;
+struct Local;
+struct Global;
+struct Block;
+struct Statement;
+
+struct Target
+{
+    CPUArchitecture cpu;
+    OperatingSystem os;
+};
+
+struct Compile
+{
+    String relative_file_path;
+    BuildMode build_mode;
+    bool has_debug_info;
+    bool silent;
+};
+
+#define base_cache_dir "bb-cache"
+
+enum class CallingConvention
+{
+    c,
+    count,
+};
+
+enum class InlineBehavior
+{
+    normal,
+    always_inline,
+    no_inline,
+    inline_hint,
+};
+
+struct FunctionAttributes
+{
+
InlineBehavior inline_behavior;
+    bool naked;
+};
+
+enum class TypeId
+{
+    void_type,
+    noreturn,
+    forward_declaration,
+    integer,
+    function,
+    pointer,
+};
+
+struct TypeInteger
+{
+    u32 bit_count;
+    bool is_signed;
+};
+
+struct TypeFunction
+{
+    Type* semantic_return_type;
+    Slice semantic_argument_types; // NOTE(review): the Slice element type appears to be missing here; confirm against the original patch
+    CallingConvention calling_convention;
+    bool is_variable_arguments;
+};
+
+struct TypePointer
+{
+    Type* element_type;
+    Type* next; // next pointer type in the list headed by Module::first_pointer_type
+};
+
+struct Type
+{
+    union // the active member is selected by `id`
+    {
+        TypeInteger integer;
+        TypeFunction function;
+        TypePointer pointer;
+    };
+    TypeId id;
+    String name;
+    Type* next; // next type in the list from Module::first_type to Module::last_type
+};
+
+enum class ScopeKind
+{
+    global,
+    function,
+    local,
+    for_each,
+    macro_declaration,
+    macro_instantiation,
+};
+
+struct Scope
+{
+    Scope* parent;
+    u32 line;
+    u32 column;
+    ScopeKind kind;
+};
+
+enum class StatementId
+{
+    local,
+    expression,
+    return_st,
+};
+
+struct Statement
+{
+    union // the active member is selected by `id`
+    {
+        Local* local;
+        Value* expression;
+        Value* return_st;
+    };
+    Statement* next;
+    StatementId id;
+    u32 line;
+    u32 column;
+};
+
+struct Block
+{
+    Local* first_local;
+    Local* last_local;
+    Statement* first_statement;
+    Scope scope;
+};
+
+enum class ValueId
+{
+    infer_or_ignore,
+    external_function,
+    function,
+    constant_integer,
+};
+
+struct ValueConstantInteger
+{
+    u64 value;
+    bool is_signed;
+};
+
+struct ValueFunction
+{
+    Slice arguments; // NOTE(review): the Slice element type appears to be missing here; confirm against the original patch
+    Scope scope;
+    Block* block;
+    FunctionAttributes attributes;
+};
+
+enum class ValueKind
+{
+    right,
+    left,
+};
+
+struct Value
+{
+    Type* type;
+    union // the active member is selected by `id`
+    {
+        ValueConstantInteger constant_integer;
+        ValueFunction function;
+    };
+    ValueId id;
+};
+
+struct Variable
+{
+    Value* storage;
+    Value* initial_value;
+    Type* type;
+    Scope* scope;
+    String name;
+    u32 line;
+    u32 column;
+};
+
+enum class Linkage
+{
+    internal,
+    external,
+};
+
+struct Global
+{
+    Variable variable;
+    Linkage linkage;
+    Global* next;
+};
+
+struct Local
+{
+    Variable variable;
+};
+
+struct MacroDeclaration
+{
+};
+
+struct MacroInstantiation
+{
+};
+
+struct Module
+{
+    Arena& arena;
+    String content; // source text being parsed
+    u64 offset; // parser cursor into `content`
+    u64 line_offset;
+    u64 line_character_offset;
+
+    Type* first_pointer_type;
+    Type* first_slice_type;
+    Type* first_pair_struct_type;
+    Type* first_array_type;
+
+    Type* first_type; // also the base of the contiguous builtin type array indexed by integer_type()/void_type()
+    Type* last_type;
+    Type* va_list_type;
+
+    Value* void_value;
+    Global* first_global;
+    Global* last_global;
+    Global* current_function;
+    MacroDeclaration* current_macro_declaration;
+    MacroInstantiation* current_macro_instantiation;
+
+    Scope scope;
+
+    String name;
+    String path;
+    String executable;
+    Slice<String> objects;
+
+    Target target;
+    BuildMode build_mode;
+    bool has_debug_info;
+    bool silent;
+};
+
+constexpr u64 i128_offset = 64 * 2; // index of u128: after the 64 unsigned and 64 signed 1..64-bit integer types
+constexpr u64 void_offset = i128_offset + 2; // index of void: after u128 and s128
+
+fn Type* integer_type(Module& module, TypeInteger integer) // Returns the builtin integer type for a width of 1..64 or 128 bits
+{
+    assert(integer.bit_count);
+    assert(integer.bit_count <= 64 || integer.bit_count == 128);
+    auto index = integer.bit_count > 64 ? (i128_offset + integer.is_signed) : (integer.bit_count - 1 + (64 * integer.is_signed)); // only 128 is wider than 64; the other widths sit in the unsigned or signed block
+    return module.first_type + index;
+}
+
+fn Type* void_type(Module& module)
+{
+    return module.first_type + void_offset;
+}
+
+fn Type* noreturn_type(Module& module)
+{
+    return void_type(module) + 1; // noreturn is stored right after void
+}
+
+struct Options
+{
+    String content;
+    String path;
+    String executable;
+    String name;
+    Slice<String> objects;
+    Target target;
+    BuildMode build_mode;
+    bool has_debug_info;
+    bool silent;
+};
+
+fn Type* type_allocate_init(Module& module, Type type) // Copies `type` into the arena and appends it to the module's type list
+{
+    auto* result = &arena_allocate<Type>(module.arena, 1)[0];
+    *result = type;
+
+    if (module.last_type)
+    {
+        module.last_type->next = result;
+        module.last_type = result;
+    }
+    else
+    {
+        assert(!module.first_type);
+        module.first_type = result;
+        module.last_type = result;
+    }
+
+    return result;
+}
+
+fn Value* new_value(Module& module) // Allocates one Value from the module arena; the caller fills it in
+{
+    auto* result = &arena_allocate<Value>(module.arena, 1)[0];
+    return result;
+}
+
+fn Global* new_global(Module& module) // Allocates one Global and appends it to the module's global list
+{
+    auto* result = &arena_allocate<Global>(module.arena, 1)[0];
+
if (module.last_global)
+    {
+        module.last_global->next = result;
+        module.last_global = result;
+    }
+    else
+    {
+        assert(!module.first_global);
+        module.first_global = result;
+        module.last_global = result;
+    }
+
+    return result;
+}
+
+fn Type* get_pointer_type(Module& module, Type* element_type) // Returns the pointer type to `element_type`, creating and caching it on first use
+{
+    auto last_pointer_type = module.first_pointer_type;
+    while (last_pointer_type) // search the cache; on a miss this is left at the list tail (or null for an empty list)
+    {
+        assert(last_pointer_type->id == TypeId::pointer);
+        if (last_pointer_type->pointer.element_type == element_type)
+        {
+            return last_pointer_type;
+        }
+
+        if (!last_pointer_type->pointer.next)
+        {
+            break;
+        }
+
+        last_pointer_type = last_pointer_type->pointer.next;
+    }
+
+    String name_parts[] = { // pointer types are named "&" followed by the element type name
+        str("&"),
+        element_type->name,
+    };
+    auto result = type_allocate_init(module, {
+        .pointer = {
+            .element_type = element_type,
+        },
+        .id = TypeId::pointer,
+        .name = arena_join_string(module.arena, array_to_slice(name_parts)),
+    });
+    (last_pointer_type ? last_pointer_type->pointer.next : module.first_pointer_type) = result; // link into the cache (after the tail, or as head) so later lookups find it
+    return result;
+}
+
+void parse(Module& module);
+void emit(Module& module);
diff --git a/src/emitter.cpp b/src/emitter.cpp
new file mode 100644
index 0000000..6c71d81
--- /dev/null
+++ b/src/emitter.cpp
@@ -0,0 +1,5 @@
+#include
+void emit(Module& module) // not implemented yet: the body is empty
+{
+
+}
diff --git a/src/entry_point.cpp b/src/entry_point.cpp
new file mode 100644
index 0000000..8644096
--- /dev/null
+++ b/src/entry_point.cpp
@@ -0,0 +1,13 @@
+#include
+void entry_point(Slice<const char*> arguments, Slice<const char*> environment);
+int main(int argc, const char* argv[], const char* envp[]) // Wraps argv/envp into slices and forwards them to entry_point()
+{
+    auto* envp_end = envp;
+    while (*envp_end) // envp is null-terminated; walk it to count the entries
+    {
+        envp_end += 1;
+    }
+
+    entry_point({argv, (u64)argc}, {envp, (u64)(envp_end - envp)});
+    return 0;
+}
diff --git a/src/entry_point.h b/src/entry_point.h
new file mode 100644
index 0000000..8ef2889
--- /dev/null
+++ b/src/entry_point.h
@@ -0,0 +1,2 @@
+#include
+
diff --git a/src/lib.h b/src/lib.h
new file mode 100644
index 0000000..4b2a92d
--- /dev/null
+++ b/src/lib.h
@@ -0,0 +1,651 @@
+#pragma once
+
+#define global_variable static
+
+#define fn static
+#define
unused(x) (void)(x)
+#define breakpoint() __builtin_debugtrap()
+#define string_literal_length(s) (sizeof(s) - 1)
+#define string_literal(s) ((String){ .pointer = (u8*)(s), .length = string_literal_length(s), })
+#define str(x) string_literal(x)
+
+#define array_length(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define array_to_slice(arr) { .pointer = (arr), .length = array_length(arr) }
+#define array_to_bytes(arr) { .pointer = (u8*)(arr), .length = sizeof(arr) }
+#define backing_type(E) __underlying_type(E)
+
+#define unreachable_raw() __builtin_unreachable()
+#define trap_raw() __builtin_trap()
+#if BB_DEBUG
+#define unreachable() trap_raw()
+#else
+#define unreachable() unreachable_raw()
+#endif
+
+#define expect(x, b) __builtin_expect(!!(x), b)
+#define likely(x) expect(x, 1)
+#define unlikely(x) expect(x, 0)
+
+#define assert(x) (unlikely(!(x)) ? unreachable() : unused(0)) // traps when BB_DEBUG is set; otherwise a failed check is __builtin_unreachable()
+
+#define case_to_name(E,n) case E::n: return str(#n)
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long u64;
+
+typedef signed char s8;
+typedef signed short s16;
+typedef signed int s32;
+typedef signed long s64;
+
+typedef float f32;
+typedef double f64;
+
+fn u64 align_forward(u64 value, u64 alignment) // Rounds `value` up to a multiple of `alignment`; the mask arithmetic is only correct for powers of two
+{
+    assert(alignment != 0);
+    auto mask = alignment - 1;
+    auto result = (value + mask) & ~mask;
+    return result;
+}
+
+constexpr u64 kb = 1024;
+constexpr u64 mb = 1024 * 1024;
+constexpr u64 gb = 1024 * 1024 * 1024;
+
+extern "C" [[noreturn]] void exit(s32 status);
+extern "C" void *memcpy (void* __restrict destination, const void *__restrict source, u64 byte_count);
+extern "C" s32 memcmp (const void* a, const void *b, u64 __n); // the C library memcmp returns an int, not a pointer
+extern "C" const char* realpath(const char* __restrict path, char* resolved_path);
+
+struct RawSlice
+{
+    void* pointer;
+    u64 length;
+};
+
+fn bool raw_slice_equal(RawSlice a, RawSlice b, u64 size_of_T) // True when both slices have the same length and the same bytes
+{
+    bool result = a.length == b.length;
+    if (result)
+    {
+        if (a.pointer != b.pointer) // same pointer and same length: equal without comparing bytes
+        {
+            result = memcmp(a.pointer, b.pointer, a.length * size_of_T) == 0;
+        }
+    }
+
+    return result;
+}
+
+fn RawSlice raw_slice_slice(RawSlice s, u64 start, u64 end, u64 size_of_T)
+{
+    return {(u8*)s.pointer + (size_of_T * start), end - start};
+}
+
+template <typename T>
+struct Slice // Non-owning pointer + length view over T
+{
+    T* pointer;
+    u64 length;
+
+    T* begin() { return pointer; }
+    T* end() { return pointer + length; }
+
+    T& operator[](u64 index)
+    {
+        assert(index < length);
+        return pointer[index];
+    }
+
+    bool equal(Slice other)
+    {
+        return raw_slice_equal(*(RawSlice*)this, *(RawSlice*)&other, sizeof(T)); // relies on Slice having the same pointer/length layout as RawSlice
+    }
+
+    Slice operator()(u64 start, u64 end) // sub-slice [start, end); bounds are not checked
+    {
+        return {pointer + start, end - start};
+    }
+
+    Slice operator()(u64 start) // sub-slice from `start` to the end; bounds are not checked
+    {
+        return {pointer + start, length - start};
+    }
+};
+
+using String = Slice<u8>;
+fn const char* cstr(String string) // Reinterprets a String as a C string; requires a zero byte right after the last character
+{
+    assert(string.pointer[string.length] == 0);
+    return (const char*) string.pointer;
+}
+
+fn String c_string_to_slice(const char* cstr) // Wraps a C string without copying; the length excludes the terminator
+{
+    const auto* end = cstr;
+    while (*end)
+    {
+        end += 1;
+    }
+
+    return { (u8*)cstr, u64(end - cstr) };
+}
+
+constexpr auto string_no_match = ~(u64)0;
+
+fn u64 string_first_character(String string, u8 ch) // Index of the first `ch`, or string_no_match
+{
+    u64 result = string_no_match;
+
+    for (u64 i = 0; i < string.length; i += 1)
+    {
+        if (string[i] == ch)
+        {
+            result = i;
+            break;
+        }
+    }
+
+    return result;
+}
+
+fn u64 string_last_character(String string, u8 ch) // Index of the last `ch`, or string_no_match
+{
+    u64 result = string_no_match;
+    u64 i = string.length;
+
+    while (i > 0) // decrement first so the unsigned index never wraps
+    {
+        i -= 1;
+
+        if (string[i] == ch)
+        {
+            result = i;
+            break;
+        }
+    }
+
+    return result;
+}
+
+struct ProtectionFlags
+{
+    u8 read:1;
+    u8 write:1;
+    u8 execute:1;
+};
+
+struct MapFlags
+{
+    u8 priv:1;
+    u8 anonymous:1;
+    u8 no_reserve:1;
+    u8 populate:1;
+};
+
+struct PROT
+{
+    u32 read:1;
+    u32 write:1;
+    u32 execute:1;
+    u32 sem:1;
+    u32 _:28;
+};
+static_assert(sizeof(PROT) == sizeof(u32));
+
+struct MAP
+{
+    enum class Type : u32
+    {
+        shared = 1, // MAP_SHARED
+        priv = 2, // MAP_PRIVATE
+        shared_validate = 3, // MAP_SHARED_VALIDATE
+    };
+
+    Type type:4;
+    u32 fixed:1;
+    u32
anonymous:1;
+    u32 bit32:1;
+    u32 _0: 1;
+    u32 grows_down:1;
+    u32 _1: 2;
+    u32 deny_write:1;
+    u32 executable:1;
+    u32 locked:1;
+    u32 no_reserve:1;
+    u32 populate:1;
+    u32 non_block:1;
+    u32 stack:1;
+    u32 huge_tlb:1;
+    u32 sync:1;
+    u32 fixed_no_replace:1;
+    u32 _2:5;
+    u32 uninitialized:1;
+    u32 _3:5;
+};
+static_assert(sizeof(MAP) == sizeof(u32));
+
+struct OPEN
+{
+    enum class AccessMode : u32
+    {
+        read_only = 0,
+        write_only = 1,
+        read_write = 2,
+    };
+
+    AccessMode access_mode:2;
+    u32 _0:4;
+    u32 creat:1;
+    u32 excl:1;
+    u32 no_ctty:1;
+    u32 trunc:1;
+    u32 append:1;
+    u32 non_block:1;
+    u32 d_sync:1;
+    u32 a_sync:1;
+    u32 direct:1;
+    u32 _1:1;
+    u32 directory:1;
+    u32 no_follow:1;
+    u32 no_a_time:1;
+    u32 cloexec:1;
+    u32 sync:1;
+    u32 path:1;
+    u32 tmp_file:1;
+    u32 _2:9;
+};
+static_assert(sizeof(OPEN) == sizeof(u32));
+
+using uid_t = u32;
+using gid_t = u32;
+using off_t = s64;
+using ino_t = u64;
+using dev_t = u64;
+
+struct timespec
+{
+    s64 seconds;
+    s64 nanoseconds;
+};
+
+struct Stat
+{
+    dev_t dev;
+    ino_t ino;
+    u64 nlink;
+
+    u32 mode;
+    uid_t uid;
+    gid_t gid;
+    u32 _0;
+    dev_t rdev;
+    off_t size;
+    s64 blksize;
+    s64 blocks;
+
+    timespec atim;
+    timespec mtim;
+    timespec ctim;
+    s64 _1[3];
+};
+
+extern "C" s32* __errno_location();
+extern "C" void* mmap(void*, u64, PROT, MAP, s32, s64);
+extern "C" s32 mprotect(void*, u64, PROT);
+extern "C" s64 ptrace(s32, s32, u64, u64);
+extern "C" s32 open(const char*, OPEN, ...);
+extern "C" s32 close(s32);
+extern "C" s32 fstat(s32, Stat*);
+extern "C" s64 write(s32, u8*, u64);
+extern "C" s64 read(s32, u8*, u64);
+extern "C" s32 mkdir(const char*, u64);
+
+enum class Error : u32
+{
+    success = 0,
+    perm = 1,
+};
+
+fn Error errno()
+{
+    return (Error)*__errno_location();
+}
+
+fn void* os_reserve(void* base, u64 size, ProtectionFlags protection, MapFlags map) // mmap wrapper; asserts that the mapping succeeded
+{
+    auto protection_flags = PROT
+    {
+        .read = protection.read,
+        .write = protection.write,
+        .execute = protection.execute,
+    };
+
+    auto map_flags = MAP
+    {
+        .type = map.priv ? MAP::Type::priv : MAP::Type::shared,
+        .anonymous = map.anonymous,
+        .no_reserve = map.no_reserve,
+        .populate = map.populate,
+    };
+
+    auto* address = mmap(base, size, protection_flags, map_flags, -1, 0);
+    assert((u64)address != ~(u64)0); // mmap reports failure as (void*)-1
+
+    return address;
+}
+
+fn void os_commit(void* address, u64 size, ProtectionFlags protection) // mprotect wrapper; asserts success
+{
+    auto protection_flags = PROT
+    {
+        .read = protection.read,
+        .write = protection.write,
+        .execute = protection.execute,
+    };
+    auto result = mprotect(address, size, protection_flags);
+    assert(!result);
+}
+
+struct OpenFlags
+{
+    u32 truncate:1;
+    u32 execute:1;
+    u32 write:1;
+    u32 read:1;
+    u32 create:1;
+    u32 directory:1;
+};
+
+struct Permissions
+{
+    u32 read:1;
+    u32 write:1;
+    u32 execute:1;
+};
+
+fn s32 os_open(String path, OpenFlags flags, Permissions permissions) // open() wrapper; returns the fd (negative on failure). `path` must be zero-terminated (see cstr)
+{
+    OPEN::AccessMode access_mode;
+    if (flags.read && flags.write)
+    {
+        access_mode = OPEN::AccessMode::read_write;
+    }
+    else if (flags.read)
+    {
+        access_mode = OPEN::AccessMode::read_only;
+    }
+    else if (flags.write)
+    {
+        access_mode = OPEN::AccessMode::write_only; // a write-only request must not open the file read-only
+    }
+    else
+    {
+        unreachable(); // callers must request read and/or write
+    }
+
+    auto o = OPEN {
+        .access_mode = access_mode,
+        .creat = flags.create,
+        .trunc = flags.truncate,
+        .directory = flags.directory,
+    };
+
+    // TODO:
+    auto mode = permissions.execute ? 0755 : 0644; // only `execute` is consulted for now
+
+    auto fd = open(cstr(path), o, mode);
+    return fd;
+}
+
+fn bool is_file_valid(s32 fd)
+{
+    return fd >= 0;
+}
+
+fn void os_close(s32 fd)
+{
+    assert(is_file_valid(fd));
+
+    auto result = close(fd);
+    assert(result == 0);
+}
+
+fn u64 os_file_size(s32 fd) // Size in bytes as reported by fstat
+{
+    Stat stat;
+    auto result = fstat(fd, &stat);
+    assert(result == 0);
+    return (u64)stat.size;
+}
+
+fn u64 os_read_partially(s32 fd, u8* buffer, u64 byte_count) // One read() call; asserts that at least one byte was read
+{
+    auto result = read(fd, buffer, byte_count);
+    assert(result > 0);
+    return (u64)result;
+}
+
+fn void os_read(s32 fd, String buffer, u64 byte_count) // Loops over partial reads until exactly `byte_count` bytes are in `buffer`
+{
+    assert(byte_count <= buffer.length);
+    u64 it_byte_count = 0;
+    while (it_byte_count < byte_count)
+    {
+        auto read_byte_count = os_read_partially(fd, buffer.pointer + it_byte_count, byte_count - it_byte_count);
+        it_byte_count += read_byte_count;
+    }
+    assert(it_byte_count == byte_count);
+}
+
+fn u64 os_write_partially(s32 fd, u8* buffer, u64 byte_count) // One write() call; asserts that at least one byte was written
+{
+    auto result = write(fd, buffer, byte_count);
+    assert(result > 0);
+    return (u64)result;
+}
+
+fn void os_write(s32 fd, String content) // Loops over partial writes until all of `content` is written
+{
+    u64 it_byte_count = 0;
+    while (it_byte_count < content.length)
+    {
+        auto written_byte_count = os_write_partially(fd, content.pointer + it_byte_count, content.length - it_byte_count);
+        it_byte_count += written_byte_count;
+    }
+    assert(it_byte_count == content.length);
+}
+
+fn String path_absolute_stack(String buffer, String relative_path) // Resolves `relative_path` into `buffer` with realpath; returns {} on failure
+{
+    const char* absolute_path = realpath(cstr(relative_path), (char*)buffer.pointer);
+    if (absolute_path)
+    {
+        auto slice = c_string_to_slice(absolute_path);
+        assert(slice.length < buffer.length);
+        return slice;
+    }
+    return {};
+}
+
+fn bool os_is_debugger_present() // True when ptrace(0, ...) fails with errno == perm
+{
+    bool result = false;
+    if (ptrace(0, 0, 0, 0) == -1)
+    {
+        auto errno_error = errno();
+        result = errno_error == Error::perm;
+    }
+
+    return result;
+}
+
+fn void make_directory(const char* path) // Best effort: the mkdir result is deliberately ignored
+{
+    auto result = mkdir(path, 0755);
+    unused(result);
+}
+
+fn void
print(String string) // Writes `string` to file descriptor 1
+{
+    os_write(1, string);
+}
+
+struct ArenaInitialization
+{
+    u64 reserved_size;
+    u64 granularity;
+    u64 initial_size;
+};
+
+struct Arena // Header stored at the base of its own mapping; allocations are offsets from this address
+{
+    u64 reserved_size;
+    u64 position; // offset of the next free byte, measured from the Arena itself
+    u64 os_position; // allocations may not extend past this offset
+    u64 granularity;
+    u8 reserved[32];
+};
+
+constexpr u64 arena_minimum_position = sizeof(Arena); // the first allocation starts right after the header
+
+fn Arena& arena_initialize(ArenaInitialization i) // Maps `reserved_size` bytes and places the Arena header at the start of the mapping
+{
+    ProtectionFlags protection_flags = {
+        .read = 1,
+        .write = 1,
+    };
+    MapFlags map_flags = {
+        .priv = 1,
+        .anonymous = 1,
+        .no_reserve = 1,
+    };
+
+    auto* arena = (Arena*)os_reserve(0, i.reserved_size, protection_flags, map_flags);
+    os_commit(arena, i.initial_size, { .read = 1, .write = 1 });
+
+    *arena = {
+        .reserved_size = i.reserved_size,
+        .position = arena_minimum_position,
+        .os_position = i.initial_size,
+        .granularity = i.granularity,
+    };
+
+    return *arena;
+}
+
+fn inline Arena& arena_initialize_default(u64 initial_size) // 4 GiB reservation with 4 KiB granularity
+{
+    ArenaInitialization i = {
+        .reserved_size = 4 * gb,
+        .granularity = 4 * kb,
+        .initial_size = initial_size,
+    };
+    return arena_initialize(i);
+}
+
+fn void* arena_allocate_bytes(Arena& arena, u64 size, u64 alignment) // Bump-allocates `size` bytes at the next offset aligned to `alignment`
+{
+    auto aligned_offset = align_forward(arena.position, alignment);
+    auto aligned_size_after = aligned_offset + size;
+
+    if (aligned_size_after > arena.os_position) // NOTE(review): no growth path here; exceeding os_position hits unreachable()
+    {
+        unreachable();
+    }
+
+    auto* result = (u8*)&arena + aligned_offset;
+    arena.position = aligned_size_after;
+    assert(arena.position <= arena.os_position);
+
+    return result;
+}
+
+template
+fn Slice arena_allocate(Arena& arena, u64 count) // NOTE(review): the template parameter list and Slice element type appear to be missing here; confirm against the original patch
+{
+    return { (T*)arena_allocate_bytes(arena, sizeof(T) * count, alignof(T)), count };
+}
+
+fn String arena_join_string(Arena& arena, Slice pieces) // Concatenates `pieces` into one arena-allocated, zero-terminated string
+{
+    u64 size = 0;
+    for (auto piece : pieces)
+    {
+        size += piece.length;
+    }
+
+    auto* pointer = (u8*)arena_allocate_bytes(arena, size + 1, 1); // +1 for the terminator, which is not counted in the returned length
+    u64 i = 0;
+    for (auto piece : pieces)
+    {
+        memcpy(pointer + i, piece.pointer, piece.length);
+        i += piece.length;
+    }
+
+    assert(i == size);
+    pointer[i] = 0;
+
+    return { pointer, size };
+}
+
+fn String arena_duplicate_string(Arena& arena, String string) // Copies `string` into the arena and appends a zero terminator
+{
+    auto memory = (u8*)arena_allocate_bytes(arena, string.length + 1, 1);
+    memcpy(memory, string.pointer, string.length);
+    memory[string.length] = 0;
+    return { memory, string.length};
+}
+
+fn void arena_restore(Arena& arena, u64 position) // Rewinds the arena to a previously saved `position`
+{
+    assert(position <= arena.position);
+    arena.position = position;
+}
+
+fn void arena_reset(Arena& arena) // Rewinds the arena to just after its header
+{
+    arena.position = arena_minimum_position;
+}
+
+fn String path_absolute(Arena& arena, String relative_path) // Resolves the path into a stack buffer, then copies the result into the arena
+{
+    u8 buffer[4096];
+    auto stack = path_absolute_stack(array_to_slice(buffer), relative_path);
+    auto result = arena_duplicate_string(arena, stack);
+    return result;
+}
+
+fn String file_read(Arena& arena, String file_path) // Reads the whole file into the arena; returns {} when the file cannot be opened
+{
+    auto fd = os_open(file_path, { .read = 1 }, { .read = 1 });
+    String result = {};
+
+    if (is_file_valid(fd))
+    {
+        auto file_size = os_file_size(fd);
+        result = arena_allocate(arena, file_size);
+        os_read(fd, result, file_size);
+        os_close(fd);
+    }
+
+    return result;
+}
+
+[[noreturn]] fn void fail() // Traps when os_is_debugger_present() reports a debugger, otherwise exits with status 1
+{
+    if (os_is_debugger_present())
+    {
+        trap_raw();
+    }
+    exit(1);
+}
+
+[[noreturn]] fn void fail_with_message(String string) // Prints `string`, then fails
+{
+    print(string);
+    fail();
+}
diff --git a/src/parser.cpp b/src/parser.cpp
new file mode 100644
index 0000000..5f2c834
--- /dev/null
+++ b/src/parser.cpp
@@ -0,0 +1,1149 @@
+#include
+
+enum class TokenId
+{
+    none,
+    end_of_statement,
+    integer,
+    right_brace,
+};
+
+enum class TokenIntegerKind
+{
+    hexadecimal,
+    decimal,
+    octal,
+    binary,
+};
+
+struct TokenInteger
+{
+    u64 value;
+    TokenIntegerKind kind;
+};
+
+struct Token
+{
+    union
+    {
+        TokenInteger integer;
+    };
+    TokenId id;
+};
+
+enum class Precedence
+{
+    none,
+    assignment,
+    boolean_or,
+    boolean_and,
+    comparison,
+    bitwise,
+    shifting,
+    add_like,
+    div_like,
+    prefix,
+    aggregate_initialization,
+    postfix,
+};
+
+struct ValueBuilder
+{
+    Token token;
+    Value* left;
+
Precedence precedence; + ValueKind kind; + bool allow_assignment_operators; + + inline ValueBuilder with_precedence(Precedence precedence) + { + auto result = *this; + result.precedence = precedence; + return result; + } + + inline ValueBuilder with_token(Token token) + { + auto result = *this; + result.token = token; + return result; + } + + inline ValueBuilder with_left(Value* value) + { + auto result = *this; + result.left = value; + return result; + } +}; + +global_variable constexpr u8 left_bracket = '['; +global_variable constexpr u8 right_bracket = ']'; +global_variable constexpr u8 left_brace = '{'; +global_variable constexpr u8 right_brace = '}'; +global_variable constexpr u8 left_parenthesis = '('; +global_variable constexpr u8 right_parenthesis = ')'; + +fn bool is_space(u8 ch) +{ + return ((ch == ' ') | (ch == '\n')) | ((ch == '\t') | (ch == '\r')); +} + +fn bool is_lower(u8 ch) +{ + return ((ch >= 'a') & (ch <= 'z')); +} + +fn bool is_upper(u8 ch) +{ + return ((ch >= 'A') & (ch <= 'Z')); +} + +fn bool is_decimal(u8 ch) +{ + return ((ch >= '0') & (ch <= '9')); +} + +fn bool is_hexadecimal_alpha_lower(u8 ch) +{ + return ((ch >= 'a') & (ch <= 'f')); +} + +fn bool is_hexadecimal_alpha_upper(u8 ch) +{ + return ((ch >= 'A') & (ch <= 'F')); +} + +fn bool is_hexadecimal_alpha(u8 ch) +{ + return is_hexadecimal_alpha_lower(ch) | is_hexadecimal_alpha_upper(ch); +} + +fn bool is_hexadecimal(u8 ch) +{ + return is_decimal(ch) | is_hexadecimal_alpha(ch); +} + +fn bool is_identifier_start(u8 ch) +{ + return (is_lower(ch) | is_upper(ch)) | (ch == '_'); +} + +fn bool is_identifier(u8 ch) +{ + return is_identifier_start(ch) | is_decimal(ch); +} + +fn u32 get_line(Module& module) +{ + auto line = module.line_offset + 1; + assert(line < ~(u32)0); + return (u32)line; +} + +fn u32 get_column(Module& module) +{ + auto column = module.offset - module.line_character_offset + 1; + assert(column < ~(u32)0); + return (u32)column; +} + +struct Checkpoint +{ + u64 offset; + u64 
line_offset; + u64 line_character_offset; +}; + +fn Checkpoint get_checkpoint(Module& module) +{ + return { + .offset = module.offset, + .line_offset = module.line_offset, + .line_character_offset = module.line_character_offset, + }; +} + +fn void set_checkpoint(Module& module, Checkpoint checkpoint) +{ + module.offset = checkpoint.offset; + module.line_offset = checkpoint.line_offset; + module.line_character_offset = checkpoint.line_character_offset; +} + +fn bool consume_character_if_match(Module& module, u8 expected_ch) +{ + bool is_ch = false; + auto i = module.offset; + if (i < module.content.length) + { + auto ch = module.content[i]; + is_ch = expected_ch == ch; + module.offset = i + is_ch; + } + + return is_ch; +} + +fn void expect_character(Module& module, u8 expected_ch) +{ + if (!consume_character_if_match(module, expected_ch)) + { + report_error(); + } +} + +fn void skip_space(Module& module) +{ + while (1) + { + auto iteration_offset = module.offset; + + while (module.offset < module.content.length) + { + auto ch = module.content[module.offset]; + if (!is_space(ch)) + { + break; + } + + module.line_offset += ch == '\n'; + module.line_character_offset = ch == '\n' ? 
module.offset : module.line_character_offset; + module.offset += 1; + } + + if (module.offset + 1 < module.content.length) + { + auto i = module.offset; + auto first_ch = module.content[i]; + auto second_ch = module.content[i + 1]; + auto is_comment = first_ch == '/' && second_ch == '/'; + + if (is_comment) + { + while (module.offset < module.content.length) + { + auto ch = module.content[module.offset]; + if (ch == '\n') + { + break; + } + module.offset += 1; + } + + if (module.offset < module.content.length) + { + module.line_offset += 1; + module.line_character_offset = module.offset; + module.offset += 1; + } + } + } + + if (module.offset - iteration_offset == 0) + { + break; + } + } +} + +fn String parse_identifier(Module& module) /* Consumes one identifier starting at module.offset and returns that range of module.content; calls report_error() when no identifier character is present there. */ +{ + auto start = module.offset; + + if (start < module.content.length && is_identifier_start(module.content[start])) /* Check bounds first: parse() calls this right after skip_space(), which can leave the offset at end of input. */ + { + module.offset = start + 1; + + while (module.offset < module.content.length) + { + auto i = module.offset; + if (is_identifier(module.content[i])) + { + module.offset = i + 1; + } + else + { + break; + } + } + } + + auto end = module.offset; + if (end - start == 0) /* Nothing consumed, including the end-of-input case. */ + { + report_error(); + } + + return module.content(start, end); +} + +fn u64 accumulate_decimal(u64 accumulator, u8 ch) +{ + return (accumulator * 10) + (ch - '0'); +} + +fn u64 parse_integer_decimal_assume_valid(String string) +{ + u64 value = 0; + + for (u8 ch: string) + { + assert(is_decimal(ch)); + value = accumulate_decimal(value, ch); + } + + return value; +} + +fn Type* parse_type(Module& module) +{ + auto start_character = module.content[module.offset]; + if (is_identifier_start(start_character)) + { + auto identifier = parse_identifier(module); + if (identifier.equal(str("void"))) + { + return void_type(module); + } + else if (identifier.equal(str("noreturn"))) + { + return noreturn_type(module); + } + else + { + auto is_int_type = identifier.length > 1 && (identifier[0] == 's' || identifier[0] == 'u'); + + if (is_int_type) + { + for (auto ch : identifier(1)) + { +
is_int_type = is_int_type && is_decimal(ch); + } + } + + if (is_int_type) + { + bool is_signed; + switch (identifier[0]) + { + case 's': is_signed = true; break; + case 'u': is_signed = false; break; + default: unreachable(); + } + + auto bit_count = parse_integer_decimal_assume_valid(identifier(1)); + if (bit_count == 0) + { + report_error(); + } + if (bit_count > 64) + { + if (bit_count != 128) + { + report_error(); + } + } + + auto result = integer_type(module, { .bit_count = (u32)bit_count, .is_signed = is_signed }); + return result; + } + else + { + trap_raw(); + } + } + } + else if (start_character == '&') + { + trap_raw(); + } + else if (start_character == left_bracket) + { + trap_raw(); + } + else if (start_character == '#') + { + trap_raw(); + } + else + { + report_error(); + } +} + +fn u64 parse_decimal(Module& module) /* Accumulates the run of decimal digits starting at module.offset, advancing the offset past them; returns 0 when there are none. */ +{ + u64 value = 0; + + while (module.offset < module.content.length) /* Stop at end of input instead of indexing past module.content. */ + { + auto ch = module.content[module.offset]; + if (!is_decimal(ch)) + { + break; + } + + module.offset += 1; + value = accumulate_decimal(value, ch); + } + + return value; +} + +fn Token tokenize(Module& module) /* Skips whitespace and comments, then produces the next token from module.content, advancing module.offset past it. */ +{ + skip_space(module); + + auto start_index = module.offset; + if (start_index == module.content.length) + { + report_error(); + } + + auto start_character = module.content[start_index]; + + Token token; + + switch (start_character) + { + case '0': + { + auto next_ch = (start_index + 1 < module.content.length) ? module.content[start_index + 1] : (u8)0; /* A '0' may be the last byte of the input; treat the missing lookahead as "no radix prefix". */ + TokenIntegerKind token_integer_kind; + switch (next_ch) + { + case 'x': token_integer_kind = TokenIntegerKind::hexadecimal; break; + case 'd': token_integer_kind = TokenIntegerKind::decimal; break; + case 'o': token_integer_kind = TokenIntegerKind::octal; break; + case 'b': token_integer_kind = TokenIntegerKind::binary; break; /* Was mapped to octal, so the binary kind was never produced. */ + default: token_integer_kind = TokenIntegerKind::decimal; break; + } + + auto inferred_decimal = token_integer_kind == TokenIntegerKind::decimal && next_ch != 'd'; + + module.offset += 1 + (token_integer_kind != TokenIntegerKind::decimal || !inferred_decimal); /* Skip the leading '0', plus the radix letter when one was written explicitly. */ + + u64
value; + switch (token_integer_kind) + { + case TokenIntegerKind::hexadecimal: + { + trap_raw(); + } break; + case TokenIntegerKind::decimal: + { + value = parse_decimal(module); + } break; + case TokenIntegerKind::octal: + { + trap_raw(); + } break; + case TokenIntegerKind::binary: + { + trap_raw(); + } break; + } + + token = { + .integer = { + .value = value, + .kind = token_integer_kind, + }, + .id = TokenId::integer, + }; + } break; + case ';': + { + module.offset += 1; + token = { + .id = TokenId::end_of_statement, + }; + } break; + case right_brace: + { + module.offset += 1; + token = { + .id = TokenId::right_brace, + }; + } break; + default: trap_raw(); break; + } + + assert(start_index != module.offset); + return token; +} + +fn Value* parse_left(Module& module, Scope* scope, ValueBuilder builder) +{ + Token token = builder.token; + Value* result; + switch (token.id) + { + case TokenId::integer: + { + auto integer_value = token.integer.value; + result = new_value(module); + *result = { + .constant_integer = { + .value = integer_value, + .is_signed = false, + }, + .id = ValueId::constant_integer, + }; + } break; + default: report_error(); + } + + return result; +} + +fn Precedence get_token_precedence(TokenId id, bool allow_assignment_operators) +{ + Precedence precedence; + + switch (id) + { + case TokenId::end_of_statement: + case TokenId::right_brace: + precedence = Precedence::none; + break; + default: report_error(); + } + + return precedence; +} + +fn Value* parse_precedence_left(Module& module, Scope* scope, ValueBuilder builder) +{ + auto result = builder.left; + auto precedence = builder.precedence; + + while (1) + { + auto checkpoint = get_checkpoint(module); + auto token = tokenize(module); + auto token_precedence = get_token_precedence(token.id, builder.allow_assignment_operators); + + if ((backing_type(Precedence))precedence > (backing_type(Precedence))token_precedence) + { + set_checkpoint(module, checkpoint); + break; + } + + trap_raw(); + } + 
+ return result; +} + +fn Value* parse_precedence(Module& module, Scope* scope, ValueBuilder builder) +{ + assert(builder.token.id == TokenId::none); + auto token = tokenize(module); + auto left = parse_left(module, scope, builder.with_token(token)); + auto result = parse_precedence_left(module, scope, builder.with_left(left)); + return result; +} + +fn Value* parse_value(Module& module, Scope* scope, ValueBuilder builder) +{ + assert(builder.precedence == Precedence::none); + assert(!builder.left); + auto value = parse_precedence(module, scope, builder.with_precedence(Precedence::assignment)); + return value; +} + +fn Statement* parse_statement(Module& module, Scope* scope) +{ + bool require_semicolon = true; + + auto statement_line = get_line(module); + auto statement_column = get_column(module); + + auto* statement = &arena_allocate(module.arena, 1)[0]; + *statement = Statement{ + .line = statement_line, + .column = statement_column, + }; + + auto statement_start_character = module.content[module.offset]; + switch (statement_start_character) + { + case '>': + { + trap_raw(); + } break; + case '#': + { + trap_raw(); + } break; + case left_brace: + { + trap_raw(); + } break; + default: + { + if (is_identifier_start(statement_start_character)) + { + auto checkpoint = get_checkpoint(module); + auto statement_start_identifier = parse_identifier(module); + skip_space(module); + + enum class StatementStartKeyword + { + underscore_st, + return_st, + if_st, + // TODO: make `unreachable` a statement start keyword? 
+ for_st, + while_st, + switch_st, + break_st, + continue_st, + count, + }; + + String statement_start_keywords[] = { + str("_"), + str("return"), + str("if"), + str("for"), + str("while"), + str("switch"), + str("break"), + str("continue"), + }; + + static_assert(array_length(statement_start_keywords) == (u64)StatementStartKeyword::count); + + backing_type(StatementStartKeyword) i; + for (i = 0; i < (backing_type(StatementStartKeyword))StatementStartKeyword::count; i += 1) + { + auto statement_start_keyword = statement_start_keywords[i]; + if (statement_start_keyword.equal(statement_start_identifier)) + { + break; + } + } + + auto statement_start_keyword = (StatementStartKeyword)i; + switch (statement_start_keyword) + { + case StatementStartKeyword::underscore_st: + { + trap_raw(); + } break; + case StatementStartKeyword::return_st: + { + auto return_value = parse_value(module, scope, {}); + statement->return_st = return_value; + statement->id = StatementId::return_st; + } break; + case StatementStartKeyword::if_st: + { + trap_raw(); + } break; + case StatementStartKeyword::for_st: + { + trap_raw(); + } break; + case StatementStartKeyword::while_st: + { + trap_raw(); + } break; + case StatementStartKeyword::switch_st: + { + trap_raw(); + } break; + case StatementStartKeyword::break_st: + { + trap_raw(); + } break; + case StatementStartKeyword::continue_st: + { + trap_raw(); + } break; + case StatementStartKeyword::count: + { + trap_raw(); + } break; + } + } + else + { + trap_raw(); + } + } break; + } + + if (require_semicolon) + { + expect_character(module, ';'); + } + + return statement; +} + +fn Block* parse_block(Module& module, Scope* parent_scope) +{ + auto* block = &arena_allocate(module.arena, 1)[0]; + *block = { + .scope = { + .parent = parent_scope, + .line = get_line(module), + .column = get_column(module), + .kind = ScopeKind::local, + }, + }; + auto* scope = &block->scope; + + expect_character(module, left_brace); + + Statement* current_statement = 0; 
+ + while (true) + { + skip_space(module); + + if (module.offset == module.content.length) + { + break; + } + + if (consume_character_if_match(module, right_brace)) + { + break; + } + + auto* statement = parse_statement(module, scope); + + if (current_statement) + { + current_statement->next = statement; + } + + current_statement = statement; + } + + return block; +} + +void parse(Module& module) +{ + while (1) + { + skip_space(module); + + if (module.offset == module.content.length) + { + break; + } + + bool is_export = false; + bool is_extern = false; + + auto global_line = get_line(module); + auto global_column = get_column(module); + + if (consume_character_if_match(module, left_bracket)) + { + while (module.offset < module.content.length) + { + auto global_keyword_string = parse_identifier(module); + enum class GlobalKeyword + { + export_keyword, + extern_keyword, + count, + }; + String global_keyword_strings[] = { + str("export"), + str("extern"), + }; + static_assert(array_length(global_keyword_strings) == (u64)GlobalKeyword::count); + + u32 i; + for (i = 0; i < array_length(global_keyword_strings); i += 1) + { + String keyword = global_keyword_strings[i]; + if (keyword.equal(global_keyword_string)) + { + break; + } + } + + auto global_keyword = (GlobalKeyword)i; + switch (global_keyword) + { + case GlobalKeyword::export_keyword: + { + is_export = true; + } break; + case GlobalKeyword::extern_keyword: + { + is_extern = true; + } break; + case GlobalKeyword::count: + { + report_error(); + } + } + + if (consume_character_if_match(module, right_bracket)) + { + break; + } + else + { + report_error(); + } + } + + skip_space(module); + } + + auto global_name = parse_identifier(module); + + Global* last_global = module.first_global; + while (last_global) + { + if (global_name.equal(last_global->variable.name)) + { + report_error(); + } + + if (!last_global->next) + { + break; + } + + last_global = last_global->next; + } + + Type* type_it = module.first_type; + 
Type* forward_declaration = 0; + while (type_it) + { + if (global_name.equal(type_it->name)) + { + if (type_it->id == TypeId::forward_declaration) + { + forward_declaration = type_it; + break; + } + else + { + report_error(); + } + } + + if (!type_it->next) + { + break; + } + + type_it = type_it->next; + } + + skip_space(module); + + Type* global_type = 0; + + if (consume_character_if_match(module, ':')) + { + skip_space(module); + + global_type = parse_type(module); + + skip_space(module); + } + + expect_character(module, '='); + + skip_space(module); + + bool is_global_keyword = false; + + enum class GlobalKeyword + { + bits, + enumerator, + function, + macro, + structure, + typealias, + union_type, + count, + }; + + auto i = (backing_type(GlobalKeyword))GlobalKeyword::count; + + if (is_identifier_start(module.content[module.offset])) + { + auto checkpoint = get_checkpoint(module); + auto global_string = parse_identifier(module); + skip_space(module); + + String global_keywords[] = { + str("bits"), + str("enum"), + str("fn"), + str("macro"), + str("struct"), + str("typealias"), + str("union"), + }; + static_assert(array_length(global_keywords) == (u64)GlobalKeyword::count); + + for (i = 0; i < (backing_type(GlobalKeyword))GlobalKeyword::count; i += 1) + { + String global_keyword = global_keywords[i]; + if (global_string.equal(global_keyword)) + { + break; + } + } + + auto global_keyword = (GlobalKeyword)i; + switch (global_keyword) + { + case GlobalKeyword::bits: + { + trap_raw(); + } break; + case GlobalKeyword::enumerator: + { + trap_raw(); + } break; + case GlobalKeyword::function: + { + auto calling_convention = CallingConvention::c; + auto function_attributes = FunctionAttributes{}; + bool is_variable_arguments = false; + + if (consume_character_if_match(module, left_bracket)) + { + while (module.offset < module.content.length) + { + auto function_identifier = parse_identifier(module); + + enum class FunctionKeyword + { + cc, + count, + }; + + String 
function_keywords[] = { + str("cc"), + }; + static_assert(array_length(function_keywords) == (u64)FunctionKeyword::count); + + backing_type(FunctionKeyword) i; + for (i = 0; i < (backing_type(FunctionKeyword))(FunctionKeyword::count); i += 1) + { + auto function_keyword = function_keywords[i]; + if (function_keyword.equal(function_identifier)) + { + break; + } + } + + auto function_keyword = (FunctionKeyword)i; + skip_space(module); + + switch (function_keyword) + { + case FunctionKeyword::cc: + { + expect_character(module, left_parenthesis); + skip_space(module); + auto calling_convention_string = parse_identifier(module); + String calling_conventions[] = { + str("c"), + }; + static_assert(array_length(calling_conventions) == (u64)CallingConvention::count); + + backing_type(CallingConvention) i; + for (i = 0; i < (backing_type(CallingConvention))CallingConvention::count; i += 1) + { + auto calling_convention = calling_conventions[i]; + if (calling_convention.equal(calling_convention_string)) + { + break; + } + } + + auto candidate_calling_convention = (CallingConvention)i; + if (candidate_calling_convention == CallingConvention::count) + { + report_error(); + } + + calling_convention = candidate_calling_convention; + + skip_space(module); + expect_character(module, right_parenthesis); + } break; + case FunctionKeyword::count: + { + report_error(); + } break; + } + + skip_space(module); + + if (consume_character_if_match(module, right_bracket)) + { + break; + } + else + { + report_error(); + } + } + } + + skip_space(module); + + expect_character(module, left_parenthesis); + + Type* semantic_argument_type_buffer[64]; + String semantic_argument_name_buffer[64]; + u64 semantic_argument_count = 0; + + while (module.offset < module.content.length) + { + skip_space(module); + + if (consume_character_if_match(module, '.')) + { + expect_character(module, '.'); + expect_character(module, '.'); + skip_space(module); + expect_character(module, right_parenthesis); + 
is_variable_arguments = true; + break; + } + + if (consume_character_if_match(module, right_parenthesis)) + { + break; + } + + auto argument_name = parse_identifier(module); + semantic_argument_name_buffer[semantic_argument_count] = argument_name; + + skip_space(module); + + expect_character(module, ':'); + + skip_space(module); + + auto argument_type = parse_type(module); + semantic_argument_type_buffer[semantic_argument_count] = argument_type; + + skip_space(module); + + unused(consume_character_if_match(module, ',')); + + semantic_argument_count += 1; + } + + skip_space(module); + + auto return_type = parse_type(module); + + skip_space(module); + + Slice argument_types = {}; + if (semantic_argument_count != 0) + { + trap_raw(); + } + + auto is_declaration = consume_character_if_match(module, ';'); + + auto function_type = type_allocate_init(module, { + .function = { + .semantic_return_type = return_type, + .semantic_argument_types = argument_types, + .calling_convention = calling_convention, + .is_variable_arguments = is_variable_arguments, + }, + .id = TypeId::function, + .name = str(""), + }); + + auto storage = new_value(module); + *storage = { + .type = get_pointer_type(module, function_type), + .id = ValueId::external_function, + }; + auto global = new_global(module); + *global = { + .variable = { + .storage = storage, + .initial_value = 0, + .type = function_type, + .scope = &module.scope, + .name = global_name, + .line = global_line, + .column = global_column, + }, + .linkage = (is_export | is_extern) ? 
Linkage::external : Linkage::internal, + }; + module.current_function = global; + + if (!is_declaration) + { + storage->function = { + .arguments = {}, + .scope = { + .parent = &module.scope, + .line = global_line, + .column = global_column, + .kind = ScopeKind::function, + }, + .block = 0, + .attributes = function_attributes, + }; + storage->id = ValueId::function; + + if (semantic_argument_count != 0) + { + trap_raw(); + } + + storage->function.block = parse_block(module, &storage->function.scope); + } + } break; + case GlobalKeyword::macro: + { + trap_raw(); + } break; + case GlobalKeyword::structure: + { + trap_raw(); + } break; + case GlobalKeyword::typealias: + { + trap_raw(); + } break; + case GlobalKeyword::union_type: + { + trap_raw(); + } break; + case GlobalKeyword::count: + { + set_checkpoint(module, checkpoint); + } break; + } + } + + if (i == (backing_type(GlobalKeyword))GlobalKeyword::count) + { + trap_raw(); + } + } +}