Implement parser

This commit is contained in:
David Gonzalez Martin 2025-05-01 08:33:46 -06:00
parent 0eee2a4ff3
commit 0166e74e6a
11 changed files with 6031 additions and 60 deletions

23
CMakeLists.txt Normal file
View File

@ -0,0 +1,23 @@
# Build description for the `bb` executable.
cmake_minimum_required(VERSION 3.15)

# Default to a Debug build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)
endif()

# Set C++ standard
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

project(bb)

add_executable(bb
    src/compiler.cpp
    src/entry_point.cpp
    src/parser.cpp
    src/emitter.cpp
)

# Sources include their headers as <compiler.h> / <lib.h>.
target_include_directories(bb PUBLIC src)

# BB_DEBUG is 1 for the Debug configuration and 0 for every other one.
target_compile_definitions(bb PUBLIC
    $<$<CONFIG:Debug>:BB_DEBUG=1>
    $<$<NOT:$<CONFIG:Debug>>:BB_DEBUG=0>
)

# AddressSanitizer switches, currently disabled.
#target_compile_options(bb PRIVATE -fsanitize=address)
#target_link_options(bb PRIVATE -fsanitize=address)

View File

@ -1,61 +1,4 @@
#!/usr/bin/env bash
set -eu
MY_CWD=$PWD
if [[ -z "${BB_CI-}" ]]; then
BB_CI=0
fi
if [[ -z "${BB_BUILD_TYPE-}" ]]; then
BB_BUILD_TYPE=debug
fi
if [[ -z "${BB_ERROR_ON_WARNINGS-}" ]]; then
BB_ERROR_ON_WARNINGS=$BB_CI
fi
if [[ -z "${BB_ERROR_LIMIT-}" ]]; then
BB_ERROR_LIMIT=$((1 - BB_CI))
fi
BB_COMPILE_SHADERS=0
BUILD_DIR=cache
LARGE_ASSET_BASE_URL=https://github.com/birth-software/bloat-buster/releases/download/large-assets
mkdir -p $BUILD_DIR
if [[ ! -f "$BUILD_DIR/large_assembly.s" ]]; then
cd $BUILD_DIR
wget $LARGE_ASSET_BASE_URL/large_assembly.s -o large_assembly.s
cd $MY_CWD
fi
if [[ "${BB_COMPILE_SHADERS}" == "1" ]]; then
glslangValidator -V bootstrap/std/shaders/rect.vert -o $BUILD_DIR/rect.vert.spv --quiet
glslangValidator -V bootstrap/std/shaders/rect.frag -o $BUILD_DIR/rect.frag.spv --quiet
fi
BUILD_OUT=$BUILD_DIR/build
C_COMPILER=clang
TIME_TRACE=1
BB_TIMETRACE=0
GCC_ARGS=
CLANG_ARGS=
TIME_TRACE_ARG=
if [[ $C_COMPILER == "clang"* ]]; then
CLANG_ARGS=-ferror-limit=1
if [[ "$TIME_TRACE" == "1" ]]; then
CLANG_ARGS="$CLANG_ARGS -ftime-trace"
BB_TIMETRACE=1
else
CLANG_ARGS="$CLANG_ARGS -ftime-trace"
fi
elif [[ $C_COMPILER == "gcc"* ]]; then
GCC_ARGS=-fmax-errors=1
fi
$C_COMPILER build.c -g -o $BUILD_OUT -Ibootstrap -std=gnu2x $CLANG_ARGS $GCC_ARGS -DBB_TIMETRACE=$BB_TIMETRACE -DBB_CI=$BB_CI -DBB_BUILD_TYPE=\"$BB_BUILD_TYPE\" -DBB_ERROR_ON_WARNINGS=$BB_ERROR_ON_WARNINGS -DBB_ERROR_LIMIT=$BB_ERROR_LIMIT
$BUILD_OUT $@
exit 0
cd build
ninja --quiet
cd ..

7
generate.sh Executable file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Recreates the CMake build tree in ./build from scratch.
set -eu

# Drop any previous build tree so no stale cache entries survive.
rm -rf build
mkdir build

# Configure from inside build/; the subshell restores the working directory.
(
    cd build
    cmake .. \
        -DCMAKE_C_COMPILER=clang \
        -DCMAKE_CXX_COMPILER=clang++ \
        -DCMAKE_LINKER_TYPE=MOLD \
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
)

61
old_build.sh Executable file
View File

@ -0,0 +1,61 @@
#!/usr/bin/env bash
# Legacy build driver: compiles build.c with the selected C compiler and then
# runs the resulting build program, forwarding this script's arguments to it.
set -eu

MY_CWD=$PWD

# Environment overrides; unset or empty values fall back to these defaults.
: "${BB_CI:=0}"
: "${BB_BUILD_TYPE:=debug}"
: "${BB_ERROR_ON_WARNINGS:=$BB_CI}"
: "${BB_ERROR_LIMIT:=$((1 - BB_CI))}"

BB_COMPILE_SHADERS=0
BUILD_DIR=cache
LARGE_ASSET_BASE_URL=https://github.com/birth-software/bloat-buster/releases/download/large-assets

mkdir -p "$BUILD_DIR"

# Fetch the large assembly asset once and keep it in the cache directory.
if [[ ! -f "$BUILD_DIR/large_assembly.s" ]]; then
    cd "$BUILD_DIR"
    # -O names the downloaded document; lowercase -o only redirects wget's log,
    # which previously left the log in large_assembly.s instead of the asset.
    wget "$LARGE_ASSET_BASE_URL/large_assembly.s" -O large_assembly.s
    cd "$MY_CWD"
fi

if [[ "${BB_COMPILE_SHADERS}" == "1" ]]; then
    glslangValidator -V bootstrap/std/shaders/rect.vert -o "$BUILD_DIR/rect.vert.spv" --quiet
    glslangValidator -V bootstrap/std/shaders/rect.frag -o "$BUILD_DIR/rect.frag.spv" --quiet
fi

BUILD_OUT=$BUILD_DIR/build
C_COMPILER=clang
TIME_TRACE=1
BB_TIMETRACE=0
GCC_ARGS=
CLANG_ARGS=

if [[ $C_COMPILER == "clang"* ]]; then
    CLANG_ARGS=-ferror-limit=1
    # Only request a time trace when it is enabled; the old else-branch added
    # -ftime-trace unconditionally, contradicting BB_TIMETRACE=0.
    if [[ "$TIME_TRACE" == "1" ]]; then
        CLANG_ARGS="$CLANG_ARGS -ftime-trace"
        BB_TIMETRACE=1
    fi
elif [[ $C_COMPILER == "gcc"* ]]; then
    GCC_ARGS=-fmax-errors=1
fi

# CLANG_ARGS / GCC_ARGS stay unquoted on purpose: each may hold several flags.
"$C_COMPILER" build.c -g -o "$BUILD_OUT" -Ibootstrap -std=gnu2x $CLANG_ARGS $GCC_ARGS -DBB_TIMETRACE=$BB_TIMETRACE -DBB_CI=$BB_CI -DBB_BUILD_TYPE=\"$BB_BUILD_TYPE\" -DBB_ERROR_ON_WARNINGS=$BB_ERROR_ON_WARNINGS -DBB_ERROR_LIMIT=$BB_ERROR_LIMIT

# Quote "$@" so arguments containing spaces reach the build program intact.
"$BUILD_OUT" "$@"
exit 0

423
src/compiler.cpp Normal file
View File

@ -0,0 +1,423 @@
#include <compiler.h>
// Compiles one module: builds the table of builtin types, constructs the
// Module from `options`, then runs the parse and emit stages on it.
//
// The builtin types live in a single contiguous allocation laid out as
//   [u1..u64][s1..s64][u128][s128][void][noreturn]
// and are chained through Type::next in that same order. integer_type(),
// void_type() and noreturn_type() index into this allocation, so the layout
// must stay in sync with i128_offset and void_offset.
fn void compile(Arena* arena, Options options)
{
    auto base_allocation_type_count = i128_offset + // 64 * 2 for basic integer types
        2 + // u128, s128
        2; // void, noreturn
    auto base_type_allocation = arena_allocate<Type>(arena, base_allocation_type_count);
    auto* type_it = base_type_allocation.pointer;

    bool signs[] = {false, true};
    Type* previous = 0;

    // u1..u64 followed by s1..s64.
    for (bool sign: signs)
    {
        for (u32 bit_index = 0; bit_index < 64; bit_index += 1)
        {
            auto bit_count = bit_index + 1;

            // Render the name ("u7", "s32", ...) as a sign letter plus one or two decimal digits.
            auto first_digit = (u8)(bit_count < 10 ? bit_count % 10 + '0' : bit_count / 10 + '0');
            auto second_digit = (u8)(bit_count > 9 ? bit_count % 10 + '0' : 0);
            u8 name_buffer[] = { u8(sign ? 's' : 'u'), first_digit, second_digit };
            u64 name_length = 2 + (bit_count > 9);
            auto name_stack = String{name_buffer, name_length};
            auto name = arena_duplicate_string(arena, name_stack);

            *type_it = {
                .integer = {
                    .bit_count = bit_count,
                    .is_signed = sign,
                },
                .id = TypeId::integer,
                .name = name,
            };

            if (previous) previous->next = type_it;
            previous = type_it;
            type_it += 1;
        }
    }

    // u128 then s128. The forward link is established through `previous`, exactly
    // like the loop above; no temporary back-link to the previous type is stored.
    for (bool sign: signs)
    {
        auto name = sign ? str("s128") : str("u128");
        *type_it = {
            .integer = {
                .bit_count = 128,
                .is_signed = sign,
            },
            .id = TypeId::integer,
            .name = name,
        };

        if (previous) previous->next = type_it;
        previous = type_it;
        type_it += 1;
    }

    auto void_type = type_it;
    type_it += 1;
    auto noreturn_type = type_it;
    type_it += 1;
    assert(type_it - base_type_allocation.pointer == base_allocation_type_count);

    // s128 -> void -> noreturn closes the chain; noreturn is the last builtin type.
    previous->next = void_type;
    *void_type = {
        .id = TypeId::void_type,
        .name = str("void"),
        .next = noreturn_type,
    };
    *noreturn_type = {
        .id = TypeId::noreturn,
        .name = str("noreturn"),
    };

    auto module = Module{
        .arena = arena,
        .content = options.content,
        .first_type = base_type_allocation.pointer,
        .last_type = noreturn_type,
        .scope = {
            .kind = ScopeKind::global,
        },
        .name = options.name,
        .path = options.path,
        .executable = options.executable,
        .objects = options.objects,
        .target = options.target,
        .build_mode = options.build_mode,
        .has_debug_info = options.has_debug_info,
        .silent = options.silent,
    };

    // Shared value of type void, tagged infer_or_ignore.
    module.void_value = new_value(&module);
    *module.void_value = {
        .type = void_type,
        .id = ValueId::infer_or_ignore,
    };

    parse(&module);
    emit(&module);
}
// Compiles a single `.bbb` source file given by a path relative to the working
// directory.
//
// Validates the extension, derives the output locations under base_cache_dir
// (`<cache>/[compiler/]<build_mode>_<di|nodi>/<base_name>[.o]`), reads the file
// and hands everything to compile(). Calls fail() on a malformed path.
fn void compile_file(Arena* arena, Compile options)
{
    auto relative_file_path = options.relative_file_path;

    // Shortest acceptable path is one character plus ".bbb".
    if (relative_file_path.length < 5)
    {
        fail();
    }

    auto extension_start = string_last_character(relative_file_path, '.');
    if (extension_start == string_no_match)
    {
        fail();
    }

    if (!relative_file_path(extension_start).equal(str(".bbb")))
    {
        fail();
    }

    // The base name starts after the last '/', or at index 0 when the path has none.
    auto separator_index = string_last_character(relative_file_path, '/');
    separator_index = separator_index == string_no_match ? 0 : separator_index;
    auto base_start = separator_index + (separator_index != 0 || relative_file_path[separator_index] == '/');
    auto base_name = relative_file_path(base_start, extension_start);

    auto is_compiler = relative_file_path.equal(str("src/compiler.bbb"));

    // The trailing '/' after "compiler" matters: without it the output directory
    // becomes a sibling named "compiler<build_mode>_..." instead of living inside
    // the "<cache>/compiler" directory created below.
    String output_path_dir_parts[] = {
        str(base_cache_dir),
        is_compiler ? str("/compiler/") : str("/"),
        build_mode_to_string(options.build_mode),
        str("_"),
        options.has_debug_info ? str("di") : str("nodi"),
    };
    auto output_path_dir = arena_join_string(arena, array_to_slice(output_path_dir_parts));

    // Create the directories from the outermost to the innermost one.
    make_directory(base_cache_dir);
    if (is_compiler)
    {
        make_directory(base_cache_dir "/compiler");
    }
    make_directory(cstr(output_path_dir));

    String output_path_base_parts[] = {
        output_path_dir,
        str("/"),
        base_name,
    };
    auto output_path_base = arena_join_string(arena, array_to_slice(output_path_base_parts));

    String output_object_path_parts[] = {
        output_path_base,
        str(".o"),
    };
    auto output_object_path = arena_join_string(arena, array_to_slice(output_object_path_parts));
    auto output_executable_path = output_path_base;

    auto file_content = file_read(arena, relative_file_path);
    auto file_path = path_absolute(arena, relative_file_path);

    auto c_abi_object_path = str("build/c_abi.o");
    String objects[] = {
        c_abi_object_path,
        output_object_path,
    };
    Slice<String> object_slice = array_to_slice(objects);
    // Only the "c_abi" module keeps build/c_abi.o in its object list; every other
    // module skips the first entry.
    object_slice = object_slice(!base_name.equal(str("c_abi")));

    compile(arena, {
        .content = file_content,
        .path = file_path,
        .executable = output_executable_path,
        .name = base_name,
        .objects = object_slice,
        .target = {
            .cpu = CPUArchitecture::x86_64,
            .os = OperatingSystem::linux_,
        },
        .build_mode = options.build_mode,
        .has_debug_info = options.has_debug_info,
        .silent = options.silent,
    });
}
// Base names of the programs compiled by the `test` command. entry_point()
// expands each entry to "tests/<name>.bbb" and compiles them in this order.
global_variable String names[] = {
str("minimal"),
str("comments"),
str("constant_add"),
str("constant_and"),
str("constant_div"),
str("constant_mul"),
str("constant_rem"),
str("constant_or"),
str("constant_sub"),
str("constant_xor"),
str("constant_shift_left"),
str("constant_shift_right"),
str("minimal_stack"),
str("minimal_stack_arithmetic"),
str("minimal_stack_arithmetic2"),
str("minimal_stack_arithmetic3"),
str("stack_negation"),
str("stack_add"),
str("stack_sub"),
str("extend"),
str("integer_max"),
str("integer_hex"),
str("basic_pointer"),
str("basic_call"),
str("basic_branch"),
str("basic_array"),
str("basic_enum"),
str("basic_slice"),
str("basic_string"),
str("basic_varargs"),
str("basic_while"),
str("pointer"),
str("pointer_cast"),
str("u1_return"),
str("local_type_inference"),
str("global"),
str("function_pointer"),
str("extern"),
str("byte_size"),
str("argv"),
str("assignment_operators"),
str("not_pointer"),
str("bits"),
str("bits_no_backing_type"),
str("bits_return_u1"),
str("bits_zero"),
str("comparison"),
str("global_struct"),
str("if_no_else"),
str("if_no_else_void"),
str("indirect"),
str("indirect_struct"),
str("indirect_varargs"),
str("ret_c_bool"),
str("return_type_builtin"),
str("return_u64_u64"),
str("select"),
str("slice"),
str("small_struct_ints"),
str("struct_assignment"),
str("struct"),
str("struct_u64_u64"),
str("struct_varargs"),
str("struct_zero"),
str("unreachable"),
str("varargs"),
str("c_abi0"),
str("c_abi1"),
str("c_med_struct_ints"),
str("c_ret_struct_array"),
str("c_split_struct_ints"),
str("c_string_to_slice"),
str("c_struct_with_array"),
str("c_function_pointer"),
str("c_abi"),
str("string_to_enum"),
str("abi_enum_bool"),
str("empty_if"),
str("else_if"),
str("else_if_complicated"),
str("shortcircuiting_if"),
str("field_access_left_assign"),
str("for_each"),
str("pointer_decay"),
str("enum_name"),
str("slice_of_slices"),
str("type_alias"),
str("integer_formats"),
str("return_small_struct"),
str("for_each_int"),
str("bool_array"),
str("basic_union"),
str("break_continue"),
str("constant_global_reference"),
str("self_referential_struct"),
str("forward_declared_type"),
str("basic_macro"),
str("generic_macro"),
str("generic_pointer_macro"),
str("noreturn_macro"),
str("generic_pointer_array"),
};
// Command-line driver, called from main() with argv and envp wrapped in slices.
//
// Usage:
//   <exe> compile <file.bbb> [build_mode] [true|false]
//   <exe> test
//
// `compile` takes the source path in arguments[2], an optional build mode name
// in arguments[3] and an optional has_debug_info flag in arguments[4].
// `test` compiles every entry of `names` from the tests/ directory.
// `environment` is currently unused.
void entry_point(Slice<const char*> arguments, Slice<const char*> environment)
{
    Arena* arena = arena_initialize_default(8 * mb);

    if (arguments.length < 2)
    {
        fail_with_message(str("error: Not enough arguments\n"));
    }

    String command_string = c_string_to_slice(arguments[1]);
    // Indexed by Command; must list the commands in enum order.
    String command_strings[] = {
        str("compile"),
        str("test"),
    };
    static_assert(array_length(command_strings) == (u64)Command::count);

    // Linear lookup; `i` ends up equal to Command::count when nothing matches.
    backing_type(Command) i;
    for (i = 0; i < (backing_type(Command))Command::count; i += 1)
    {
        String candidate = command_strings[i];
        if (candidate.equal(command_string))
        {
            break;
        }
    }

    auto command = (Command)i;

    switch (command)
    {
        case Command::compile:
            {
                if (arguments.length < 3)
                {
                    fail_with_message(str("Not enough arguments for command 'compile'\n"));
                }

                auto build_mode = BuildMode::debug_none;
                auto has_debug_info = true;

                if (arguments.length >= 4)
                {
                    auto build_mode_string = c_string_to_slice(arguments[3]);
                    // Indexed by BuildMode; must list the modes in enum order.
                    String build_mode_strings[] = {
                        str("debug_none"),
                        str("debug"),
                        str("soft_optimize"),
                        str("optimize_for_speed"),
                        str("optimize_for_size"),
                        str("aggressively_optimize_for_speed"),
                        str("aggressively_optimize_for_size"),
                    };

                    backing_type(BuildMode) i;
                    for (i = 0; i < (backing_type(BuildMode))BuildMode::count; i += 1)
                    {
                        String candidate = build_mode_strings[i];
                        if (build_mode_string.equal(candidate))
                        {
                            break;
                        }
                    }

                    build_mode = (BuildMode)i;
                    if (build_mode == BuildMode::count)
                    {
                        fail_with_message(str("Invalid build mode\n"));
                    }
                }

                if (arguments.length >= 5)
                {
                    // The flag is the fifth argument. Reading arguments[3] here would
                    // re-read the build mode string and always reject it.
                    auto has_debug_info_string = c_string_to_slice(arguments[4]);
                    if (has_debug_info_string.equal(str("true")))
                    {
                        has_debug_info = true;
                    }
                    else if (has_debug_info_string.equal(str("false")))
                    {
                        has_debug_info = false;
                    }
                    else
                    {
                        fail_with_message(str("Wrong value for has_debug_info\n"));
                    }
                }

                auto relative_file_path = c_string_to_slice(arguments[2]);

                compile_file(arena, {
                    .relative_file_path = relative_file_path,
                    .build_mode = build_mode,
                    .has_debug_info = has_debug_info,
                    .silent = false,
                });
            } break;
        case Command::test:
            {
                // TODO: provide more arguments
                if (arguments.length != 2)
                {
                    fail_with_message(str("error: 'test' command takes no arguments"));
                }

                // TODO: introduce build mode, debug info switch, etc
                for (auto name: names)
                {
                    // Remember the arena position so each test's allocations are
                    // released before the next one starts.
                    auto position = arena->position;

                    String relative_file_path_parts[] = { str("tests/"), name, str(".bbb") };
                    auto relative_file_path = arena_join_string(arena, array_to_slice(relative_file_path_parts));
                    auto build_mode = BuildMode::debug_none;
                    bool has_debug_info = true;

                    compile_file(arena, {
                        .relative_file_path = relative_file_path,
                        .build_mode = build_mode,
                        .has_debug_info = has_debug_info,
                        .silent = false,
                    });
                    // TODO: introduce test

                    arena_restore(arena, position);
                }
            } break;
        case Command::count:
            {
                fail_with_message(str("error: Invalid command\n"));
            } break;
    }
}

996
src/compiler.h Normal file
View File

@ -0,0 +1,996 @@
#pragma once
#include <lib.h>
#define report_error() trap_raw()
// Top-level command selected by the first command-line argument.
// `count` is a sentinel: entry_point() uses it as the loop bound of its name
// lookup and as the "invalid command" result.
enum class Command
{
compile,
test,
count,
};
// Build/optimization mode of a compilation.
// entry_point() maps a name to a mode by array index, so the enumerator order
// must match its build_mode_strings table. `count` is a sentinel, not a mode.
enum class BuildMode
{
debug_none,
debug,
soft_optimize,
optimize_for_speed,
optimize_for_size,
aggressively_optimize_for_speed,
aggressively_optimize_for_size,
count,
};
// Returns the enumerator name of `build_mode` as a String.
// The BuildMode::count sentinel is not a valid input.
fn String build_mode_to_string(BuildMode build_mode)
{
    switch (build_mode)
    {
        case BuildMode::debug_none:
            return str("debug_none");
        case BuildMode::debug:
            return str("debug");
        case BuildMode::soft_optimize:
            return str("soft_optimize");
        case BuildMode::optimize_for_speed:
            return str("optimize_for_speed");
        case BuildMode::optimize_for_size:
            return str("optimize_for_size");
        case BuildMode::aggressively_optimize_for_speed:
            return str("aggressively_optimize_for_speed");
        case BuildMode::aggressively_optimize_for_size:
            return str("aggressively_optimize_for_size");
        case BuildMode::count:
            unreachable();
    }
}
// Value category tag stored in Value::kind.
// NOTE(review): presumably right-value vs. left-value (addressable) — confirm in the parser.
enum class ValueKind
{
right,
left,
};
// Target CPU architectures; only x86_64 is declared.
enum class CPUArchitecture
{
x86_64,
};
// Target operating systems; only Linux is declared.
enum class OperatingSystem
{
linux_,
};
// Forward declarations for the mutually referencing compiler data structures.
struct Type;
struct Value;
struct Local;
struct Global;
struct Block;
struct Statement;
struct Variable;
struct Argument;
struct Scope;
struct MacroDeclaration;
// CPU / operating system pair a module is compiled for.
struct Target
{
CPUArchitecture cpu;
OperatingSystem os;
};
// Arguments of compile_file(): the source path plus the build switches that
// are forwarded unchanged into Options.
struct Compile
{
String relative_file_path;
BuildMode build_mode;
bool has_debug_info;
bool silent;
};
#define base_cache_dir "bb-cache"
// Calling conventions a function type can use; `count` is a sentinel.
enum class CallingConvention
{
c,
count,
};
// Inlining preference recorded in FunctionAttributes.
enum class InlineBehavior
{
normal,
always_inline,
no_inline,
inline_hint,
};
// Per-function attributes stored in ValueFunction.
struct FunctionAttributes
{
InlineBehavior inline_behavior;
bool naked;
};
// Discriminant stored in Type::id. get_byte_size() and friends switch on it
// before reading the matching member of Type's anonymous union.
enum class TypeId
{
void_type,
noreturn,
forward_declaration,
integer,
function,
pointer,
array,
enumerator,
structure,
bits,
alias,
union_type,
unresolved,
};
// Payload of an integer type: width in bits and signedness.
struct TypeInteger
{
u32 bit_count;
bool is_signed;
};
// Payload of a function type.
struct TypeFunction
{
Type* semantic_return_type;
Slice<Type*> semantic_argument_types;
CallingConvention calling_convention;
bool is_variable_arguments;
};
// Payload of a pointer type.
struct TypePointer
{
Type* element_type;
// Next entry of the pointer-type list that get_pointer_type() walks,
// starting at Module::first_pointer_type.
Type* next;
};
// Payload of an array type; its byte size is element size times element_count.
struct TypeArray
{
u64 element_count;
Type* element_type;
};
// Enum field whose value is still an unevaluated Value.
struct UnresolvedEnumField
{
String name;
Value* value;
};
// Enum field with its final integer value.
struct EnumField
{
String name;
u64 value;
};
// Enum type before its fields have been resolved.
struct UnresolvedTypeEnum
{
Slice<UnresolvedEnumField> fields;
Type* backing_type;
u32 line;
bool implicit_backing_type;
Type* resolved_type;
};
// Payload of an enum type; size and alignment come from backing_type.
struct TypeEnum
{
Slice<EnumField> fields;
Type* backing_type;
u32 line;
};
// Named member of a struct or bits type.
// get_slice_type() stores byte offsets (0 and 8) in `offset` for slice structs.
struct Field
{
String name;
Type* type;
u64 offset;
u32 line;
};
// Payload of a struct type. Slices are structs with is_slice set and exactly
// two fields (pointer, length), as built by get_slice_type().
struct TypeStruct
{
Slice<Field> fields;
u64 byte_size;
u32 byte_alignment;
u32 line;
bool is_slice;
// Next entry of the slice-type list that get_slice_type() walks,
// starting at Module::first_slice_type.
Type* next;
};
// Payload of a bits type.
struct TypeBits
{
Slice<Field> fields;
Type* backing_type;
u32 line;
bool is_implicit_backing_type;
};
// Payload of a type alias.
struct TypeAlias
{
Type* type;
Scope* scope;
u32 line;
};
// Named member of a union type.
struct UnionField
{
Type* type;
String name;
u32 line;
};
// Payload of a union type.
struct TypeUnion
{
Slice<UnionField> fields;
u64 byte_size;
u32 byte_alignment;
u32 line;
// NOTE(review): presumably the index of the largest field — confirm against the parser.
u32 biggest_field;
};
// A type in the compiled program: a payload selected by `id`, a name, and a
// link to the next type.
struct Type
{
// Kind-specific payload; read the member that corresponds to `id`.
union
{
TypeInteger integer;
TypeFunction function;
TypePointer pointer;
TypeArray array;
TypeEnum enumerator;
TypeStruct structure;
TypeBits bits;
TypeAlias alias;
TypeUnion union_type;
};
TypeId id;
String name;
// Next type in the module-wide list running from Module::first_type to
// Module::last_type; type_allocate_init() appends to it.
Type* next;
};
// Rounds a bit width up to the next power of two, with a floor of 8 bits.
fn u32 align_bit_count(u32 bit_count)
{
    auto rounded = next_power_of_two(bit_count);
    auto aligned_bit_count = 8 > rounded ? 8 : rounded;
    // Anything at or above 8 that is a power of two is a whole number of bytes.
    assert(aligned_bit_count % 8 == 0);
    return aligned_bit_count;
}
// Number of bytes occupied by an integer of `bit_count` bits after rounding
// the width with align_bit_count().
fn u32 aligned_byte_count_from_bit_count(u32 bit_count)
{
    return align_bit_count(bit_count) / 8;
}
// Size in bytes of a value of `type`.
// Handles integer, array, struct and enum types; traps on any other kind.
fn u64 get_byte_size(Type* type)
{
    auto id = type->id;

    if (id == TypeId::integer)
    {
        auto byte_count = aligned_byte_count_from_bit_count(type->integer.bit_count);
        assert(byte_count == 1 || byte_count == 2 || byte_count == 4 || byte_count == 8 || byte_count == 16);
        return byte_count;
    }

    if (id == TypeId::array)
    {
        // Arrays occupy element size times element count.
        auto element_size = get_byte_size(type->array.element_type);
        return element_size * type->array.element_count;
    }

    if (id == TypeId::structure)
    {
        return type->structure.byte_size;
    }

    if (id == TypeId::enumerator)
    {
        // Enums are as large as their backing type.
        return get_byte_size(type->enumerator.backing_type);
    }

    trap_raw();
}
// Alignment in bytes required by a value of `type`.
// Handles integer, array, struct and enum types; traps on any other kind.
fn u32 get_byte_alignment(Type* type)
{
    auto id = type->id;

    if (id == TypeId::integer)
    {
        auto aligned_byte_count = aligned_byte_count_from_bit_count(type->integer.bit_count);
        assert(aligned_byte_count == 1 || aligned_byte_count == 2 || aligned_byte_count == 4 || aligned_byte_count == 8 || aligned_byte_count == 16);
        return aligned_byte_count;
    }

    if (id == TypeId::array)
    {
        // An array is aligned like its element type.
        return get_byte_alignment(type->array.element_type);
    }

    if (id == TypeId::structure)
    {
        return type->structure.byte_alignment;
    }

    if (id == TypeId::enumerator)
    {
        // An enum is aligned like its backing type.
        return get_byte_alignment(type->enumerator.backing_type);
    }

    trap_raw();
}
// Exact width in bits of an integer type; traps on every other kind of type.
fn u64 get_bit_size(Type* type)
{
    if (type->id != TypeId::integer)
    {
        trap_raw();
    }

    return type->integer.bit_count;
}
// Kind of construct that owns a Scope.
enum class ScopeKind
{
global,
function,
local,
for_each,
macro_declaration,
macro_instantiation,
};
// Lexical scope: a link to the enclosing scope, a source position and the kind.
struct Scope
{
Scope* parent;
u32 line;
u32 column;
ScopeKind kind;
};
// Discriminant stored in Statement::id.
// NOTE(review): presumably selects the active member of Statement's union — confirm.
enum class StatementId
{
local,
expression,
return_st,
assignment,
if_st,
block,
while_st,
switch_st,
for_each,
break_st,
continue_st,
};
// Operator of an assignment statement: plain assignment or a compound form.
enum class StatementAssignmentId
{
assign,
assign_add,
assign_sub,
assign_mul,
assign_div,
assign_rem,
assign_shift_left,
assign_shift_right,
assign_and,
assign_or,
assign_xor,
};
// Payload of an assignment statement.
struct StatementAssignment
{
Value* left;
Value* right;
StatementAssignmentId id;
};
// Payload of an if statement.
struct StatementIf
{
Value* condition;
Statement* if_statement;
Statement* else_statement;
};
// Payload of a while statement.
struct StatementWhile
{
Value* condition;
Block* block;
};
// One clause of a switch statement: the values it lists and its block.
struct StatementSwitchClause
{
Slice<Value*> values;
Block* block;
};
// Payload of a switch statement.
struct StatementSwitch
{
Value* discriminant;
Slice<StatementSwitchClause> clauses;
};
// What a for-each statement iterates over.
enum class ForEachKind
{
slice,
range,
};
// Payload of a for-each statement.
struct StatementForEach
{
Local* first_local;
Local* last_local;
Slice<ValueKind> left_values;
Slice<Value*> right_values;
Statement* predicate;
Scope scope;
ForEachKind kind;
};
// A statement: kind-specific payload, a link to the next statement, the kind
// tag and the source position.
struct Statement
{
union
{
Local* local;
Value* expression;
Value* return_st;
StatementAssignment assignment;
StatementIf if_st;
Block* block;
StatementWhile while_st;
StatementSwitch switch_st;
StatementForEach for_each;
};
Statement* next;
StatementId id;
u32 line;
u32 column;
};
// A block: its locals (first/last of a Local::next chain), its first statement
// (statements are chained through Statement::next) and its scope.
struct Block
{
Local* first_local;
Local* last_local;
Statement* first_statement;
Scope scope;
};
// Discriminant stored in Value::id.
// compile() tags the module's shared void value with infer_or_ignore.
enum class ValueId
{
infer_or_ignore,
external_function,
function,
constant_integer,
unary,
binary,
unary_type,
variable_reference,
macro_reference,
macro_instantiation,
dereference,
call,
global,
array_initialization,
array_expression,
slice_expression,
enum_literal,
trap,
field_access,
string_literal,
va_start,
va_arg,
aggregate_initialization,
undefined,
unreachable,
zero,
select,
string_to_enum,
};
// Payload of an integer constant.
struct ValueConstantInteger
{
u64 value;
bool is_signed;
};
// Payload of a function definition.
struct ValueFunction
{
Slice<Argument> arguments;
Scope scope;
Block* block;
FunctionAttributes attributes;
};
// Operator of a unary value.
enum class UnaryId
{
minus,
plus,
ampersand,
exclamation,
tilde,
enum_name,
extend,
truncate,
pointer_cast,
int_from_enum,
int_from_pointer,
va_end,
bitwise_not,
};
// Payload of a unary operation on a value.
struct ValueUnary
{
Value* value;
UnaryId id;
};
// Operator of a unary operation whose operand is a type.
enum class UnaryTypeId
{
byte_size,
integer_max,
};
// Payload of a unary operation on a type.
struct ValueUnaryType
{
Type* type;
UnaryTypeId id;
};
// Operator of a binary value.
enum class BinaryId
{
add,
sub,
mul,
div,
rem,
bitwise_and,
bitwise_or,
bitwise_xor,
shift_left,
shift_right,
compare_equal,
compare_not_equal,
compare_greater,
compare_less,
compare_greater_equal,
compare_less_equal,
logical_and,
logical_or,
logical_and_shortcircuit,
logical_or_shortcircuit,
};
// Payload of a binary operation.
struct ValueBinary
{
Value* left;
Value* right;
BinaryId id;
};
// Payload of a call: the callee and its arguments.
struct ValueCall
{
Value* callable;
Slice<Value*> arguments;
};
// Payload of an array initialization.
struct ValueArrayInitialization
{
Slice<Value*> values;
bool is_constant;
};
// Payload of an indexing expression.
struct ValueArrayExpression
{
Value* array_like;
Value* index;
};
// Payload of a field access.
struct ValueFieldAccess
{
Value* aggregate;
String field_name;
};
// Payload of a slicing expression.
struct ValueSliceExpression
{
Value* array_like;
Value* start;
Value* end;
};
// Payload of a va_arg expression.
struct ValueVaArg
{
Value* va_list;
Type* type;
};
// Payload of an aggregate initialization.
// NOTE(review): names and values look like parallel slices — confirm.
struct ValueAggregateInitialization
{
Slice<String> names;
Slice<Value*> values;
bool is_constant;
bool zero;
};
// Payload of a select (conditional value) expression.
struct ValueSelect
{
Value* condition;
Value* true_value;
Value* false_value;
};
// Payload of a string-to-enum conversion.
struct ValueStringToEnum
{
Type* type;
Value* string;
};
// Tells which member of ConstantArgument's union is meaningful.
enum class ConstantArgumentId
{
value,
type,
};
// Named constant argument of a macro: either a type or a value.
struct ConstantArgument
{
String name;
union
{
Type* type;
Value* value;
};
ConstantArgumentId id;
};
// Declaration of a macro.
struct MacroDeclaration
{
Slice<Argument> arguments;
Slice<ConstantArgument> constant_arguments;
Type* return_type;
Block* block;
String name;
Scope scope;
// Link to the next declaration; Module keeps first/last pointers into this chain.
MacroDeclaration* next;
// A macro is generic when it declares at least one constant argument.
bool is_generic()
{
return constant_arguments.length != 0;
}
};
// One instantiation of a macro declaration.
struct MacroInstantiation
{
MacroDeclaration* declaration;
Global* instantiation_function;
Slice<Argument> declaration_arguments;
Slice<Value*> instantiation_arguments;
Slice<ConstantArgument> constant_arguments;
Type* return_type;
Block* block;
Scope scope;
u32 line;
u32 column;
};
// A value: kind-specific payload, its type, the kind tag and the value category.
struct Value
{
// NOTE(review): presumably `id` selects the active member — confirm in the parser.
union
{
ValueConstantInteger constant_integer;
ValueFunction function;
ValueUnary unary;
ValueBinary binary;
Variable* variable_reference;
ValueUnaryType unary_type;
Value* dereference;
ValueCall call;
ValueArrayInitialization array_initialization;
ValueArrayExpression array_expression;
String enum_literal;
ValueFieldAccess field_access;
ValueSliceExpression slice_expression;
String string_literal;
ValueVaArg va_arg;
ValueAggregateInitialization aggregate_initialization;
ValueSelect select;
ValueStringToEnum string_to_enum;
MacroDeclaration* macro_reference;
MacroInstantiation macro_instantiation;
};
Type* type;
ValueId id;
ValueKind kind;
// Returns true for integer constants. Every other ValueId traps, so this
// must only be called on kinds handled below.
bool is_constant()
{
switch (id)
{
case ValueId::constant_integer:
return true;
default: trap_raw();
}
}
};
// Data shared by globals, locals and arguments: storage and initial value,
// type, owning scope, name and source position.
struct Variable
{
Value* storage;
Value* initial_value;
Type* type;
Scope* scope;
String name;
u32 line;
u32 column;
};
// Symbol linkage of a global.
enum class Linkage
{
internal,
external,
};
// Module-level variable; new_global() chains these through `next`.
struct Global
{
Variable variable;
Linkage linkage;
Global* next;
};
// Local variable; `next` links to another Local (Block and StatementForEach
// hold first/last pointers into such a chain).
struct Local
{
Variable variable;
Local* next;
};
// Function or macro argument together with its index.
struct Argument
{
Variable variable;
u32 index;
};
// State of one compilation: the source text, the type/global/macro lists and
// the options copied from Options by compile().
struct Module
{
// Arena used for every allocation made on behalf of this module.
Arena* arena;
// Source text of the file being compiled.
String content;
// NOTE(review): offset/line_offset/line_character_offset look like the
// parser's cursor into `content` — confirm in parser.cpp.
u64 offset;
u64 line_offset;
u64 line_character_offset;
// Head of the pointer-type list searched by get_pointer_type().
Type* first_pointer_type;
// Head of the slice-type list searched by get_slice_type().
Type* first_slice_type;
Type* first_pair_struct_type;
Type* first_array_type;
// Ends of the list of all types. first_type also addresses the contiguous
// builtin type table indexed by integer_type(), void_type() and noreturn_type().
Type* first_type;
Type* last_type;
Type* va_list_type;
// Shared value of type void, created by compile().
Value* void_value;
// Ends of the global list maintained by new_global().
Global* first_global;
Global* last_global;
Global* current_function;
MacroDeclaration* first_macro_declaration;
MacroDeclaration* last_macro_declaration;
MacroDeclaration* current_macro_declaration;
MacroInstantiation* current_macro_instantiation;
// Global scope; compile() initializes its kind to ScopeKind::global.
Scope scope;
String name;
String path;
String executable;
Slice<String>objects;
Target target;
BuildMode build_mode;
bool has_debug_info;
bool silent;
};
// Index of u128 in the builtin type table: it follows the 64 unsigned and 64
// signed integer types of 1..64 bits.
constexpr u64 i128_offset = 64 * 2;
// Index of `void` in the builtin type table: right after u128 and s128.
constexpr u64 void_offset = i128_offset + 2;
// Looks up the builtin integer type with the given width and signedness.
// Valid widths are 1..64 and 128. The table holds the unsigned types first,
// then the signed ones, then u128 and s128.
fn Type* integer_type(Module* module, TypeInteger integer)
{
    assert(integer.bit_count);
    assert(integer.bit_count <= 64 || integer.bit_count == 128);

    u64 index;
    if (integer.bit_count == 128)
    {
        index = i128_offset + integer.is_signed;
    }
    else
    {
        // Signed types start 64 entries after the unsigned ones.
        index = integer.bit_count - 1 + (64 * integer.is_signed);
    }

    auto* result_type = &module->first_type[index];
    assert(result_type->id == TypeId::integer);
    assert(result_type->integer.bit_count == integer.bit_count);
    assert(result_type->integer.is_signed == integer.is_signed);
    return result_type;
}
// Returns the builtin `void` type, stored at index void_offset of the type table.
fn Type* void_type(Module* module)
{
    return &module->first_type[void_offset];
}
// Returns the builtin `noreturn` type, the table entry directly after `void`.
fn Type* noreturn_type(Module* module)
{
    auto* void_entry = void_type(module);
    return void_entry + 1;
}
// Arguments of compile(); every field is copied into the Module it creates.
struct Options
{
// Source text to compile.
String content;
// Path of the source file (compile_file() passes the absolute path).
String path;
// Output executable path.
String executable;
// Module name (compile_file() passes the file's base name).
String name;
// Object file paths associated with the module.
Slice<String> objects;
Target target;
BuildMode build_mode;
bool has_debug_info;
bool silent;
};
// Allocates a Type in the module arena, copies `type` into it and appends it
// to the module's list of all types. Returns the new type.
fn Type* type_allocate_init(Module* module, Type type)
{
    auto storage = arena_allocate<Type>(module->arena, 1);
    Type* result = &storage[0];
    *result = type;

    if (!module->last_type)
    {
        // Empty list: the new type becomes the head as well.
        assert(!module->first_type);
        module->first_type = result;
    }
    else
    {
        module->last_type->next = result;
    }

    module->last_type = result;
    return result;
}
// Allocates one Value in the module arena and returns a pointer to it.
fn Value* new_value(Module* module)
{
    auto storage = arena_allocate<Value>(module->arena, 1);
    return &storage[0];
}
// Allocates a slice of `count` Value pointers in the module arena.
fn Slice<Value*> new_value_array(Module* module, u64 count)
{
    return arena_allocate<Value*>(module->arena, count);
}
// Allocates a slice of `count` Type pointers in the module arena.
fn Slice<Type*> new_type_array(Module* module, u64 count)
{
    return arena_allocate<Type*>(module->arena, count);
}
// Allocates a Global in the module arena and appends it to the module's list
// of globals. The caller fills in the returned object.
fn Global* new_global(Module* module)
{
    auto storage = arena_allocate<Global>(module->arena, 1);
    Global* result = &storage[0];

    if (!module->last_global)
    {
        // Empty list: the new global becomes the head as well.
        assert(!module->first_global);
        module->first_global = result;
    }
    else
    {
        module->last_global->next = result;
    }

    module->last_global = result;
    return result;
}
// Returns the pointer type whose pointee is `element_type`, creating it on
// first use.
//
// Pointer types are kept in a list (Module::first_pointer_type, chained
// through TypePointer::next) so each element type gets exactly one pointer
// type. A newly created type is named "&<element name>" and appended to
// that list.
fn Type* get_pointer_type(Module* module, Type* element_type)
{
    // After the loop this is the tail of the list, or null when the list is empty.
    Type* last_pointer_type = module->first_pointer_type;
    while (last_pointer_type)
    {
        assert(last_pointer_type->id == TypeId::pointer);
        if (last_pointer_type->pointer.element_type == element_type)
        {
            return last_pointer_type;
        }

        if (!last_pointer_type->pointer.next)
        {
            break;
        }

        last_pointer_type = last_pointer_type->pointer.next;
    }

    String name_parts[] = {
        str("&"),
        element_type->name,
    };

    auto result = type_allocate_init(module, {
        .pointer = {
            .element_type = element_type,
        },
        .id = TypeId::pointer,
        .name = arena_join_string(module->arena, array_to_slice(name_parts)),
    });

    // Register the new type in the pointer-type list. Without this the lookup
    // above never finds anything and every call creates a distinct type.
    if (last_pointer_type)
    {
        last_pointer_type->pointer.next = result;
    }
    else
    {
        module->first_pointer_type = result;
    }

    return result;
}
// True when `type` is a struct type flagged as a slice.
fn bool is_slice(Type* type)
{
    if (type->id != TypeId::structure)
    {
        return false;
    }

    return type->structure.is_slice;
}
// Returns the slice type for `element_type`, creating it on first use.
//
// A slice is a 16-byte struct { pointer: &T at offset 0, length: u64 at
// offset 8 } named "[]<element name>". Slice types are kept in a list
// (Module::first_slice_type, chained through TypeStruct::next) so each
// element type gets exactly one slice type.
fn Type* get_slice_type(Module* module, Type* element_type)
{
    Type* slice_type = module->first_slice_type;

    if (slice_type)
    {
        while (1)
        {
            assert(is_slice(slice_type));
            assert(slice_type->structure.fields.length == 2);
            auto* pointer_type = slice_type->structure.fields[0].type;
            assert(pointer_type->id == TypeId::pointer);
            auto* candidate_element_type = pointer_type->pointer.element_type;
            if (candidate_element_type == element_type)
            {
                return slice_type;
            }

            if (!slice_type->structure.next)
            {
                break;
            }

            slice_type = slice_type->structure.next;
        }
    }

    // Tail of the slice list, or null when the list is empty.
    Type* last_slice_type = slice_type;

    auto fields = arena_allocate<Field>(module->arena, 2);
    fields[0] = {
        .name = str("pointer"),
        .type = get_pointer_type(module, element_type),
        .offset = 0,
        .line = 0,
    };
    fields[1] = {
        .name = str("length"),
        .type = integer_type(module, { .bit_count = 64, .is_signed = false }),
        .offset = 8,
        .line = 0,
    };

    String name_parts[] = {
        str("[]"),
        element_type->name,
    };

    auto result = type_allocate_init(module, {
        .structure = {
            .fields = fields,
            .byte_size = 16,
            .byte_alignment = 8,
            .line = 0,
            .is_slice = true,
        },
        .id = TypeId::structure,
        .name = arena_join_string(module->arena, array_to_slice(name_parts)),
    });

    if (last_slice_type)
    {
        // Link through the slice list (structure.next), not Type::next.
        // Type::next belongs to the all-types list that type_allocate_init()
        // maintains; overwriting it would drop every type allocated between
        // the previous slice type and this one from that list.
        last_slice_type->structure.next = result;
    }
    else
    {
        module->first_slice_type = result;
    }

    return result;
}
void parse(Module* module);
void emit(Module* module);

5
src/emitter.cpp Normal file
View File

@ -0,0 +1,5 @@
#include <compiler.h>
// Emission stage, invoked by compile() after parse(). Currently an empty stub.
void emit(Module* module)
{
}

13
src/entry_point.cpp Normal file
View File

@ -0,0 +1,13 @@
#include <lib.h>
void entry_point(Slice<const char*> arguments, Slice<const char*> environment);
int main(int argc, const char* argv[], const char* envp[])
{
auto* envp_end = envp;
while (*envp_end)
{
envp_end += 1;
}
entry_point({argv, (u64)argc}, {envp, (u64)(envp_end - envp)});
return 0;
}

2
src/entry_point.h Normal file
View File

@ -0,0 +1,2 @@
#include <lib.h>

681
src/lib.h Normal file
View File

@ -0,0 +1,681 @@
#pragma once
// Storage-class shorthands: file-local globals and file-local functions.
#define global_variable static
#define fn static
// Marks an expression as intentionally unused.
#define unused(x) (void)(x)
#define breakpoint() __builtin_debugtrap()
// Length of a string literal, excluding its terminating NUL.
#define string_literal_length(s) (sizeof(s) - 1)
// Builds a String that views a string literal (without the terminating NUL).
#define string_literal(s) ((String){ .pointer = (u8*)(s), .length = string_literal_length(s), })
#define str(x) string_literal(x)
#define offsetof(S, f) __builtin_offsetof(S, f)
// Element count of a true array (not a pointer). Fully parenthesized so it
// composes with neighbouring operators, e.g. `i % array_length(a)`.
#define array_length(arr) (sizeof(arr) / sizeof((arr)[0]))
// Braced initializers for a Slice<T> over an array and for a byte slice over
// the array's storage.
#define array_to_slice(arr) { .pointer = (arr), .length = array_length(arr) }
#define array_to_bytes(arr) { .pointer = (u8*)(arr), .length = sizeof(arr) }
// Underlying integer type of an enum.
#define backing_type(E) __underlying_type(E)
#define unreachable_raw() __builtin_unreachable()
#define trap_raw() __builtin_trap()
// unreachable(): traps in debug builds; in other builds it is an optimizer
// hint (__builtin_unreachable), so reaching it is undefined behavior.
#if BB_DEBUG
#define unreachable() trap_raw()
#else
#define unreachable() unreachable_raw()
#endif
// NOTE: MIN/MAX evaluate their arguments more than once.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
// Branch-prediction hints.
#define expect(x, b) __builtin_expect(!!(x), b)
#define likely(x) expect(x, 1)
#define unlikely(x) expect(x, 0)
// Always evaluates the condition. A failed assertion goes through
// unreachable(), i.e. it traps only when BB_DEBUG is set.
#define assert(x) (unlikely(!(x)) ? unreachable() : unused(0))
// Count leading / trailing zero bits (type-generic builtins).
#define clz(x) __builtin_clzg(x)
#define ctz(x) __builtin_ctzg(x)
// Expands to a switch case that returns the enumerator's name as a String.
#define case_to_name(E,n) case E::n: return str(#n)
// Short names for the builtin scalar types used throughout the code base.
// Unsigned integers.
using u8 = unsigned char;
using u16 = unsigned short;
using u32 = unsigned int;
using u64 = unsigned long;

// Signed integers.
using s8 = signed char;
using s16 = signed short;
using s32 = signed int;
using s64 = signed long;

// Floating point.
using f32 = float;
using f64 = double;
// Rounds `value` up using the mask `alignment - 1`.
// `alignment` must be non-zero; the masking yields a multiple of `alignment`
// only when it is a power of two.
fn u64 align_forward(u64 value, u64 alignment)
{
    assert(alignment != 0);
    u64 low_bits = alignment - 1;
    return (value + low_bits) & ~low_bits;
}
// Binary size units, in bytes.
constexpr u64 kb = 1024;
constexpr u64 mb = 1024 * kb;
constexpr u64 gb = 1024 * mb;
// Hand-written declarations of the few libc functions this header uses, so no
// libc headers have to be included.
extern "C" [[noreturn]] void exit(s32 status);
extern "C" void *memcpy (void* __restrict destination, const void *__restrict source, u64 byte_count);
// memcmp returns an int (negative, zero or positive). Declaring it as void*
// made callers compare a full pointer-sized register of which the callee only
// defines the low 32 bits.
extern "C" int memcmp (const void* a, const void *b, u64 __n);
extern "C" const char* realpath(const char* __restrict path, char* resolved_path);
// Untyped pointer/length pair. Slice<T> is reinterpreted as a RawSlice in
// Slice::equal(), so the two must keep the same member order.
struct RawSlice
{
void* pointer;
// Element count; the helpers below take the element size separately.
u64 length;
};
// Compares two raw slices of elements that are `size_of_T` bytes wide.
// They are equal when the lengths match and the bytes are identical.
fn bool raw_slice_equal(RawSlice a, RawSlice b, u64 size_of_T)
{
    if (a.length != b.length)
    {
        return false;
    }

    // Same storage and same length: no need to look at the bytes.
    if (a.pointer == b.pointer)
    {
        return true;
    }

    return memcmp(a.pointer, b.pointer, a.length * size_of_T) == 0;
}
// Returns the sub-range [start, end) of a raw slice whose elements are
// `size_of_T` bytes wide. No bounds checking is performed.
fn RawSlice raw_slice_slice(RawSlice s, u64 start, u64 end, u64 size_of_T)
{
    auto* base = (u8*)s.pointer;
    u64 byte_offset = size_of_T * start;
    return {base + byte_offset, end - start};
}
// Non-owning view over `length` contiguous elements of type T.
template <typename T>
struct Slice
{
    T* pointer;
    u64 length;

    // begin()/end() make a Slice usable in range-for loops.
    T* begin()
    {
        return pointer;
    }

    T* end()
    {
        return begin() + length;
    }

    // Element access; asserts that the index is within bounds.
    T& operator[](u64 index)
    {
        assert(index < length);
        return *(pointer + index);
    }

    // Element-wise byte comparison with another slice of the same type.
    bool equal(Slice<T> other)
    {
        auto* self_raw = (RawSlice*)this;
        auto* other_raw = (RawSlice*)&other;
        return raw_slice_equal(*self_raw, *other_raw, sizeof(T));
    }

    // Sub-slice [start, end). No bounds checking is performed.
    Slice<T> operator()(u64 start, u64 end)
    {
        Slice<T> sub = {pointer + start, end - start};
        return sub;
    }

    // Sub-slice from `start` to the end of this slice.
    Slice<T> operator()(u64 start)
    {
        return (*this)(start, length);
    }
};
using String = Slice<u8>;
// Reinterprets `string` as a C string without copying.
// Asserts that the byte just past the end of the slice is a 0 terminator.
fn const char* cstr(String string)
{
    const u8* bytes = string.pointer;
    assert(bytes[string.length] == 0);
    return (const char*)bytes;
}
// Wraps a 0-terminated C string in a String view (no copy).
// The returned length excludes the terminator.
fn String c_string_to_slice(const char* cstr)
{
    u64 length = 0;
    while (cstr[length] != 0)
    {
        length += 1;
    }
    return { (u8*)cstr, length };
}
constexpr auto string_no_match = ~(u64)0;
// Returns the index of the first occurrence of `ch` in `string`,
// or `string_no_match` when it does not occur.
fn u64 string_first_character(String string, u8 ch)
{
    for (u64 index = 0; index < string.length; index += 1)
    {
        if (string[index] == ch)
        {
            return index;
        }
    }
    return string_no_match;
}
// Returns the index of the last occurrence of `ch` in `string`,
// or `string_no_match` when it does not occur.
fn u64 string_last_character(String string, u8 ch)
{
    // Walk backwards; `remaining` is always one past the index being examined.
    for (u64 remaining = string.length; remaining > 0; remaining -= 1)
    {
        u64 index = remaining - 1;
        if (string[index] == ch)
        {
            return index;
        }
    }
    return string_no_match;
}
// Memory protection request taken by os_reserve/os_commit, which copy these bits into a PROT value.
struct ProtectionFlags
{
u8 read:1;
u8 write:1;
u8 execute:1;
};
// Mapping options taken by os_reserve, which copies them into a MAP value.
struct MapFlags
{
// Selects MAP::Type::priv when set, MAP::Type::shared otherwise.
u8 priv:1;
u8 anonymous:1;
u8 no_reserve:1;
u8 populate:1;
};
// 32-bit protection word passed by value as the protection argument of mmap/mprotect as declared in this file.
// NOTE(review): bit positions appear to mirror Linux PROT_READ/PROT_WRITE/PROT_EXEC/PROT_SEM — confirm before porting.
struct PROT
{
u32 read:1;
u32 write:1;
u32 execute:1;
u32 sem:1;
// Padding up to 32 bits.
u32 _:28;
};
// The struct must occupy exactly one 32-bit word.
static_assert(sizeof(PROT) == sizeof(u32));
// 32-bit mapping-flags word passed by value as the flags argument of mmap as declared in this file.
// Fields named `_N` are padding that keeps the named bits at their positions.
// NOTE(review): bit positions appear to mirror the Linux x86-64 MAP_* constants — confirm before porting.
struct MAP
{
// Sharing mode, stored in the low 4 bits.
enum class Type : u32
{
shared = 0,
priv = 1,
shared_validate = 2,
};
Type type:4;
u32 fixed:1;
u32 anonymous:1;
u32 bit32:1;
u32 _0: 1;
u32 grows_down:1;
u32 _1: 2;
u32 deny_write:1;
u32 executable:1;
u32 locked:1;
u32 no_reserve:1;
u32 populate:1;
u32 non_block:1;
u32 stack:1;
u32 huge_tlb:1;
u32 sync:1;
u32 fixed_no_replace:1;
u32 _2:5;
u32 uninitialized:1;
u32 _3:5;
};
// The struct must occupy exactly one 32-bit word.
static_assert(sizeof(MAP) == sizeof(u32));
// 32-bit open-flags word passed by value as the flags argument of open as declared in this file.
// Fields named `_N` are padding that keeps the named bits at their positions.
// NOTE(review): bit positions appear to mirror the Linux x86-64 O_* constants — confirm before porting.
struct OPEN
{
// Access mode, stored in the low 2 bits.
enum class AccessMode : u32
{
read_only = 0,
write_only = 1,
read_write = 2,
};
AccessMode access_mode:2;
u32 _0:4;
u32 creat:1;
u32 excl:1;
u32 no_ctty:1;
u32 trunc:1;
u32 append:1;
u32 non_block:1;
u32 d_sync:1;
u32 a_sync:1;
u32 direct:1;
u32 _1:1;
u32 directory:1;
u32 no_follow:1;
u32 no_a_time:1;
u32 cloexec:1;
u32 sync:1;
u32 path:1;
u32 tmp_file:1;
u32 _2:9;
};
// The struct must occupy exactly one 32-bit word.
static_assert(sizeof(OPEN) == sizeof(u32));
// Local definitions of the POSIX-named scalar types used by the Stat layout below.
// NOTE(review): widths presumably match the x86-64 Linux ABI — confirm before porting.
using uid_t = u32;
using gid_t = u32;
using off_t = s64;
using ino_t = u64;
using dev_t = u64;
// Timestamp split into whole seconds and nanoseconds; used for the time fields of Stat.
struct timespec
{
s64 seconds;
s64 nanoseconds;
};
// Output buffer for fstat as declared in this file; os_file_size reads `size` from it.
// Fields named `_N` are padding/reserved space.
// NOTE(review): field order and widths appear to follow the x86-64 Linux `struct stat` — confirm before porting.
struct Stat
{
dev_t dev;
ino_t ino;
u64 nlink;
u32 mode;
uid_t uid;
gid_t gid;
u32 _0;
dev_t rdev;
// File size (read by os_file_size).
off_t size;
s64 blksize;
s64 blocks;
timespec atim;
timespec mtim;
timespec ctim;
s64 _1[3];
};
// Hand-written prototypes for the libc/system-call wrappers used by the os_* helpers below.
// Flag arguments are declared with the 32-bit bitfield structs (PROT, MAP, OPEN) defined above.
// Returns the address of the calling thread's errno value (see errno() below).
extern "C" s32* __errno_location();
extern "C" void* mmap(void*, u64, PROT, MAP, s32, s64);
extern "C" s32 mprotect(void*, u64, PROT);
extern "C" s64 ptrace(s32, s32, u64, u64);
// Variadic: os_open passes the creation mode as the third argument.
extern "C" s32 open(const char*, OPEN, ...);
extern "C" s32 close(s32);
extern "C" s32 fstat(s32, Stat*);
extern "C" s64 write(s32, u8*, u64);
extern "C" s64 read(s32, u8*, u64);
extern "C" s32 mkdir(const char*, u64);
// Typed view of the errno value returned by errno(); only the codes used by this file are listed.
enum class Error : u32
{
success = 0,
// NOTE(review): presumably EPERM — confirm against the target's errno table.
perm = 1,
};
// Reads the current errno value through __errno_location() and returns it as an Error.
fn Error errno()
{
    s32* location = __errno_location();
    return (Error)*location;
}
// Maps `size` bytes at (or near) `base` with the requested protection and mapping options.
// The mapping uses file descriptor -1 and offset 0.
// Asserts that mmap did not return the all-ones failure address.
fn void* os_reserve(void* base, u64 size, ProtectionFlags protection, MapFlags map)
{
    PROT prot = {
        .read = protection.read,
        .write = protection.write,
        .execute = protection.execute,
    };

    MAP::Type mapping_type = MAP::Type::shared;
    if (map.priv)
    {
        mapping_type = MAP::Type::priv;
    }

    MAP flags = {
        .type = mapping_type,
        .anonymous = map.anonymous,
        .no_reserve = map.no_reserve,
        .populate = map.populate,
    };

    void* mapping = mmap(base, size, prot, flags, -1, 0);
    assert((u64)mapping != ~(u64)0);
    return mapping;
}
// Applies `protection` to the `size` bytes starting at `address` via mprotect.
// Asserts that the call reports success (returns 0).
fn void os_commit(void* address, u64 size, ProtectionFlags protection)
{
    PROT prot = {
        .read = protection.read,
        .write = protection.write,
        .execute = protection.execute,
    };

    s32 status = mprotect(address, size, prot);
    assert(status == 0);
}
// Options accepted by os_open. os_open reads `read`/`write` (access mode), `create`, `truncate`
// and `directory`; it does not consult `execute`.
struct OpenFlags
{
u32 truncate:1;
u32 execute:1;
u32 write:1;
u32 read:1;
u32 create:1;
u32 directory:1;
};
// Permission request accepted by os_open. os_open currently only consults `execute`
// (mode 0755 when set, 0644 otherwise).
struct Permissions
{
u32 read:1;
u32 write:1;
u32 execute:1;
};
// Opens `path` and returns the file descriptor produced by open() (negative on failure;
// see is_file_valid).
//
// path:        must be 0-terminated right after its last byte (required by cstr).
// flags:       `read`/`write` select the access mode (at least one must be set);
//              `create`, `truncate` and `directory` are forwarded to the OPEN word.
// permissions: only `execute` is consulted for the creation mode.
fn s32 os_open(String path, OpenFlags flags, Permissions permissions)
{
    OPEN::AccessMode access_mode;
    if (flags.read && flags.write)
    {
        access_mode = OPEN::AccessMode::read_write;
    }
    else if (flags.read)
    {
        access_mode = OPEN::AccessMode::read_only;
    }
    else if (flags.write)
    {
        // Write-only requests must not be opened read-only, or every subsequent write fails.
        access_mode = OPEN::AccessMode::write_only;
    }
    else
    {
        // Neither read nor write was requested: caller error.
        unreachable();
    }

    auto o = OPEN {
        .access_mode = access_mode,
        .creat = flags.create,
        .trunc = flags.truncate,
        .directory = flags.directory,
    };

    // TODO: derive the mode from all permission bits; only `execute` is honored for now.
    auto mode = permissions.execute ? 0755 : 0644;

    auto fd = open(cstr(path), o, mode);
    return fd;
}
// A descriptor is considered valid when it is non-negative.
fn bool is_file_valid(s32 fd)
{
    return !(fd < 0);
}
// Closes `fd`. Asserts that the descriptor is valid and that close() reports success.
fn void os_close(s32 fd)
{
    assert(is_file_valid(fd));

    s32 status = close(fd);
    assert(status == 0);
}
// Returns the size reported by fstat for `fd`. Asserts that fstat succeeds.
fn u64 os_file_size(s32 fd)
{
    Stat file_info;
    s32 status = fstat(fd, &file_info);
    assert(status == 0);
    return (u64)file_info.size;
}
// Issues a single read() of up to `byte_count` bytes into `buffer` and returns how many
// bytes were read. Asserts that the result is strictly positive, so both errors and a
// zero-byte read trip the assertion.
fn u64 os_read_partially(s32 fd, u8* buffer, u64 byte_count)
{
    s64 bytes_read = read(fd, buffer, byte_count);
    assert(bytes_read > 0);
    return (u64)bytes_read;
}
// Reads exactly `byte_count` bytes from `fd` into the start of `buffer`,
// repeating partial reads until the request is satisfied.
// Asserts that `buffer` is large enough.
fn void os_read(s32 fd, String buffer, u64 byte_count)
{
    assert(byte_count <= buffer.length);

    u64 total_read = 0;
    while (total_read < byte_count)
    {
        u8* destination = buffer.pointer + total_read;
        u64 remaining = byte_count - total_read;
        total_read += os_read_partially(fd, destination, remaining);
    }

    assert(total_read == byte_count);
}
// Issues a single write() of up to `byte_count` bytes from `buffer` and returns how many
// bytes were written. Asserts that the result is strictly positive.
fn u64 os_write_partially(s32 fd, u8* buffer, u64 byte_count)
{
    s64 bytes_written = write(fd, buffer, byte_count);
    assert(bytes_written > 0);
    return (u64)bytes_written;
}
// Writes all of `content` to `fd`, repeating partial writes until every byte is out.
fn void os_write(s32 fd, String content)
{
    u64 total_written = 0;
    while (total_written < content.length)
    {
        u8* source = content.pointer + total_written;
        u64 remaining = content.length - total_written;
        total_written += os_write_partially(fd, source, remaining);
    }

    assert(total_written == content.length);
}
// Resolves `relative_path` through realpath(), using `buffer` as the output storage.
// Returns a view of the resolved path, or an empty String when realpath() fails.
// `relative_path` must be 0-terminated (required by cstr). The length check against
// `buffer` happens only after realpath() has already written into it.
fn String path_absolute_stack(String buffer, String relative_path)
{
    const char* resolved = realpath(cstr(relative_path), (char*)buffer.pointer);
    if (!resolved)
    {
        return {};
    }

    String absolute = c_string_to_slice(resolved);
    assert(absolute.length < buffer.length);
    return absolute;
}
// Reports whether a debugger appears to be attached: issues ptrace request 0 and
// returns true only when the call fails with Error::perm.
// NOTE(review): request 0 is presumably PTRACE_TRACEME, which has a side effect on the
// process when it succeeds — confirm this is acceptable for callers.
fn bool os_is_debugger_present()
{
    if (ptrace(0, 0, 0, 0) != -1)
    {
        return false;
    }

    return errno() == Error::perm;
}
// Best-effort directory creation with mode 0755; the mkdir() status is deliberately ignored.
fn void make_directory(const char* path)
{
    s32 status = mkdir(path, 0755);
    unused(status);
}
// Writes `string` to file descriptor 1.
fn void print(String string)
{
    constexpr s32 output_descriptor = 1;
    os_write(output_descriptor, string);
}
// Parameters for arena_initialize.
struct ArenaInitialization
{
// Size in bytes of the address range mapped for the arena.
u64 reserved_size;
// Stored into Arena::granularity; not otherwise used by the code in this chunk.
u64 granularity;
// Number of bytes passed to os_commit at creation; becomes the initial Arena::os_position.
u64 initial_size;
};
// Arena header. arena_initialize places it at the very start of the arena's own mapping,
// so all positions are byte offsets from the Arena pointer itself.
struct Arena
{
// Size of the mapped address range, in bytes.
u64 reserved_size;
// Offset of the next free byte.
u64 position;
// Upper bound for `position`; allocations beyond it hit unreachable() in arena_allocate_bytes.
u64 os_position;
u64 granularity;
// Padding/reserved space.
u8 reserved[32];
};
// First usable offset: allocations start right after the header.
constexpr u64 arena_minimum_position = sizeof(Arena);
// Creates an arena: maps `i.reserved_size` bytes as a private, anonymous, no-reserve
// read/write mapping, applies read/write protection to the first `i.initial_size` bytes,
// and writes the Arena header at the start of the mapping.
fn Arena* arena_initialize(ArenaInitialization i)
{
    ProtectionFlags read_write = {
        .read = 1,
        .write = 1,
    };
    MapFlags private_anonymous = {
        .priv = 1,
        .anonymous = 1,
        .no_reserve = 1,
    };

    void* base = os_reserve(0, i.reserved_size, read_write, private_anonymous);
    os_commit(base, i.initial_size, read_write);

    // The header lives inside the mapping it describes.
    auto* arena = (Arena*)base;
    *arena = {
        .reserved_size = i.reserved_size,
        .position = arena_minimum_position,
        .os_position = i.initial_size,
        .granularity = i.granularity,
    };
    return arena;
}
// Creates an arena with the default 4 GiB reservation and 4 KiB granularity,
// using `initial_size` as the initially committed size.
fn inline Arena* arena_initialize_default(u64 initial_size)
{
    return arena_initialize({
        .reserved_size = 4 * gb,
        .granularity = 4 * kb,
        .initial_size = initial_size,
    });
}
// Bump-allocates `size` bytes from `arena`, with the offset rounded up to `alignment`.
// Returns a null pointer when `size` is 0 (the arena is left untouched).
// The arena does not grow: an allocation that would pass `os_position` hits unreachable().
fn void* arena_allocate_bytes(Arena* arena, u64 size, u64 alignment)
{
    if (size == 0)
    {
        return 0;
    }

    u64 start = align_forward(arena->position, alignment);
    u64 end = start + size;
    if (end > arena->os_position)
    {
        unreachable();
    }

    arena->position = end;
    assert(arena->position <= arena->os_position);
    return (u8*)arena + start;
}
// Allocates storage for `count` elements of T (aligned to alignof(T)) from `arena`.
// The memory is returned as-is; no constructors are run.
template <typename T>
fn Slice<T> arena_allocate(Arena* arena, u64 count)
{
    u64 byte_count = sizeof(T) * count;
    auto* elements = (T*)arena_allocate_bytes(arena, byte_count, alignof(T));
    return { elements, count };
}
// Concatenates `pieces` into a new arena-allocated string.
// One extra 0 byte is written after the content; the returned length excludes it.
fn String arena_join_string(Arena* arena, Slice<String> pieces)
{
    u64 total_length = 0;
    for (String& piece : pieces)
    {
        total_length += piece.length;
    }

    u8* joined = (u8*)arena_allocate_bytes(arena, total_length + 1, 1);

    u8* cursor = joined;
    for (String& piece : pieces)
    {
        memcpy(cursor, piece.pointer, piece.length);
        cursor += piece.length;
    }

    assert(u64(cursor - joined) == total_length);
    *cursor = 0;
    return { joined, total_length };
}
// Copies `string` into `arena` and appends a 0 byte; the returned length excludes it.
fn String arena_duplicate_string(Arena* arena, String string)
{
    u64 length = string.length;
    u8* copy = (u8*)arena_allocate_bytes(arena, length + 1, 1);
    memcpy(copy, string.pointer, length);
    copy[length] = 0;
    return { copy, length };
}
// Rewinds the arena to a previously observed `position`, releasing everything allocated
// after it. Asserts that `position` is not ahead of the current position.
fn void arena_restore(Arena* arena, u64 position)
{
    u64 current = arena->position;
    assert(position <= current);
    arena->position = position;
}
// Discards every allocation by moving the position back to just after the arena header.
fn void arena_reset(Arena* arena)
{
    constexpr u64 first_free_offset = arena_minimum_position;
    arena->position = first_free_offset;
}
// Resolves `relative_path` into a 4096-byte stack buffer and returns an arena-owned,
// 0-terminated copy of the result. A failed resolution yields a zero-length string.
fn String path_absolute(Arena* arena, String relative_path)
{
    u8 scratch[4096];
    String resolved = path_absolute_stack(array_to_slice(scratch), relative_path);
    return arena_duplicate_string(arena, resolved);
}
// Reads the whole file at `file_path` into memory allocated from `arena`.
// Returns an empty String when the file cannot be opened.
// The result is exactly the file's bytes; no 0 terminator is appended.
fn String file_read(Arena* arena, String file_path)
{
    s32 fd = os_open(file_path, { .read = 1 }, { .read = 1 });
    if (!is_file_valid(fd))
    {
        return {};
    }

    u64 byte_count = os_file_size(fd);
    String content = arena_allocate<u8>(arena, byte_count);
    os_read(fd, content, byte_count);
    os_close(fd);
    return content;
}
// Aborts the program: traps when a debugger is detected, otherwise exits with status 1.
[[noreturn]] fn void fail()
{
    bool being_debugged = os_is_debugger_present();
    if (being_debugged)
    {
        trap_raw();
    }

    exit(1);
}
// Prints `string` and then aborts the program through fail().
[[noreturn]] fn void fail_with_message(String string)
{
    String message = string;
    print(message);
    fail();
}
// Rounds `n` up to the nearest power of two; a value that already is one is returned unchanged.
// The arithmetic wraps: an input of 0, or anything above 2^63, produces 0.
fn u64 next_power_of_two(u64 n)
{
    n -= 1;
    // Smear the highest set bit into every lower position (shifts of 1, 2, 4, ..., 32).
    for (u64 shift = 1; shift <= 32; shift *= 2)
    {
        n |= n >> shift;
    }
    return n + 1;
}

3817
src/parser.cpp Normal file

File diff suppressed because it is too large Load Diff