bloat-buster/src/parser.cpp
2025-05-02 08:10:55 -06:00

658 lines
19 KiB
C++

#include <compiler.h>
// Delimiter characters referenced by the parser. Named constants are used
// instead of raw character literals at the call sites.
global_variable constexpr u8 left_bracket = '[';
global_variable constexpr u8 right_bracket = ']';
global_variable constexpr u8 left_brace = '{';
global_variable constexpr u8 right_brace = '}';
global_variable constexpr u8 left_parenthesis = '(';
global_variable constexpr u8 right_parenthesis = ')';
// Returns true when `ch` is one of the four whitespace bytes the parser
// skips: space, line feed, horizontal tab or carriage return.
fn bool is_space(u8 ch)
{
    switch (ch)
    {
        case ' ':
        case '\n':
        case '\t':
        case '\r':
            return true;
        default:
            return false;
    }
}
// Returns true when `ch` is an ASCII lowercase letter ('a'..'z').
fn bool is_lower(u8 ch)
{
    return ('a' <= ch) && (ch <= 'z');
}
// Returns true when `ch` is an ASCII uppercase letter ('A'..'Z').
fn bool is_upper(u8 ch)
{
    return ('A' <= ch) && (ch <= 'Z');
}
// Returns true when `ch` is an ASCII decimal digit ('0'..'9').
fn bool is_decimal(u8 ch)
{
    if (ch < '0')
    {
        return false;
    }
    return ch <= '9';
}
// Returns true when `ch` is a lowercase hexadecimal letter ('a'..'f').
fn bool is_hexadecimal_alpha_lower(u8 ch)
{
    switch (ch)
    {
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'e':
        case 'f':
            return true;
        default:
            return false;
    }
}
// Returns true when `ch` is an uppercase hexadecimal letter ('A'..'F').
fn bool is_hexadecimal_alpha_upper(u8 ch)
{
    switch (ch)
    {
        case 'A':
        case 'B':
        case 'C':
        case 'D':
        case 'E':
        case 'F':
            return true;
        default:
            return false;
    }
}
// Returns true when `ch` is a hexadecimal letter of either case
// ('a'..'f' or 'A'..'F').
fn bool is_hexadecimal_alpha(u8 ch)
{
    if (is_hexadecimal_alpha_lower(ch))
    {
        return true;
    }
    return is_hexadecimal_alpha_upper(ch);
}
// Returns true when `ch` is a hexadecimal digit: a decimal digit or a
// hexadecimal letter of either case.
fn bool is_hexadecimal(u8 ch)
{
    if (is_decimal(ch))
    {
        return true;
    }
    return is_hexadecimal_alpha(ch);
}
// Returns true when `ch` may begin an identifier: an ASCII letter of either
// case or an underscore.
fn bool is_identifier_start(u8 ch)
{
    if (ch == '_')
    {
        return true;
    }
    return is_lower(ch) || is_upper(ch);
}
// Returns true when `ch` may appear inside an identifier: anything accepted
// by is_identifier_start, plus decimal digits.
fn bool is_identifier(u8 ch)
{
    if (is_decimal(ch))
    {
        return true;
    }
    return is_identifier_start(ch);
}
// Returns the 1-based line number of the parser's current position.
// `module.line_offset` counts the newlines consumed so far, hence the +1.
// Asserts that the value is below the maximum u32 before narrowing.
fn u32 get_line(Module& module)
{
    auto one_based_line = module.line_offset + 1;
    assert(one_based_line < ~(u32)0);
    auto narrowed = (u32)one_based_line;
    return narrowed;
}
// Returns the column of the parser's current position, computed as the
// distance from `module.line_character_offset` plus one.
// Asserts that the value is below the maximum u32 before narrowing.
//
// NOTE(review): skip_space stores the offset of the '\n' byte itself in
// line_character_offset, while the first line starts from 0. Columns on
// lines after the first therefore appear to come out one higher than on
// the first line -- confirm whether this is intended.
fn u32 get_column(Module& module)
{
    auto distance_from_line_start = module.offset - module.line_character_offset;
    auto one_based_column = distance_from_line_start + 1;
    assert(one_based_column < ~(u32)0);
    return (u32)one_based_column;
}
// Snapshot of the three position fields of a Module, taken by
// get_checkpoint and restored by set_checkpoint so the parser can rewind.
struct Checkpoint
{
// Copy of Module::offset (index into the module content).
u64 offset;
// Copy of Module::line_offset (newlines consumed so far).
u64 line_offset;
// Copy of Module::line_character_offset (used to compute the column).
u64 line_character_offset;
};
// Captures the module's current position (offset, line offset and line
// character offset) so it can later be restored with set_checkpoint.
fn Checkpoint get_checkpoint(Module& module)
{
    Checkpoint snapshot = {};
    snapshot.offset = module.offset;
    snapshot.line_offset = module.line_offset;
    snapshot.line_character_offset = module.line_character_offset;
    return snapshot;
}
// Rewinds (or fast-forwards) the module to a position previously captured
// with get_checkpoint by overwriting its three position fields.
fn void set_checkpoint(Module& module, Checkpoint checkpoint)
{
    // The three fields are independent; all of them are restored together.
    module.line_character_offset = checkpoint.line_character_offset;
    module.line_offset = checkpoint.line_offset;
    module.offset = checkpoint.offset;
}
// If the byte at the current offset equals `expected_ch`, advances the
// offset past it and returns true. Otherwise (mismatch or end of content)
// leaves the offset where it was and returns false.
fn bool consume_character_if_match(Module& module, u8 expected_ch)
{
    auto cursor = module.offset;

    // Nothing left to read: cannot match.
    if (!(cursor < module.content.length))
    {
        return false;
    }

    bool matches = module.content[cursor] == expected_ch;
    if (matches)
    {
        module.offset = cursor + 1;
    }
    else
    {
        module.offset = cursor;
    }

    return matches;
}
// Consumes `expected_ch` at the current offset; calls report_error() when
// the next byte is something else or the content is exhausted.
fn void expect_character(Module& module, u8 expected_ch)
{
    bool consumed = consume_character_if_match(module, expected_ch);
    if (consumed)
    {
        return;
    }

    report_error();
}
// Advances the module offset past any mix of whitespace and `//` line
// comments, keeping the line bookkeeping up to date along the way.
//
// Every time a '\n' is consumed, `line_offset` is incremented and
// `line_character_offset` is set to the offset of that '\n' byte.
// The function repeats whitespace-then-comment passes until a pass makes
// no progress.
fn void skip_space(Module& module)
{
    for (;;)
    {
        auto pass_start = module.offset;

        // Pass 1: plain whitespace.
        for (; module.offset < module.content.length; module.offset += 1)
        {
            auto ch = module.content[module.offset];
            if (!is_space(ch))
            {
                break;
            }

            if (ch == '\n')
            {
                module.line_offset += 1;
                module.line_character_offset = module.offset;
            }
        }

        // Pass 2: a `//` comment needs at least two bytes of lookahead.
        auto comment_start = module.offset;
        bool starts_comment = false;
        if (comment_start + 1 < module.content.length)
        {
            starts_comment = module.content[comment_start] == '/' && module.content[comment_start + 1] == '/';
        }

        if (starts_comment)
        {
            // Run to the end of the line (or the end of the content).
            while (module.offset < module.content.length && module.content[module.offset] != '\n')
            {
                module.offset += 1;
            }

            // Stopped on a newline rather than at the end: consume it too.
            if (module.offset < module.content.length)
            {
                module.line_character_offset = module.offset;
                module.line_offset += 1;
                module.offset += 1;
            }
        }

        // No byte consumed during this pass: nothing more to skip.
        if (module.offset == pass_start)
        {
            break;
        }
    }
}
// Parses an identifier starting at the current offset and returns it as a
// slice of the module content, leaving the offset just past its last byte.
//
// An identifier is one is_identifier_start byte followed by any number of
// is_identifier bytes. If no identifier is present at the current offset
// (including when the offset is already at the end of the content),
// report_error() is called.
fn String parse_identifier(Module& module)
{
    auto start = module.offset;

    // The first byte is only inspected when it exists. Callers can reach
    // this function with the offset at the end of the content (for example
    // right after skip_space), and indexing there would read out of bounds.
    if (start < module.content.length && is_identifier_start(module.content[start]))
    {
        module.offset = start + 1;

        while (module.offset < module.content.length && is_identifier(module.content[module.offset]))
        {
            module.offset += 1;
        }
    }

    auto end = module.offset;

    // Nothing consumed means there was no identifier here.
    if (end == start)
    {
        report_error();
    }

    return module.content(start, end);
}
// Appends the decimal digit character `ch` to `accumulator`:
// shifts the accumulated value one decimal place and adds the digit.
// No validation or overflow checking is done here.
fn u64 accumulate_decimal(u64 accumulator, u8 ch)
{
    auto shifted = accumulator * 10;
    auto digit_value = ch - '0';
    return shifted + digit_value;
}
// Converts a string made only of decimal digits into its u64 value.
// The input is assumed to be valid: each byte is only checked with an
// assert, and an empty string yields 0.
fn u64 parse_integer_decimal_assume_valid(String string)
{
    u64 result = 0;

    for (u64 index = 0; index < string.length; index += 1)
    {
        u8 digit = string[index];
        assert(is_decimal(digit));
        result = accumulate_decimal(result, digit);
    }

    return result;
}
// Parses a type at the current offset and returns it.
//
// Currently handled spellings:
//   - `void`      -> void_type(module)
//   - `noreturn`  -> noreturn_type(module)
//   - `s<N>`/`u<N>` (signed / unsigned integer with N bits) -> integer_type
//     N must be in 1..64 or exactly 128; anything else is reported as an error.
// Any other identifier, and types starting with '&', '[' or '#', hit
// trap_raw() (these paths are not implemented here). Any other leading
// byte, or running out of input, is reported through report_error().
fn Type* parse_type(Module& module)
{
    // A type is required here; guard the lookahead so that reaching the end
    // of the content is reported instead of reading out of bounds.
    if (module.offset >= module.content.length)
    {
        report_error();
    }

    auto start_character = module.content[module.offset];

    if (is_identifier_start(start_character))
    {
        auto identifier = parse_identifier(module);

        if (identifier.equal(str("void")))
        {
            return void_type(module);
        }
        else if (identifier.equal(str("noreturn")))
        {
            return noreturn_type(module);
        }
        else
        {
            // Integer types look like `s32` / `u8`: a sign letter followed
            // by at least one character, all of which must be digits.
            auto is_int_type = identifier.length > 1 && (identifier[0] == 's' || identifier[0] == 'u');

            if (is_int_type)
            {
                for (auto ch : identifier(1))
                {
                    is_int_type = is_int_type && is_decimal(ch);
                }
            }

            if (is_int_type)
            {
                bool is_signed;
                switch (identifier[0])
                {
                    case 's': is_signed = true; break;
                    case 'u': is_signed = false; break;
                    default: unreachable();
                }

                // Accumulate the bit count digit by digit and stop as soon
                // as it exceeds 128, the largest accepted width. The value
                // is at most 128 before each step, so the multiplication
                // cannot wrap; an unbounded accumulation would let a very
                // long digit string overflow u64 into a "valid" width.
                u64 bit_count = 0;
                for (auto ch : identifier(1))
                {
                    bit_count = accumulate_decimal(bit_count, ch);
                    if (bit_count > 128)
                    {
                        report_error();
                    }
                }

                if (bit_count == 0)
                {
                    report_error();
                }

                // Widths above 64 are rejected, except for exactly 128.
                if (bit_count > 64)
                {
                    if (bit_count != 128)
                    {
                        report_error();
                    }
                }

                auto result = integer_type(module, { .bit_count = (u32)bit_count, .is_signed = is_signed });
                return result;
            }
            else
            {
                // Any other named type: not handled here.
                trap_raw();
            }
        }
    }
    else if (start_character == '&')
    {
        trap_raw();
    }
    else if (start_character == left_bracket)
    {
        trap_raw();
    }
    else if (start_character == '#')
    {
        trap_raw();
    }
    else
    {
        report_error();
    }
}
// Parses the top-level declarations of a module until the content is
// exhausted.
//
// Each iteration handles one global of the shape
//     [export|extern] name [: type] = <value>
// where the bracketed attribute and the type annotation are optional.
// The name is checked against the existing globals and types; a clash is
// reported unless the existing type is a forward declaration.
//
// After '=', an identifier is matched against the global keywords
// (bits, enum, fn, macro, struct, typealias, union). Only `fn` has parsing
// logic so far (optional `[cc(...)]` attribute, argument list, return
// type); every keyword path, as well as the non-keyword path, currently
// ends in trap_raw().
void parse(Module& module)
{
    while (1)
    {
        skip_space(module);

        if (module.offset == module.content.length)
        {
            break;
        }

        bool is_export = false;
        bool is_extern = false;

        auto global_line = get_line(module);
        auto global_column = get_column(module);

        // Optional attribute in brackets before the name: [export] / [extern].
        if (consume_character_if_match(module, left_bracket))
        {
            while (module.offset < module.content.length)
            {
                auto global_keyword_string = parse_identifier(module);

                enum class GlobalKeyword
                {
                    export_keyword,
                    extern_keyword,
                    count,
                };

                String global_keyword_strings[] = {
                    str("export"),
                    str("extern"),
                };
                static_assert(array_length(global_keyword_strings) == (u64)GlobalKeyword::count);

                // Linear search; `i` ends at `count` when nothing matched.
                u32 i;
                for (i = 0; i < array_length(global_keyword_strings); i += 1)
                {
                    String keyword = global_keyword_strings[i];
                    if (keyword.equal(global_keyword_string))
                    {
                        break;
                    }
                }

                auto global_keyword = (GlobalKeyword)i;
                switch (global_keyword)
                {
                    case GlobalKeyword::export_keyword:
                        {
                            is_export = true;
                        } break;
                    case GlobalKeyword::extern_keyword:
                        {
                            is_extern = true;
                        } break;
                    case GlobalKeyword::count:
                        {
                            report_error();
                        } break;
                }

                if (consume_character_if_match(module, right_bracket))
                {
                    break;
                }
                else
                {
                    report_error();
                }
            }

            skip_space(module);
        }

        auto global_name = parse_identifier(module);

        // Reject a name that is already used by another global. The loop
        // stops on the last node rather than running off the list.
        Global* last_global = module.first_global;
        while (last_global)
        {
            if (global_name.equal(last_global->variable.name))
            {
                report_error();
            }

            if (!last_global->next)
            {
                break;
            }

            last_global = last_global->next;
        }

        // Reject a name that is already used by a type, unless that type is
        // only a forward declaration, which is remembered instead.
        Type* type_it = module.first_type;
        Type* forward_declaration = 0;
        while (type_it)
        {
            if (global_name.equal(type_it->name))
            {
                if (type_it->id == TypeId::forward_declaration)
                {
                    forward_declaration = type_it;
                    break;
                }
                else
                {
                    report_error();
                }
            }

            if (!type_it->next)
            {
                break;
            }

            type_it = type_it->next;
        }

        skip_space(module);

        // Optional type annotation: `name: type`.
        Type* global_type = 0;
        if (consume_character_if_match(module, ':'))
        {
            skip_space(module);
            global_type = parse_type(module);
            skip_space(module);
        }

        expect_character(module, '=');

        skip_space(module);

        // A value must follow '='. Report the end of the content here so
        // the lookahead below never indexes past the last byte.
        if (module.offset >= module.content.length)
        {
            report_error();
        }

        bool is_global_keyword = false;

        enum class GlobalKeyword
        {
            bits,
            enumerator,
            function,
            macro,
            structure,
            typealias,
            union_type,
            count,
        };

        // `i` stays at `count` when the value does not start with a keyword.
        auto i = (backing_type(GlobalKeyword))GlobalKeyword::count;

        if (is_identifier_start(module.content[module.offset]))
        {
            // Remember where the identifier started so the parser can rewind
            // when it turns out not to be a keyword.
            auto checkpoint = get_checkpoint(module);
            auto global_string = parse_identifier(module);
            skip_space(module);

            String global_keywords[] = {
                str("bits"),
                str("enum"),
                str("fn"),
                str("macro"),
                str("struct"),
                str("typealias"),
                str("union"),
            };
            static_assert(array_length(global_keywords) == (u64)GlobalKeyword::count);

            for (i = 0; i < (backing_type(GlobalKeyword))GlobalKeyword::count; i += 1)
            {
                String global_keyword = global_keywords[i];
                if (global_string.equal(global_keyword))
                {
                    break;
                }
            }

            auto global_keyword = (GlobalKeyword)i;
            switch (global_keyword)
            {
                case GlobalKeyword::bits:
                    {
                        trap_raw();
                    } break;
                case GlobalKeyword::enumerator:
                    {
                        trap_raw();
                    } break;
                case GlobalKeyword::function:
                    {
                        auto calling_convention = CallingConvention::c;
                        auto function_attributes = FunctionAttributes{};
                        bool is_variable_arguments = false;

                        // Optional function attribute list: `[cc(<name>)]`.
                        if (consume_character_if_match(module, left_bracket))
                        {
                            while (module.offset < module.content.length)
                            {
                                auto function_identifier = parse_identifier(module);

                                enum class FunctionKeyword
                                {
                                    cc,
                                    count,
                                };

                                String function_keywords[] = {
                                    str("cc"),
                                };
                                static_assert(array_length(function_keywords) == (u64)FunctionKeyword::count);

                                // Separate index so the outer keyword index `i` is not shadowed.
                                backing_type(FunctionKeyword) function_keyword_index;
                                for (function_keyword_index = 0; function_keyword_index < (backing_type(FunctionKeyword))(FunctionKeyword::count); function_keyword_index += 1)
                                {
                                    auto candidate_keyword = function_keywords[function_keyword_index];
                                    if (candidate_keyword.equal(function_identifier))
                                    {
                                        break;
                                    }
                                }

                                auto function_keyword = (FunctionKeyword)function_keyword_index;
                                skip_space(module);

                                switch (function_keyword)
                                {
                                    case FunctionKeyword::cc:
                                        {
                                            expect_character(module, left_parenthesis);
                                            skip_space(module);

                                            auto calling_convention_string = parse_identifier(module);

                                            String calling_conventions[] = {
                                                str("c"),
                                            };
                                            static_assert(array_length(calling_conventions) == (u64)CallingConvention::count);

                                            backing_type(CallingConvention) calling_convention_index;
                                            for (calling_convention_index = 0; calling_convention_index < (backing_type(CallingConvention))CallingConvention::count; calling_convention_index += 1)
                                            {
                                                auto calling_convention_name = calling_conventions[calling_convention_index];
                                                if (calling_convention_name.equal(calling_convention_string))
                                                {
                                                    break;
                                                }
                                            }

                                            auto candidate_calling_convention = (CallingConvention)calling_convention_index;
                                            if (candidate_calling_convention == CallingConvention::count)
                                            {
                                                report_error();
                                            }

                                            calling_convention = candidate_calling_convention;

                                            skip_space(module);
                                            expect_character(module, right_parenthesis);
                                        } break;
                                    case FunctionKeyword::count:
                                        {
                                            report_error();
                                        } break;
                                }

                                skip_space(module);

                                if (consume_character_if_match(module, right_bracket))
                                {
                                    break;
                                }
                                else
                                {
                                    report_error();
                                }
                            }
                        }

                        skip_space(module);

                        expect_character(module, left_parenthesis);

                        // Fixed-capacity scratch storage for the argument list.
                        Type* semantic_argument_type_buffer[64];
                        String semantic_argument_name_buffer[64];
                        u64 semantic_argument_count = 0;

                        while (module.offset < module.content.length)
                        {
                            skip_space(module);

                            // `...` marks a variadic function and must close the list.
                            if (consume_character_if_match(module, '.'))
                            {
                                expect_character(module, '.');
                                expect_character(module, '.');
                                skip_space(module);
                                expect_character(module, right_parenthesis);
                                is_variable_arguments = true;
                                break;
                            }

                            if (consume_character_if_match(module, right_parenthesis))
                            {
                                break;
                            }

                            // Another argument follows: make sure the scratch
                            // buffers still have room before writing to them.
                            if (semantic_argument_count >= array_length(semantic_argument_type_buffer))
                            {
                                report_error();
                            }

                            auto argument_name = parse_identifier(module);
                            semantic_argument_name_buffer[semantic_argument_count] = argument_name;

                            skip_space(module);
                            expect_character(module, ':');
                            skip_space(module);

                            auto argument_type = parse_type(module);
                            semantic_argument_type_buffer[semantic_argument_count] = argument_type;

                            skip_space(module);

                            // The separating comma is consumed when present but not required.
                            unused(consume_character_if_match(module, ','));

                            semantic_argument_count += 1;
                        }

                        skip_space(module);

                        auto return_type = parse_type(module);

                        Slice<Type*> argument_types = {};
                        if (semantic_argument_count != 0)
                        {
                            trap_raw();
                        }

                        // The rest of function parsing is not implemented.
                        trap_raw();
                    } break;
                case GlobalKeyword::macro:
                    {
                        trap_raw();
                    } break;
                case GlobalKeyword::structure:
                    {
                        trap_raw();
                    } break;
                case GlobalKeyword::typealias:
                    {
                        trap_raw();
                    } break;
                case GlobalKeyword::union_type:
                    {
                        trap_raw();
                    } break;
                case GlobalKeyword::count:
                    {
                        // Not a keyword: rewind to the start of the identifier.
                        set_checkpoint(module, checkpoint);
                    } break;
            }
        }

        // The value is not a keyword declaration: not handled here.
        if (i == (backing_type(GlobalKeyword))GlobalKeyword::count)
        {
            trap_raw();
        }
    }
}