David Gonzalez Martin e07d65cd54 First iteration
2024-06-28 09:12:53 +02:00

2934 lines
75 KiB
C++

#include <fcntl.h>
#include <stdint.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/mman.h>
// Short aliases for the fixed-width integer and floating-point types used
// throughout this file.
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
using s8 = int8_t;
using s16 = int16_t;
using s32 = int32_t;
using s64 = int64_t;
using f32 = float;
using f64 = double;
// Hash values are 32 bits wide.
using Hash = u32;
// `fn` marks file-local functions and `global` marks file-local (or static
// member) variables; both expand to `static`.
#define fn static
#define global static
// Traps when the condition is false. NOTE: this expands to a bare `if`
// statement, so do not use it as the unbraced body of an `if`/`else`.
#define assert(x) if (__builtin_expect(!(x), 0)) { trap(); }
#define forceinline __attribute__((always_inline))
#define expect(x, b) __builtin_expect(x, b)
#define trap() __builtin_trap()
// Number of elements in a C array. The whole expansion is parenthesized so
// it composes correctly inside larger expressions (e.g. `x / array_length(a)`).
#define array_length(arr) (sizeof(arr) / sizeof((arr)[0]))
#define page_size (0x1000)
#define unused(x) (void)(x)
// Byte-size helpers. KB and MB are evaluated in the type of `n`; GB and TB
// widen to u64 first.
#define KB(n) ((n) * 1024)
#define MB(n) ((n) * 1024 * 1024)
#define GB(n) ((u64)(n) * 1024 * 1024 * 1024)
#define TB(n) ((u64)(n) * 1024 * 1024 * 1024 * 1024)
// Linked-list node describing a run of `count` elements of type T located at
// `pointer`; `next` links to the following node of the same type.
template<typename T>
struct DynamicList
{
T* pointer;
u64 count;
DynamicList* next;
};
// Linked-list node with inline storage for up to `count` elements of type T.
// `length` is stored alongside the array (presumably the number of used
// slots; no user is visible here) and `next` links to the following node.
template<typename T, u64 count>
struct StaticList
{
u64 length;
StaticList* next;
T array[count];
};
extern "C" void* memcpy(void* __restrict dst, void* __restrict src, u64 size)
{
auto* destination = (u8*)dst;
auto* source = (u8*)src;
for (u64 i = 0; i < size; i += 1)
{
destination[i] = source[i];
}
return dst;
}
extern "C" void* memset(void* dst, u8 n, u64 size)
{
auto* destination = (u8*)dst;
for (u64 i = 0; i < size; i += 1)
{
destination[i] = n;
}
return dst;
}
extern "C" int memcmp(const void* left, const void* right, u64 n)
{
const u8 *l=(const u8*)left, *r=(const u8*)right;
for (; n && *l == *r; n--, l++, r++);
return n ? *l - *r : 0;
}
// Returns 1 when the `count` elements at `a` and `b` are bytewise identical,
// 0 otherwise.
template<typename T>
forceinline fn u8 mem_equal_range(T* a, T* b, u64 count)
{
    u64 byte_count = count * sizeof(T);
    int comparison = memcmp(a, b, byte_count);
    return comparison == 0;
}
// Returns the smaller of the two values; when neither compares less than the
// other, `b` is returned.
template <typename T>
forceinline fn T min(T a, T b)
{
    if (a < b)
    {
        return a;
    }
    return b;
}
// Returns the larger of the two values; when neither compares greater than
// the other, `b` is returned.
template <typename T>
forceinline fn T max(T a, T b)
{
    if (a > b)
    {
        return a;
    }
    return b;
}
// Non-owning view over `length` contiguous elements of type T starting at
// `pointer`.
template<typename T>
struct Slice
{
    T* pointer;
    u64 length;

    // Bounds-checked element access; traps when `index` is out of range.
    T& operator[](u64 index)
    {
        assert(index < length);
        return pointer[index];
    }

    // Returns the sub-view [start, end). Traps on an inverted or
    // out-of-range interval instead of silently producing a view whose
    // length underflowed or that extends past this slice.
    Slice slice(u64 start, u64 end)
    {
        assert(start <= end);
        assert(end <= length);
        return {
            .pointer = pointer + start,
            .length = end - start,
        };
    }

    // Returns 1 when both slices have the same length and bytewise identical
    // contents, 0 otherwise.
    forceinline u8 equal(Slice other)
    {
        if (length != other.length)
        {
            return 0;
        }
        return mem_equal_range(pointer, other.pointer, length);
    }

    // Iteration support for range-based for loops.
    forceinline T* begin()
    {
        return pointer;
    }
    forceinline T* end()
    {
        return pointer + length;
    }
};
// A string is a byte slice; it is not required to be NUL-terminated.
using String = Slice<u8>;
// Builds a String over a string literal, excluding the terminating NUL.
// Macro arguments are parenthesized so any expression can be passed safely.
#define strlit(s) String{ .pointer = (u8*)(s), .length = sizeof(s) - 1, }
// Builds a one-byte String that points at the lvalue `ch`.
#define ch_to_str(ch) String{ .pointer = &(ch), .length = 1 }
// 64-bit FNV offset basis and prime.
global auto constexpr fnv_offset = 14695981039346656037ull;
global auto constexpr fnv_prime = 1099511628211ull;
// Hashes the bytes FNV-1a style (xor the byte in, then multiply by the
// prime) using 64-bit state, and truncates the result to the 32-bit Hash.
fn Hash hash_bytes(String bytes)
{
    u64 state = fnv_offset;
    for (u8 byte : bytes)
    {
        state = (state ^ byte) * fnv_prime;
    }
    return (Hash)state;
}
// fn forceinline long syscall0(long n)
// {
// unsigned long ret;
// __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n) : "rcx", "r11", "memory");
// return ret;
// }
// Issues the x86-64 `syscall` instruction with one argument.
// `n` (the syscall number) goes in rax and `a1` in rdi; the value the
// instruction leaves in rax is returned. rcx, r11 and memory are declared as
// clobbered.
fn forceinline long syscall1(long n, long a1)
{
unsigned long ret;
__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1) : "rcx", "r11", "memory");
return ret;
}
// Issues the x86-64 `syscall` instruction with two arguments.
// `n` goes in rax, `a1` in rdi and `a2` in rsi; the value left in rax is
// returned. rcx, r11 and memory are declared as clobbered.
fn forceinline long syscall2(long n, long a1, long a2)
{
unsigned long ret;
__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2)
: "rcx", "r11", "memory");
return ret;
}
// Issues the x86-64 `syscall` instruction with three arguments.
// `n` goes in rax, `a1` in rdi, `a2` in rsi and `a3` in rdx; the value left
// in rax is returned. rcx, r11 and memory are declared as clobbered.
fn forceinline long syscall3(long n, long a1, long a2, long a3)
{
unsigned long ret;
__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
"d"(a3) : "rcx", "r11", "memory");
return ret;
}
// fn forceinline long syscall4(long n, long a1, long a2, long a3, long a4)
// {
// unsigned long ret;
// register long r10 __asm__("r10") = a4;
// __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
// "d"(a3), "r"(r10): "rcx", "r11", "memory");
// return ret;
// }
// fn forceinline long syscall5(long n, long a1, long a2, long a3, long a4, long a5)
// {
// unsigned long ret;
// register long r10 __asm__("r10") = a4;
// register long r8 __asm__("r8") = a5;
// __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
// "d"(a3), "r"(r10), "r"(r8) : "rcx", "r11", "memory");
// return ret;
// }
// Issues the x86-64 `syscall` instruction with six arguments.
// `n` goes in rax and `a1`..`a3` in rdi, rsi and rdx. `a4`..`a6` are pinned
// to r10, r8 and r9 through explicit register variables because the asm
// constraints used here have no dedicated letters for those registers.
// The value left in rax is returned; rcx, r11 and memory are declared as
// clobbered.
fn forceinline long syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
{
unsigned long ret;
register long r10 __asm__("r10") = a4;
register long r8 __asm__("r8") = a5;
register long r9 __asm__("r9") = a6;
__asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
"d"(a3), "r"(r10), "r"(r8), "r"(r9) : "rcx", "r11", "memory");
return ret;
}
// Returns 1 when the first `size` bytes at `a` and `b` are identical, 0 as
// soon as a differing byte is found. A `size` of 0 yields 1.
fn u8 memeq(u8* a, u8* b, u64 size)
{
    u64 matching = 0;
    while (matching < size && a[matching] == b[matching])
    {
        matching += 1;
    }
    return matching == size;
}
// Syscall numbers passed as the first argument to the syscallN helpers.
// NOTE(review): names and values appear to follow the Linux x86-64 syscall
// table — confirm against the kernel's syscall_64.tbl when adding entries.
enum class SyscallX86_64 : u64 {
read = 0,
write = 1,
open = 2,
close = 3,
stat = 4,
fstat = 5,
lstat = 6,
poll = 7,
lseek = 8,
mmap = 9,
mprotect = 10,
munmap = 11,
brk = 12,
rt_sigaction = 13,
rt_sigprocmask = 14,
rt_sigreturn = 15,
ioctl = 16,
pread64 = 17,
pwrite64 = 18,
readv = 19,
writev = 20,
access = 21,
pipe = 22,
select = 23,
sched_yield = 24,
mremap = 25,
msync = 26,
mincore = 27,
madvise = 28,
shmget = 29,
shmat = 30,
shmctl = 31,
dup = 32,
dup2 = 33,
pause = 34,
nanosleep = 35,
getitimer = 36,
alarm = 37,
setitimer = 38,
getpid = 39,
sendfile = 40,
socket = 41,
connect = 42,
accept = 43,
sendto = 44,
recvfrom = 45,
sendmsg = 46,
recvmsg = 47,
shutdown = 48,
bind = 49,
listen = 50,
getsockname = 51,
getpeername = 52,
socketpair = 53,
setsockopt = 54,
getsockopt = 55,
clone = 56,
fork = 57,
vfork = 58,
execve = 59,
exit = 60,
wait4 = 61,
kill = 62,
uname = 63,
semget = 64,
semop = 65,
semctl = 66,
shmdt = 67,
msgget = 68,
msgsnd = 69,
msgrcv = 70,
msgctl = 71,
fcntl = 72,
flock = 73,
fsync = 74,
fdatasync = 75,
truncate = 76,
ftruncate = 77,
getdents = 78,
getcwd = 79,
chdir = 80,
fchdir = 81,
rename = 82,
mkdir = 83,
rmdir = 84,
creat = 85,
link = 86,
unlink = 87,
symlink = 88,
readlink = 89,
chmod = 90,
fchmod = 91,
chown = 92,
fchown = 93,
lchown = 94,
umask = 95,
gettimeofday = 96,
getrlimit = 97,
getrusage = 98,
sysinfo = 99,
times = 100,
ptrace = 101,
getuid = 102,
syslog = 103,
getgid = 104,
setuid = 105,
setgid = 106,
geteuid = 107,
getegid = 108,
setpgid = 109,
getppid = 110,
getpgrp = 111,
setsid = 112,
setreuid = 113,
setregid = 114,
getgroups = 115,
setgroups = 116,
setresuid = 117,
getresuid = 118,
setresgid = 119,
getresgid = 120,
getpgid = 121,
setfsuid = 122,
setfsgid = 123,
getsid = 124,
capget = 125,
capset = 126,
rt_sigpending = 127,
rt_sigtimedwait = 128,
rt_sigqueueinfo = 129,
rt_sigsuspend = 130,
sigaltstack = 131,
utime = 132,
mknod = 133,
uselib = 134,
personality = 135,
ustat = 136,
statfs = 137,
fstatfs = 138,
sysfs = 139,
getpriority = 140,
setpriority = 141,
sched_setparam = 142,
sched_getparam = 143,
sched_setscheduler = 144,
sched_getscheduler = 145,
sched_get_priority_max = 146,
sched_get_priority_min = 147,
sched_rr_get_interval = 148,
mlock = 149,
munlock = 150,
mlockall = 151,
munlockall = 152,
vhangup = 153,
modify_ldt = 154,
pivot_root = 155,
_sysctl = 156,
prctl = 157,
arch_prctl = 158,
adjtimex = 159,
setrlimit = 160,
chroot = 161,
sync = 162,
acct = 163,
settimeofday = 164,
mount = 165,
umount2 = 166,
swapon = 167,
swapoff = 168,
reboot = 169,
sethostname = 170,
setdomainname = 171,
iopl = 172,
ioperm = 173,
create_module = 174,
init_module = 175,
delete_module = 176,
get_kernel_syms = 177,
query_module = 178,
quotactl = 179,
nfsservctl = 180,
getpmsg = 181,
putpmsg = 182,
afs_syscall = 183,
tuxcall = 184,
security = 185,
gettid = 186,
readahead = 187,
setxattr = 188,
lsetxattr = 189,
fsetxattr = 190,
getxattr = 191,
lgetxattr = 192,
fgetxattr = 193,
listxattr = 194,
llistxattr = 195,
flistxattr = 196,
removexattr = 197,
lremovexattr = 198,
fremovexattr = 199,
tkill = 200,
time = 201,
futex = 202,
sched_setaffinity = 203,
sched_getaffinity = 204,
set_thread_area = 205,
io_setup = 206,
io_destroy = 207,
io_getevents = 208,
io_submit = 209,
io_cancel = 210,
get_thread_area = 211,
lookup_dcookie = 212,
epoll_create = 213,
epoll_ctl_old = 214,
epoll_wait_old = 215,
remap_file_pages = 216,
getdents64 = 217,
set_tid_address = 218,
restart_syscall = 219,
semtimedop = 220,
fadvise64 = 221,
timer_create = 222,
timer_settime = 223,
timer_gettime = 224,
timer_getoverrun = 225,
timer_delete = 226,
clock_settime = 227,
clock_gettime = 228,
clock_getres = 229,
clock_nanosleep = 230,
exit_group = 231,
epoll_wait = 232,
epoll_ctl = 233,
tgkill = 234,
utimes = 235,
vserver = 236,
mbind = 237,
set_mempolicy = 238,
get_mempolicy = 239,
mq_open = 240,
mq_unlink = 241,
mq_timedsend = 242,
mq_timedreceive = 243,
mq_notify = 244,
mq_getsetattr = 245,
kexec_load = 246,
waitid = 247,
add_key = 248,
request_key = 249,
keyctl = 250,
ioprio_set = 251,
ioprio_get = 252,
inotify_init = 253,
inotify_add_watch = 254,
inotify_rm_watch = 255,
migrate_pages = 256,
openat = 257,
mkdirat = 258,
mknodat = 259,
fchownat = 260,
futimesat = 261,
fstatat64 = 262,
unlinkat = 263,
renameat = 264,
linkat = 265,
symlinkat = 266,
readlinkat = 267,
fchmodat = 268,
faccessat = 269,
pselect6 = 270,
ppoll = 271,
unshare = 272,
set_robust_list = 273,
get_robust_list = 274,
splice = 275,
tee = 276,
sync_file_range = 277,
vmsplice = 278,
move_pages = 279,
utimensat = 280,
epoll_pwait = 281,
signalfd = 282,
timerfd_create = 283,
eventfd = 284,
fallocate = 285,
timerfd_settime = 286,
timerfd_gettime = 287,
accept4 = 288,
signalfd4 = 289,
eventfd2 = 290,
epoll_create1 = 291,
dup3 = 292,
pipe2 = 293,
inotify_init1 = 294,
preadv = 295,
pwritev = 296,
rt_tgsigqueueinfo = 297,
perf_event_open = 298,
recvmmsg = 299,
fanotify_init = 300,
fanotify_mark = 301,
prlimit64 = 302,
name_to_handle_at = 303,
open_by_handle_at = 304,
clock_adjtime = 305,
syncfs = 306,
sendmmsg = 307,
setns = 308,
getcpu = 309,
process_vm_readv = 310,
process_vm_writev = 311,
kcmp = 312,
finit_module = 313,
sched_setattr = 314,
sched_getattr = 315,
renameat2 = 316,
seccomp = 317,
getrandom = 318,
memfd_create = 319,
kexec_file_load = 320,
bpf = 321,
execveat = 322,
userfaultfd = 323,
membarrier = 324,
mlock2 = 325,
copy_file_range = 326,
preadv2 = 327,
pwritev2 = 328,
pkey_mprotect = 329,
pkey_alloc = 330,
pkey_free = 331,
statx = 332,
io_pgetevents = 333,
rseq = 334,
// The numbering jumps from 334 to 424 here.
pidfd_send_signal = 424,
io_uring_setup = 425,
io_uring_enter = 426,
io_uring_register = 427,
open_tree = 428,
move_mount = 429,
fsopen = 430,
fsconfig = 431,
fsmount = 432,
fspick = 433,
pidfd_open = 434,
clone3 = 435,
close_range = 436,
openat2 = 437,
pidfd_getfd = 438,
faccessat2 = 439,
process_madvise = 440,
epoll_pwait2 = 441,
mount_setattr = 442,
quotactl_fd = 443,
landlock_create_ruleset = 444,
landlock_add_rule = 445,
landlock_restrict_self = 446,
memfd_secret = 447,
process_mrelease = 448,
futex_waitv = 449,
set_mempolicy_home_node = 450,
cachestat = 451,
fchmodat2 = 452,
map_shadow_stack = 453,
futex_wake = 454,
futex_wait = 455,
futex_requeue = 456,
};
// Raw mmap system call. Returns whatever the syscall left in rax,
// reinterpreted as a pointer; no errno translation is performed.
fn void* syscall_mmap(void* address, size_t length, int protection_flags, int map_flags, int fd, __off_t offset)
{
    long number = static_cast<long>(SyscallX86_64::mmap);
    long raw_result = syscall6(number, (unsigned long)address, length, protection_flags, map_flags, fd, offset);
    return (void*) raw_result;
}
// Raw mprotect system call; returns the syscall result narrowed to int.
fn int syscall_mprotect(void *address, size_t length, int protection_flags)
{
    long number = static_cast<long>(SyscallX86_64::mprotect);
    long raw_result = syscall3(number, (unsigned long)address, length, protection_flags);
    return raw_result;
}
// Raw open system call; returns the syscall result narrowed to int.
fn int syscall_open(const char *file_path, int flags, int mode)
{
    long number = static_cast<long>(SyscallX86_64::open);
    long raw_result = syscall3(number, (unsigned long)file_path, flags, mode);
    return raw_result;
}
// Raw fstat system call writing into `buffer`; returns the syscall result
// narrowed to int.
fn int syscall_fstat(int fd, struct stat *buffer)
{
    long number = static_cast<long>(SyscallX86_64::fstat);
    long raw_result = syscall2(number, fd, (unsigned long)buffer);
    return raw_result;
}
// Raw read system call; returns the syscall result unchanged.
fn ssize_t syscall_read(int fd, void* buffer, size_t bytes)
{
    long number = static_cast<long>(SyscallX86_64::read);
    long raw_result = syscall3(number, fd, (unsigned long)buffer, bytes);
    return raw_result;
}
// Raw write system call; returns the syscall result unchanged.
fn ssize_t syscall_write(int fd, const void *buffer, size_t bytes)
{
    long number = static_cast<long>(SyscallX86_64::write);
    long raw_result = syscall3(number, fd, (unsigned long)buffer, bytes);
    return raw_result;
}
// Terminates the process with `status` through syscall 231 (exit_group in
// the SyscallX86_64 table). The trailing trap backs up the [[noreturn]]
// promise should the syscall ever return.
[[noreturn]] [[gnu::cold]] fn void syscall_exit(int status)
{
    long number = static_cast<long>(SyscallX86_64::exit_group);
    (void)syscall1(number, status);
    trap();
}
// Aborts the program on an unrecoverable error.
// NOTE(review): trap() does not return, so the syscall_exit(1) call after it
// is never reached — confirm whether trapping (rather than exiting with
// status 1) is the intended behavior.
[[noreturn]] [[gnu::cold]] fn void fail()
{
trap();
syscall_exit(1);
}
// Reserves `size` bytes of address space as a private anonymous mapping with
// no access permissions (PROT_NONE) and MAP_NORESERVE; MAP_HUGETLB is added
// when `huge_pages` is non-zero. Use commit() to make parts of it usable.
// Traps when the mapping cannot be created.
fn void* reserve(u64 size, u8 huge_pages)
{
    int protection_flags = PROT_NONE;
    int map_flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE | (huge_pages ? MAP_HUGETLB : 0);
    void* result = syscall_mmap(0, size, protection_flags, map_flags, -1, 0);
    // The raw syscall reports failure as -errno (a value in [-4095, -1]),
    // not as the libc MAP_FAILED sentinel, so the whole error range must be
    // rejected.
    assert((u64)result < (u64)-4095);
    return result;
}
// Makes `size` bytes starting at `address` readable and writable via
// mprotect; traps when the call does not return 0.
fn void commit(void* address, u64 size)
{
    int read_write = PROT_READ | PROT_WRITE;
    int status = syscall_mprotect(address, size, read_write);
    assert(status == 0);
}
// Rounds `value` up to the next multiple of `alignment` using a bit mask,
// which only yields a multiple when `alignment` is a power of two.
fn u64 align_forward(u64 value, u64 alignment)
{
    u64 low_bits = alignment - 1;
    return (value + low_bits) & ~low_bits;
}
struct Arena
{
u64 reserved_size;
u64 commited;
u64 commit_position;
u64 granularity;
u8 reserved[4 * 8];
global auto constexpr minimum_granularity = KB(4);
global auto constexpr middle_granularity = MB(2);
global auto constexpr page_granularity = page_size;
global auto constexpr default_size = GB(4);
fn Arena* init(u64 reserved_size, u64 granularity, u64 initial_size)
{
assert(initial_size % granularity == 0);
Arena* arena = (Arena*)reserve(reserved_size, granularity != minimum_granularity);
commit(arena, initial_size);
*arena = {
.reserved_size = reserved_size,
.commited = initial_size,
.commit_position = sizeof(Arena),
.granularity = granularity,
};
return arena;
}
fn Arena* init_default(u64 initial_size)
{
return init(default_size, minimum_granularity, initial_size);
}
void* allocate_bytes(u64 size, u64 alignment)
{
u64 aligned_offset = align_forward(commit_position, alignment);
u64 aligned_size_after = aligned_offset + size;
if (aligned_size_after <= commited)
{
void* result = (u8*)this + aligned_offset;
commit_position = aligned_size_after;
return result;
}
else
{
trap();
}
}
template<typename T>
T* allocate_many(u64 count)
{
return (T*)allocate_bytes(sizeof(T) * count, alignof(T));
}
template<typename T>
T* allocate_one()
{
return allocate_many<T>(1);
}
template<typename T>
T* allocate_slice(u64 count)
{
return {
.pointer = allocate_many<T>(count),
.length = count,
};
}
};
static_assert(sizeof(Arena) == 64, "Arena must be cache aligned");
// Reinterprets the bytes of `source` as a value of type Destination.
// Both types must have the same size. The bytes are copied rather than
// accessed through a casted pointer, so the conversion does not violate
// strict-aliasing rules. Destination must be default-constructible and
// trivially copyable.
template<typename Destination, typename Source>
fn forceinline Destination transmute(Source source)
{
    static_assert(sizeof(Source) == sizeof(Destination));
    Destination result;
    __builtin_memcpy(&result, &source, sizeof(Destination));
    return result;
}
// Reads the whole file at `path` into memory allocated from `arena`
// (64-byte aligned) and returns its contents.
// `path.pointer` is handed to the open syscall as a C string, so the bytes
// must be NUL-terminated — the String length is not used for the path.
// Traps when the file cannot be opened, stat'ed, or read in a single call.
fn String file_read(Arena* arena, String path)
{
    int file_descriptor = syscall_open((char*)path.pointer, 0, 0);
    // The raw syscall returns -errno on failure, so every negative value is
    // an error (not just -1).
    assert(file_descriptor >= 0);

    struct stat stat_buffer;
    int stat_result = syscall_fstat(file_descriptor, &stat_buffer);
    assert(stat_result == 0);
    u64 file_size = stat_buffer.st_size;

    String result = {
        .pointer = (u8*)arena->allocate_bytes(file_size, 64),
        .length = file_size,
    };

    // TODO: big files
    ssize_t read_result = syscall_read(file_descriptor, result.pointer, result.length);
    assert(read_result >= 0);
    assert((u64)read_result == file_size);

    // Release the descriptor; it was opened read-only, so a failure here
    // cannot lose data and is deliberately ignored.
    (void)syscall1(static_cast<long>(SyscallX86_64::close), file_descriptor);

    return result;
}
// Writes `message` to file descriptor 1 in a single write call; traps when
// the call fails or writes fewer bytes than requested.
fn void print(String message)
{
    const int standard_output = 1;
    ssize_t written = syscall_write(standard_output, message.pointer, message.length);
    assert(written >= 0);
    assert((u64)written == message.length);
}
template<typename T> struct PinnedArray;
fn void generic_pinned_array_ensure_capacity(PinnedArray<u8>* array, u32 additional_T, u32 size_of_T);
fn u8* generic_pinned_array_add_with_capacity(PinnedArray<u8>* array, u32 additional_T, u32 size_of_T);
// Growable array backed by one large address-space reservation.
// The typed methods forward to the untyped generic_pinned_array_* helpers by
// viewing the array as a PinnedArray<u8> and passing sizeof(T).
template <typename T>
struct PinnedArray
{
    T* pointer;    // start of the reservation (null until first use)
    u32 length;    // number of elements in use
    u32 committed; // committed size, counted in `granularity`-sized pages
    global constexpr auto granularity = page_size;
    global constexpr auto reserved_size = ((u64)GB(4) - granularity);

    // Bounds-checked element access; traps when `index` is out of range.
    forceinline T& operator[](u32 index)
    {
        assert(index < length);
        return *(pointer + index);
    }

    // Makes sure memory for `additional` more elements is committed.
    forceinline void ensure_capacity(u32 additional)
    {
        generic_pinned_array_ensure_capacity((PinnedArray<u8>*)(this), additional, sizeof(T));
    }

    // Grows the length by `additional` elements, assuming capacity has
    // already been ensured, and returns a slice of that many elements.
    forceinline Slice<T> add_with_capacity(u32 additional)
    {
        u8* raw_elements = generic_pinned_array_add_with_capacity((PinnedArray<u8>*)(this), additional, sizeof(T));
        return {
            .pointer = (T*)raw_elements,
            .length = additional,
        };
    }

    // Ensures capacity for `additional` elements and then adds them.
    forceinline Slice<T> add(u32 additional)
    {
        ensure_capacity(additional);
        return add_with_capacity(additional);
    }

    // Adds a single element and returns a pointer to it.
    forceinline T* add_one()
    {
        Slice<T> single = add(1);
        return single.pointer;
    }
};
// Returns the size in bytes of the elements currently stored in the array
// (element count times element size, computed in 32 bits).
forceinline fn u32 generic_pinned_array_length(PinnedArray<u8>* array, u32 size_of_T)
{
    return array->length * size_of_T;
}
// Makes sure the array has committed memory for `additional_T` more
// elements of `size_of_T` bytes each.
// On first use the address range is reserved. Memory is then committed in
// page-granularity steps; `array->committed` tracks the number of committed
// pages. Traps when the reservation would be exceeded.
fn void generic_pinned_array_ensure_capacity(PinnedArray<u8>* array, u32 additional_T, u32 size_of_T)
{
    if (array->pointer == 0)
    {
        assert(array->length == 0);
        assert(array->committed == 0);
        array->pointer = static_cast<u8*>(reserve(PinnedArray<u8>::reserved_size, 0));
    }

    // All byte arithmetic is done in 64 bits so large arrays cannot wrap.
    u64 current_length_bytes = (u64)array->length * size_of_T;
    u64 additional_bytes = (u64)additional_T * size_of_T;
    u64 new_length_bytes = current_length_bytes + additional_bytes;
    u64 committed_bytes = (u64)array->committed * PinnedArray<u8>::granularity;

    if (committed_bytes < new_length_bytes)
    {
        assert(new_length_bytes <= PinnedArray<u8>::reserved_size);
        u64 new_committed_bytes = align_forward(new_length_bytes, PinnedArray<u8>::granularity);
        // Commit only the pages between the old and the new committed end
        // (the difference, not the sum, of the two sizes).
        u8* commit_pointer = array->pointer + committed_bytes;
        u64 commit_bytes = new_committed_bytes - committed_bytes;
        commit(commit_pointer, commit_bytes);
        array->committed = (u32)(new_committed_bytes / PinnedArray<u8>::granularity);
    }
}
// Appends `additional_T` elements of `size_of_T` bytes each, assuming the
// memory has already been committed, and returns a pointer to the first of
// the newly added elements (not to the start of the array).
// Traps when the new length would exceed the committed pages.
fn u8* generic_pinned_array_add_with_capacity(PinnedArray<u8>* array, u32 additional_T, u32 size_of_T)
{
    u64 current_length_bytes = (u64)array->length * size_of_T;
    u64 new_length_bytes = current_length_bytes + (u64)additional_T * size_of_T;
    assert(new_length_bytes <= (u64)array->committed * PinnedArray<u8>::granularity);
    u8* pointer = array->pointer + current_length_bytes;
    array->length += additional_T;
    return pointer;
}
template <typename K, typename V> struct PinnedHashmap;
// Result of a hashmap get-or-put operation: pointers to the stored key and
// value, plus a flag (`existing`) carried alongside them; the insertion path
// of generic_pinned_hashmap_get_or_put sets it to 0.
template <typename K, typename V>
struct GetOrPut
{
K* key;
V* value;
u8 existing;
};
fn GetOrPut<u8, u8> generic_pinned_hashmap_get_or_put(PinnedHashmap<u8, u8>* hashmap, u8* new_key_pointer, u32 key_size, u8* new_value_pointer, u32 value_size);
// Key/value map stored as two parallel arrays, each in its own address-space
// reservation. The typed interface forwards to the untyped
// generic_pinned_hashmap_* helpers, passing the key and value sizes.
template<typename K, typename V>
struct PinnedHashmap
{
    K* keys;
    V* values;
    u32 length;              // number of stored entries
    u16 key_page_capacity;   // committed pages backing `keys`
    u16 value_page_capacity; // committed pages backing `values`
    global constexpr auto invalid_index = ~0u;
    global constexpr auto granularity = PinnedArray<V>::granularity;
    global constexpr auto reserved_size = PinnedArray<V>::reserved_size;
    static_assert(granularity % sizeof(K) == 0, "");
    static_assert(granularity % sizeof(V) == 0, "");

    // Typed wrapper over generic_pinned_hashmap_get_or_put: passes the key
    // and value by address together with their sizes and reinterprets the
    // untyped result as a GetOrPut<K, V>.
    forceinline GetOrPut<K, V> get_or_put(K key, V value)
    {
        auto untyped_result = generic_pinned_hashmap_get_or_put((PinnedHashmap<u8, u8>*)(this), (u8*)&key, sizeof(K), (u8*)&value, sizeof(V));
        auto* typed_result = (GetOrPut<K, V>*)&untyped_result;
        return *typed_result;
    }
};
// Returns the index of the entry whose key bytes equal the `key_size` bytes
// at `key_pointer`, or `invalid_index` when no entry matches. Entries are
// scanned linearly from the start.
fn u32 generic_pinned_hashmap_get_index(PinnedHashmap<u8, u8>* hashmap, u8* key_pointer, u32 key_size)
{
    for (u32 i = 0; i < hashmap->length; i += 1)
    {
        u8* candidate_key = &hashmap->keys[i * key_size];
        if (memeq(candidate_key, key_pointer, key_size))
        {
            return i;
        }
    }
    return hashmap->invalid_index;
}
// Makes sure both the key and the value array have committed memory for
// `additional_elements` more entries. The two reservations are created the
// first time this runs with no key pages committed. Does nothing when
// `additional_elements` is 0.
fn void generic_pinned_hashmap_ensure_capacity(PinnedHashmap<u8, u8>* hashmap, u32 key_size, u32 value_size, u32 additional_elements)
{
    if (additional_elements == 0)
    {
        return;
    }

    if (hashmap->key_page_capacity == 0)
    {
        assert(hashmap->value_page_capacity == 0);
        hashmap->keys = (u8*)reserve(hashmap->reserved_size, 0);
        hashmap->values = (u8*)reserve(hashmap->reserved_size, 0);
    }

    u32 wanted_elements = hashmap->length + additional_elements;

    // Commits whole pages at the end of one backing array until it can hold
    // `wanted_elements` entries of `element_size` bytes.
    auto grow = [&](u8* base, u16* page_capacity, u32 element_size)
    {
        u32 committed_bytes = *page_capacity * hashmap->granularity;
        u32 wanted_bytes = wanted_elements * element_size;
        if (committed_bytes < wanted_bytes)
        {
            u32 aligned_wanted_bytes = align_forward(wanted_bytes, hashmap->granularity);
            void* commit_pointer = base + committed_bytes;
            u32 commit_size = aligned_wanted_bytes - committed_bytes;
            commit(commit_pointer, commit_size);
            *page_capacity = aligned_wanted_bytes / hashmap->granularity;
        }
    };

    grow(hashmap->keys, &hashmap->key_page_capacity, key_size);
    grow(hashmap->values, &hashmap->value_page_capacity, value_size);
}
// Looks up the key; when it is absent, appends a copy of the key and value.
// Returns pointers to the stored key and value, with `existing` set to 1
// when the key was already present (the stored value is left untouched and
// the passed value is ignored) and 0 when a new entry was inserted.
// Keys are compared bytewise, so two different logical keys with identical
// bytes are treated as the same entry.
fn GetOrPut<u8, u8> generic_pinned_hashmap_get_or_put(PinnedHashmap<u8, u8>* hashmap, u8* new_key_pointer, u32 key_size, u8* new_value_pointer, u32 value_size)
{
    u32 index = generic_pinned_hashmap_get_index(hashmap, new_key_pointer, key_size);
    u8 existing = index != hashmap->invalid_index;

    if (!existing)
    {
        generic_pinned_hashmap_ensure_capacity(hashmap, key_size, value_size, 1);
        index = hashmap->length;
        hashmap->length += 1;
        memcpy(&hashmap->keys[(u64)index * key_size], new_key_pointer, key_size);
        memcpy(&hashmap->values[(u64)index * value_size], new_value_pointer, value_size);
    }

    return {
        .key = &hashmap->keys[(u64)index * key_size],
        .value = &hashmap->values[(u64)index * value_size],
        .existing = existing,
    };
}
// Status values for a source file (stored in File::status).
typedef enum FileStatus
{
FILE_STATUS_ADDED = 0,
FILE_STATUS_QUEUED = 1,
FILE_STATUS_READ = 2,
FILE_STATUS_ANALYZING = 3,
} FileStatus;
// A source file known to the compiler: its path, its contents and its
// current status.
struct File
{
String path;
String source_code;
FileStatus status;
};
struct SemaType;
// 8-byte type handle that is either a semantic-analysis type (`sema`) or a
// backend type (`backend`).
// NOTE(review): because this is a union, `bits` and `id` share the same
// storage, so `id` overlaps the lowest bit of `bits` — presumably a
// deliberate low-bit pointer tag; confirm.
union Type
{
enum Id
{
sema,
backend,
};
u64 bits:57;
Id id:1;
// Returns `bits` reinterpreted as a SemaType pointer; traps unless the
// handle is tagged `sema`.
forceinline SemaType* get_sema()
{
assert(id == Id::sema);
return (SemaType*)(bits);
}
forceinline u8 is_resolved();
};
static_assert(sizeof(Type) == 8);
// Kinds of semantic types. COUNT is not a kind; it is the number of kinds
// and is used to check that the id fits in its bit-field.
enum class SemaTypeId: u8
{
VOID,
NORETURN,
POINTER,
INTEGER,
ARRAY,
STRUCT,
UNION,
COUNT,
};
// Bit-field widths used by SemaType: 3 bits for the id, 1 bit for the
// `resolved` flag, and the rest of the 32-bit word for `flags`.
global auto constexpr type_id_bit_count = 3;
static_assert(static_cast<u8>(SemaTypeId::COUNT) < (1 << type_id_bit_count), "Type bit count for id must be respected");
global auto constexpr type_flags_bit_count = 32 - (type_id_bit_count + 1);
// Semantic type descriptor (24 bytes): size and alignment, kind, a resolved
// flag, kind-specific flags and a 32-bit name.
struct SemaType
{
u64 size;
u64 alignment;
SemaTypeId id : type_id_bit_count;
u32 resolved: 1;
u32 flags: type_flags_bit_count;
u32 name;
// Returns the bit width of an integer type, read from the low bits of
// `flags` (every bit except the topmost flag bit). Traps when the type is
// not an integer or the stored width exceeds 64 or the type's size in bits.
u8 get_bit_count()
{
assert(id == SemaTypeId::INTEGER);
u32 bit_count_mask = (1 << (type_flags_bit_count - 1)) - 1;
u8 bit_count = flags & bit_count_mask;
assert(bit_count <= size * 8);
assert(bit_count <= 64);
return bit_count;
}
};
static_assert(sizeof(SemaType) == sizeof(u64) * 3, "Type must be 24 bytes");
// Returns 1 when the type is resolved: backend types always are, semantic
// types report their own `resolved` flag.
// The backend case returns early because get_sema() asserts that the handle
// is a semantic type and must not be evaluated for backend handles.
forceinline u8 Type::is_resolved()
{
    if (id == Id::backend)
    {
        return 1;
    }
    return get_sema()->resolved;
}
// A named program entity: a 32-bit name plus one bit each for its kind
// (variable or function) and its linkage (internal or external).
struct Symbol
{
enum class Id: u8
{
variable,
function,
};
enum class Linkage: u8
{
internal,
external,
};
u32 name;
Id id: 1;
Linkage linkage: 1;
};
// Tag stored in AbiInfo::kind.
// NOTE(review): presumably selects how a value is passed or returned at the
// ABI level and which AbiInfoPayload member is active — confirm at the use
// sites.
typedef enum AbiInfoKind : u8
{
ABI_INFO_IGNORE,
ABI_INFO_DIRECT,
ABI_INFO_DIRECT_PAIR,
ABI_INFO_DIRECT_COERCE,
ABI_INFO_DIRECT_COERCE_INT,
ABI_INFO_DIRECT_SPLIT_STRUCT_I32,
ABI_INFO_EXPAND_COERCE,
ABI_INFO_INDIRECT,
ABI_INFO_EXPAND,
} AbiInfoKind;
// Two-valued selector: left or right.
enum class Side : u8
{
left,
right,
};
// Data type carried by an IR node: a kind plus a small bit-count field.
// NOTE(review): `bit_count` is only 5 bits wide (values 0..31), which cannot
// hold integer widths up to 64 directly — confirm what it encodes.
struct NodeDataType
{
enum class Id : u8
{
VOID,
INTEGER,
TUPLE,
CONTROL,
MEMORY,
POINTER,
};
Id id;
u8 bit_count:5;
};
// Payload union embedded in AbiInfo: one node data type (`direct`,
// `direct_coerce`), a pair of them (`direct_pair`), or a node data type
// together with an alignment (`indirect`).
union AbiInfoPayload
{
NodeDataType direct;
NodeDataType direct_pair[2];
NodeDataType direct_coerce;
struct
{
NodeDataType type;
u32 alignment;
} indirect;
};
typedef union AbiInfoPayload AbiInfoPayload;
// One-bit ABI attribute flags packed into a single byte.
struct AbiInfoAttributes
{
u8 by_reg: 1;
u8 zero_extend: 1;
u8 sign_extend: 1;
u8 realign: 1;
u8 by_value: 1;
};
typedef struct AbiInfoAttributes AbiInfoAttributes;
// ABI information record used by FunctionPrototype for the return value and
// for each argument: a payload, two 16-bit indices, attribute flags and a
// kind tag.
struct AbiInfo
{
AbiInfoPayload payload;
u16 indices[2];
AbiInfoAttributes attributes;
AbiInfoKind kind;
};
// Function signature: the source-level argument and return types together
// with their ABI information.
struct FunctionPrototype
{
AbiInfo* argument_type_abis; // The count for this array is "original_argument_count", not "abi_argument_count"
SemaType** original_argument_types;
// TODO: are these needed?
// Node::DataType* abi_argument_types;
// u32 abi_argument_count;
SemaType* original_return_type;
AbiInfo return_type_abi;
u32 original_argument_count;
// TODO: is this needed?
// Node::DataType abi_return_type;
// One-bit flag. NOTE(review): the name looks like a misspelling of
// "varargs" (variadic function) — confirm.
u8 varags:1;
};
struct Function;
// Layout of the builtin type table (Unit::builtin_types): void, noreturn and
// the opaque pointer come first, indices 3 and 4 are left for the float
// types that are commented out below, and the 128 integer types start at
// index 5.
global auto constexpr void_type_index = 0;
global auto constexpr noreturn_type_index = 1;
global auto constexpr opaque_pointer_type_index = 2;
// global auto constexpr f32_type_offset = 3;
// global auto constexpr f64_type_offset = 4;
global auto constexpr integer_type_offset = 5;
global auto constexpr integer_type_count = 64 * 2;
global auto constexpr builtin_type_count = integer_type_count + integer_type_offset + 1;
// A compilation unit: its files and functions, the arenas it allocates from,
// the interned identifiers and the builtin type table.
struct Unit
{
    PinnedArray<File> files;
    PinnedArray<Function> functions;
    Arena* arena;
    Arena* node_arena;
    Arena* type_arena;
    PinnedHashmap<Hash, String> identifiers;
    SemaType* builtin_types;
    u64 generate_debug_information : 1;

    // Returns the builtin integer type with the given bit width. The table
    // holds 64 entries per signedness value, starting at
    // integer_type_offset, with widths 1..64 in ascending order.
    SemaType* get_integer_type(u8 bit_count, u8 signedness)
    {
        auto signedness_base = integer_type_offset + signedness * 64;
        auto index = signedness_base + bit_count - 1;
        return builtin_types + index;
    }
};
struct Node;
// A function: its symbol, prototype, root node, parameter nodes, and a node
// counter that Node::add_from_function uses to number new nodes.
struct Function
{
Symbol symbol;
Node* root_node;
Node** parameters;
FunctionPrototype prototype;
u32 node_count;
u16 parameter_count;
};
// Arguments for Node::project: the data type and the index of the
// projection to create.
struct ProjectionData
{
NodeDataType type;
u16 index;
};
// This is a node in the "sea of nodes" sense:
// https://en.wikipedia.org/wiki/Sea_of_nodes
//
// All edges of a node live in one array: the first `input_count` slots are
// the inputs and the next `output_count` slots are the outputs; `capacity`
// is the total number of slots allocated.
struct Node
{
    enum class Id: u8
    {
        ROOT,
        PROJECTION,
        RETURN,
        CONSTANT_INT,
    };
    static_assert(sizeof(NodeDataType) <= 2);

    Node** edges;
    u32 gvn; // number assigned from Function::node_count at creation
    u16 input_count;
    u16 output_count;
    u16 capacity;
    NodeDataType data_type;
    Id id;
    // Payload; which member is meaningful depends on `id`.
    union
    {
        struct
        {
            u32 index;
        } projection;
        u64 constant_int;
    };

    // View over the input edges.
    forceinline Slice<Node*> get_inputs()
    {
        return {
            .pointer = edges,
            .length = input_count,
        };
    }

    // View over the output edges.
    forceinline Slice<Node*> get_outputs()
    {
        return {
            .pointer = edges + input_count,
            .length = output_count,
        };
    }

    // Allocates a zero-initialized node from the unit's node arena.
    [[nodiscard]] fn Node* add(Unit* unit)
    {
        Node* node = unit->node_arena->allocate_one<Node>();
        *node = {};
        return node;
    }

    // Allocates a node from the unit's node arena and numbers it with the
    // function's next node count.
    [[nodiscard]] fn Node* add_from_function(Unit* unit, Function* function)
    {
        u32 gvn = function->node_count;
        function->node_count += 1;
        Node* node = unit->node_arena->allocate_one<Node>();
        *node = {
            .gvn = gvn,
        };
        return node;
    }

    // Creates a PROJECTION node whose single input is this node, which must
    // be of TUPLE type.
    Node* project(Unit* unit, Function* function, ProjectionData data)
    {
        assert(data_type.id == NodeDataType::Id::TUPLE);
        Node* projection = Node::add_from_function(unit, function);
        assert(projection != this);
        projection->id = Node::Id::PROJECTION;
        projection->data_type = data.type;
        projection->reallocate_edges(unit, 4);
        projection->input_count = 1;
        projection->set_input(unit, function, this, 0);
        projection->projection.index = data.index;
        return projection;
    }

    // Stores `input` in input slot `slot` and, when it is non-null,
    // registers this node as one of its outputs.
    void set_input(Unit* unit, Function* function, Node* input, u16 slot)
    {
        assert(slot < input_count);
        remove_output(unit, function, slot);
        *get_input_slot(slot) = input;
        if (input)
        {
            add_output(unit, function, input);
        }
    }

    // Address of output slot `slot`; traps when out of range.
    Node** get_output_slot(u16 slot)
    {
        assert(slot < output_count);
        return edges + input_count + slot;
    }

    // Address of input slot `slot`; traps when out of range.
    Node** get_input_slot(u16 slot)
    {
        assert(slot < input_count);
        return edges + slot;
    }

    // Appends this node to the outputs of `input`. Growing the edge array
    // is not implemented: traps when `input` has no free slot.
    void add_output(Unit* unit, Function* function, Node* input)
    {
        unused(unit);
        unused(function);
        if (input->output_count + input->input_count >= input->capacity)
        {
            trap();
        }
        auto index = input->output_count;
        input->output_count += 1;
        *input->get_output_slot(index) = this;
    }

    // Removal is not implemented: traps when output slot `slot` exists and
    // is occupied, otherwise does nothing.
    void remove_output(Unit* unit, Function* function, u16 slot)
    {
        unused(unit);
        unused(function);
        if (slot < output_count)
        {
            Node** output_slot = get_output_slot(slot);
            if (*output_slot)
            {
                trap();
            }
        }
    }

    // Moves the edges into a fresh array of `new_capacity` slots allocated
    // from the unit's arena, zeroing the unused tail. `new_capacity` must be
    // larger than the current capacity.
    void reallocate_edges(Unit* unit, u16 new_capacity)
    {
        auto old_capacity = capacity;
        assert(new_capacity > old_capacity);
        auto length = output_count + input_count;
        Node** new_edges = unit->arena->allocate_many<Node*>(new_capacity);
        memcpy(new_edges, edges, length * sizeof(Node*));
        // The tail holds pointers, so it is sized with sizeof(Node*); using
        // sizeof(Node) would write far past the end of the allocation.
        memset(new_edges + length, 0, (new_capacity - length) * sizeof(Node*));
        capacity = new_capacity;
        edges = new_edges;
        // TODO: free
    }
};
// Rounds `n` up to the nearest power of two (powers of two map to
// themselves) by smearing the highest set bit of n - 1 into every lower bit
// and adding one. An input of 0 wraps around and yields 0.
fn u64 round_up_to_next_power_of_2(u64 n)
{
    n -= 1;
    for (u64 shift = 1; shift < 64; shift *= 2)
    {
        n |= n >> shift;
    }
    n += 1;
    return n;
}
// Hashes `identifier`, hands the (hash, identifier) pair to the unit's
// identifier map via get_or_put, and returns the hash, which is the handle
// callers store.
fn Hash intern_identifier(Unit* unit, String identifier)
{
    Hash identifier_hash = hash_bytes(identifier);
    auto* identifier_map = &unit->identifiers;
    (void)identifier_map->get_or_put(identifier_hash, identifier);
    return identifier_hash;
}
// Names of the builtin integer types: "u1".."u64" at indices 0..63 followed
// by "s1".."s64" at indices 64..127.
global String integer_names[] =
{
strlit("u1"),
strlit("u2"),
strlit("u3"),
strlit("u4"),
strlit("u5"),
strlit("u6"),
strlit("u7"),
strlit("u8"),
strlit("u9"),
strlit("u10"),
strlit("u11"),
strlit("u12"),
strlit("u13"),
strlit("u14"),
strlit("u15"),
strlit("u16"),
strlit("u17"),
strlit("u18"),
strlit("u19"),
strlit("u20"),
strlit("u21"),
strlit("u22"),
strlit("u23"),
strlit("u24"),
strlit("u25"),
strlit("u26"),
strlit("u27"),
strlit("u28"),
strlit("u29"),
strlit("u30"),
strlit("u31"),
strlit("u32"),
strlit("u33"),
strlit("u34"),
strlit("u35"),
strlit("u36"),
strlit("u37"),
strlit("u38"),
strlit("u39"),
strlit("u40"),
strlit("u41"),
strlit("u42"),
strlit("u43"),
strlit("u44"),
strlit("u45"),
strlit("u46"),
strlit("u47"),
strlit("u48"),
strlit("u49"),
strlit("u50"),
strlit("u51"),
strlit("u52"),
strlit("u53"),
strlit("u54"),
strlit("u55"),
strlit("u56"),
strlit("u57"),
strlit("u58"),
strlit("u59"),
strlit("u60"),
strlit("u61"),
strlit("u62"),
strlit("u63"),
strlit("u64"),
strlit("s1"),
strlit("s2"),
strlit("s3"),
strlit("s4"),
strlit("s5"),
strlit("s6"),
strlit("s7"),
strlit("s8"),
strlit("s9"),
strlit("s10"),
strlit("s11"),
strlit("s12"),
strlit("s13"),
strlit("s14"),
strlit("s15"),
strlit("s16"),
strlit("s17"),
strlit("s18"),
strlit("s19"),
strlit("s20"),
strlit("s21"),
strlit("s22"),
strlit("s23"),
strlit("s24"),
strlit("s25"),
strlit("s26"),
strlit("s27"),
strlit("s28"),
strlit("s29"),
strlit("s30"),
strlit("s31"),
strlit("s32"),
strlit("s33"),
strlit("s34"),
strlit("s35"),
strlit("s36"),
strlit("s37"),
strlit("s38"),
strlit("s39"),
strlit("s40"),
strlit("s41"),
strlit("s42"),
strlit("s43"),
strlit("s44"),
strlit("s45"),
strlit("s46"),
strlit("s47"),
strlit("s48"),
strlit("s49"),
strlit("s50"),
strlit("s51"),
strlit("s52"),
strlit("s53"),
strlit("s54"),
strlit("s55"),
strlit("s56"),
strlit("s57"),
strlit("s58"),
strlit("s59"),
strlit("s60"),
strlit("s61"),
strlit("s62"),
strlit("s63"),
strlit("s64"),
};
// Initializes a unit: creates its three arenas, allocates the builtin type
// table from the type arena and fills in void, noreturn, the opaque pointer
// and the 128 integer types (unsigned 1..64 bits, then signed 1..64 bits).
fn void unit_initialize(Unit* unit)
{
    Arena* type_arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(64));
    SemaType* builtin_types = type_arena->allocate_many<SemaType>(builtin_type_count);
    *unit = {
        .arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(4)),
        .node_arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(64)),
        .type_arena = type_arena,
        .builtin_types = builtin_types,
    };

    builtin_types[void_type_index] = {
        .size = 0,
        .alignment = 1,
        .id = SemaTypeId::VOID,
        .resolved = 1,
        .name = intern_identifier(unit, strlit("void")),
    };
    builtin_types[noreturn_type_index] = {
        .size = 0,
        .alignment = 1,
        .id = SemaTypeId::NORETURN,
        .resolved = 1,
        .name = intern_identifier(unit, strlit("noreturn")),
    };
    builtin_types[opaque_pointer_type_index] = {
        .size = 8,
        .alignment = 8,
        .id = SemaTypeId::POINTER,
        .resolved = 1,
        .name = intern_identifier(unit, strlit("*any")),
    };

    // TODO: float types

    // Integer types: signedness 0 (unsigned) occupies the first 64 slots
    // after integer_type_offset, signedness 1 (signed) the next 64.
    for (u64 signedness = 0; signedness < 2; signedness += 1)
    {
        for (u64 bit_count = 1; bit_count <= 64; bit_count += 1)
        {
            u64 type_index = integer_type_offset + signedness * 64 + (bit_count - 1);
            assert(bit_count >= 1 && bit_count <= 64);
            assert(type_index < builtin_type_count);

            // Storage size is the width rounded up to a power of two, with
            // a minimum of one byte.
            auto aligned_bit_count = round_up_to_next_power_of_2(bit_count);
            auto byte_count = max<u64>(aligned_bit_count / 8, 1);
            assert(byte_count <= bit_count);
            assert(byte_count == 1 || byte_count == 2 || byte_count == 4 || byte_count == 8);

            // The topmost flag bit marks signed types; the lower bits hold
            // the bit width.
            u64 signedness_bit = signedness << (type_flags_bit_count - 1);

            builtin_types[type_index] =
            {
                .size = byte_count,
                .alignment = byte_count,
                .id = SemaTypeId::INTEGER,
                .resolved = 1,
                .flags = static_cast<u32>(bit_count | signedness_bit),
                .name = intern_identifier(unit, integer_names[signedness * 64 + bit_count - 1]),
            };
        }
    }
}
// unit_initialize indexes integer_names with [bit_count - 1] and [bit_count + 63] for
// bit_count in 1..64, so the table must hold exactly 128 entries.
static_assert(array_length(integer_names) == 128, "Integer name array must have 128 entries");
// Top-level compiler state.
struct Instance
{
// Arena that instance_add_unit allocates Unit objects from.
Arena* arena;
};
typedef struct Instance Instance;
// Carves a Unit out of the instance arena, resets it to its default state and hands it back.
fn Unit* instance_add_unit(Instance* instance)
{
    Arena* owner_arena = instance->arena;
    Unit* fresh_unit = owner_arena->allocate_one<Unit>();
    *fresh_unit = {};
    return fresh_unit;
}
// Cursor state used while scanning a source buffer.
struct Parser
{
// Byte index of the next character to read.
u64 i;
// Number of '\n' characters skip_space has stepped over so far.
u32 line;
// Byte index just past the last '\n' seen by skip_space (an offset, not a column number).
u32 column;
};
typedef struct Parser Parser;
// Branch-free select: returns `value` when `flag` is 1 and 0 when `flag` is 0.
// The mask is the two's-complement negation of `flag` (all ones for 1, all zeros for 0).
fn u64 safe_flag(u64 value, u64 flag)
{
    const u64 mask = ~flag + 1;
    return value & mask;
}
// Returns the character at `index + 1`, or 0 when that position is past the end of `file`.
// Written without branches: an out-of-range request loads index 0 instead and the loaded
// byte is then masked to 0.
fn u8 get_next_ch_safe(String file, u64 index)
{
    const u64 candidate = index + 1;
    const u64 in_bounds = candidate < file.length;
    const u8 loaded = file.pointer[safe_flag(candidate, in_bounds)];
    const u64 masked = safe_flag(loaded, in_bounds);
    assert(masked < 256);
    return (u8)masked;
}
// Returns 1 when `ch` is skippable: a space, tab, vertical tab, line feed, carriage return,
// or the first '/' of a "//" comment (`next_ch` is the character following `ch`).
fn u32 is_space(u8 ch, u8 next_ch)
{
    const u32 starts_comment = (ch == '/') & (next_ch == '/');
    const u32 blank = (ch == ' ') | (ch == '\t') | (ch == 0x0b);
    const u32 line_break = (ch == '\n') | (ch == '\r');
    return blank | line_break | starts_comment;
}
// Advances the parser past whitespace and "//" line comments, updating the line counter
// and the line-start offset whenever a '\n' is crossed.
fn void skip_space(Parser* parser, String src)
{
    const u64 entry_index = parser->i;
    if (entry_index == src.length)
    {
        return;
    }
    if (!is_space(src.pointer[entry_index], get_next_ch_safe(src, entry_index)))
    {
        return;
    }

    while (parser->i < src.length)
    {
        const u64 cursor = parser->i;
        const u8 current = src.pointer[cursor];
        if (current == '\n')
        {
            parser->line += 1;
            parser->column = cursor + 1;
        }
        if (!is_space(current, get_next_ch_safe(src, cursor)))
        {
            break;
        }
        if (current == '/')
        {
            // A skippable '/' can only be the start of "//": step over both slashes, then
            // run to the end of the line. The '\n' itself is handled by the next iteration.
            parser->i += 2;
            while (parser->i < src.length && src.pointer[parser->i] != '\n')
            {
                parser->i += 1;
            }
            continue;
        }
        parser->i += 1;
    }
}
// 1 when `ch` is an ASCII lowercase letter, otherwise 0.
fn u64 is_lower(u8 ch)
{
    const u64 not_below_a = ch >= 'a';
    const u64 not_above_z = ch <= 'z';
    return not_below_a & not_above_z;
}
// 1 when `ch` is an ASCII uppercase letter, otherwise 0.
fn u64 is_upper(u8 ch)
{
    const u64 not_below_a = ch >= 'A';
    const u64 not_above_z = ch <= 'Z';
    return not_below_a & not_above_z;
}
// 1 when `ch` is an ASCII letter of either case, otherwise 0.
fn u64 is_alphabetic(u8 ch)
{
    const u64 lower = is_lower(ch);
    const u64 upper = is_upper(ch);
    return lower | upper;
}
// 1 when `ch` is one of '0'..'9', otherwise 0.
fn u64 is_decimal_digit(u8 ch)
{
    const u64 not_below_zero = ch >= '0';
    const u64 not_above_nine = ch <= '9';
    return not_below_zero & not_above_nine;
}
// 1 when `ch` is a hexadecimal digit ('0'..'9', 'a'..'f' or 'A'..'F'), otherwise 0.
fn u64 is_hex_digit(u8 ch)
{
    const u64 decimal = is_decimal_digit(ch);
    const u64 lower_hex = (ch >= 'a') & (ch <= 'f');
    const u64 upper_hex = (ch >= 'A') & (ch <= 'F');
    return decimal | lower_hex | upper_hex;
}
// 1 when `ch` may begin an identifier: an ASCII letter or '_'.
fn u64 is_identifier_start(u8 ch)
{
    const u64 underscore = ch == '_';
    return underscore | is_alphabetic(ch);
}
// 1 when `ch` may appear inside an identifier: a letter, '_' or a decimal digit.
fn u64 is_identifier_ch(u8 ch)
{
    const u64 digit = is_decimal_digit(ch);
    return digit | is_identifier_start(ch);
}
// Consumes `expected_ch` at the parser position.
// On a mismatch or at end of input it prints a diagnostic and calls fail().
fn void expect_character(Parser* parser, String src, u8 expected_ch)
{
    const u64 position = parser->i;
    const u64 at_end = position >= src.length;
    if (expect(at_end, 0))
    {
        print(strlit("expected character '"));
        print(ch_to_str(expected_ch));
        print(strlit("', but found end of file\n"));
        fail();
    }
    else
    {
        const u8 found = src.pointer[position];
        if (found == expected_ch)
        {
            parser->i += 1;
        }
        else
        {
            print(strlit("expected character '"));
            print(ch_to_str(expected_ch));
            print(strlit("', but found '"));
            print(ch_to_str(found));
            print(strlit("'\n"));
            fail();
        }
    }
}
// Parses an identifier at the parser position and returns it as a slice of `src`.
// The identifier may be wrapped in double quotes; the quotes are consumed but are not part
// of the returned slice.
// Calls fail() when the first character cannot start an identifier, or when the input ends
// before a non-identifier character is found.
fn String parse_identifier(Parser* parser, String src)
{
    u64 identifier_start_index = parser->i;
    u64 is_string_literal = src.pointer[identifier_start_index] == '"';
    parser->i += is_string_literal;
    // First character of the name itself, i.e. just past the opening quote when there is one.
    // Slicing from identifier_start_index would keep the opening quote while dropping the
    // closing one.
    u64 name_start_index = parser->i;
    u8 identifier_start_ch = src.pointer[name_start_index];
    u64 is_valid_identifier_start = is_identifier_start(identifier_start_ch);
    parser->i += is_valid_identifier_start;
    if (expect(is_valid_identifier_start, 1))
    {
        while (parser->i < src.length)
        {
            u8 ch = src.pointer[parser->i];
            u64 is_identifier = is_identifier_ch(ch);
            expect(is_identifier, 1);
            parser->i += is_identifier;
            if (!is_identifier)
            {
                if (expect(is_string_literal, 0))
                {
                    expect_character(parser, src, '"');
                }
                // When quoted, parser->i is now past the closing quote: step back over it
                String result = src.slice(name_start_index, parser->i - is_string_literal);
                return result;
            }
        }
        fail();
    }
    else
    {
        fail();
    }
}
// Language keywords. No keyword is defined yet, so KEYWORD_COUNT is 0.
typedef enum Keyword : u32
{
KEYWORD_COUNT,
// Sentinel with all bits set.
KEYWORD_INVALID = ~0u,
} Keyword;
// TODO:
// fn Keyword parse_keyword(String identifier)
// {
// Keyword result = KEYWORD_INVALID;
// return result;
// }
// Parses an identifier and interns it in `unit`, returning its hash.
// The discard identifier "_" is not interned and yields 0.
fn Hash parse_and_intern_identifier(Parser* parser, Unit* unit, String src)
{
    String name = parse_identifier(parser, src);
    // TODO: reject keywords here once keyword parsing exists
    auto is_discard = name.equal(strlit("_"));
    if (!expect(is_discard, 0))
    {
        return intern_identifier(unit, name);
    }
    return 0;
}
// fn u32 get_line(Parser* parser)
// {
// return parser->line + 1;
// }
//
// fn u32 get_column(Parser* parser)
// {
// return parser->i - parser->column + 1;
// }
// Appends a new File entry to the unit's file list, with only its path set, and returns it.
fn File* unit_add_file(Unit* unit, String file_path)
{
    auto* slot = unit->files.add_one();
    *slot = { .path = file_path };
    return slot;
}
// Loads the file's contents into the unit arena and marks the file as read.
// The file must still be in the ADDED or QUEUED state.
fn void unit_file_read(Unit* unit, File* file)
{
    const auto status_before = file->status;
    assert(status_before == FILE_STATUS_ADDED || status_before == FILE_STATUS_QUEUED);

    file->source_code = file_read(unit->arena, file->path);
    file->status = FILE_STATUS_READ;
}
// Raw punctuation characters
global constexpr char brace_open = '{';
global constexpr char brace_close = '}';
global constexpr char parenthesis_open = '(';
global constexpr char parenthesis_close = ')';
global constexpr char bracket_open = '[';
global constexpr char bracket_close = ']';
global constexpr char pointer_sign = '*';
global constexpr char end_of_statement = ';';
global constexpr char end_of_argument = ',';

// Grammar roles, each an alias of one of the punctuation characters above
global constexpr char function_argument_start = parenthesis_open;
global constexpr char function_argument_end = parenthesis_close;
global constexpr char function_attribute_start = bracket_open;
global constexpr char function_attribute_end = bracket_close;
global constexpr char symbol_attribute_start = bracket_open;
global constexpr char symbol_attribute_end = bracket_close;
global constexpr char block_start = brace_open;
global constexpr char block_end = brace_close;
global constexpr char array_expression_start = bracket_open;
// Not used yet:
// global constexpr char array_expression_end = bracket_close;
global constexpr char composite_initialization_start = brace_open;
// Not used yet:
// global constexpr char composite_initialization_end = brace_close;
// Attributes accepted in the bracketed list that may follow `fn`.
// Enumerator values index the function_attributes table below.
typedef enum FunctionAttribute
{
    FUNCTION_ATTRIBUTE_CC,
    FUNCTION_ATTRIBUTE_COUNT,
} FunctionAttribute;

// Source spelling of each FunctionAttribute, in enumerator order.
global String function_attributes[] =
{
    strlit("cc"),
};

static_assert(array_length(function_attributes) == FUNCTION_ATTRIBUTE_COUNT, "");
// Calling conventions selectable through the `cc` function attribute.
// Enumerator values index the calling_conventions table below.
typedef enum CallingConvention
{
    CALLING_CONVENTION_C,
    CALLING_CONVENTION_CUSTOM,
    CALLING_CONVENTION_COUNT,
} CallingConvention;

// Source spelling of each CallingConvention, in enumerator order.
global String calling_conventions[] =
{
    strlit("c"),
    strlit("custom"),
};

static_assert(array_length(calling_conventions) == CALLING_CONVENTION_COUNT, "");
typedef enum GlobalSymbolAttribute
{
GLOBAL_SYMBOL_ATTRIBUTE_EXPORT,
GLOBAL_SYMBOL_ATTRIBUTE_EXTERN,
GLOBAL_SYMBOL_ATTRIBUTE_COUNT,
} GlobalSymbolAttribute;
global String global_symbol_attributes[] =
{
strlit("export"),
strlit("extern"),
};
struct GlobalSymbolAttributes
{
u8 exported: 1;
u8 external: 1;
};
typedef struct GlobalSymbolAttributes GlobalSymbolAttributes;
static_assert(array_length(global_symbol_attributes) == GLOBAL_SYMBOL_ATTRIBUTE_COUNT, "");
// Per-function analysis context.
struct Analyzer
{
// Function whose body is being analyzed; new nodes are numbered from its node_count.
Function* function;
};
// Parses a type expression at the parser position and returns the matching builtin type.
// Only integer types spelled `u<N>` / `s<N>` with N in 1..64 are implemented. Types starting
// with 'v', '[' or '*', and float types, trap. Malformed or out-of-range integer types call
// fail().
fn SemaType* analyze_type(Parser* parser, Unit* unit, String src)
{
    u64 start_index = parser->i;
    u8 start_ch = src.pointer[start_index];
    u32 array_start = start_ch == array_expression_start;
    u32 u_start = start_ch == 'u';
    u32 s_start = start_ch == 's';
    u32 float_start = start_ch == 'f';
    u32 void_start = start_ch == 'v';
    u32 pointer_start = start_ch == pointer_sign;
    u32 integer_start = u_start | s_start;
    u32 number_start = integer_start | float_start;

    if (void_start)
    {
        trap();
    }
    else if (array_start)
    {
        trap();
    }
    else if (pointer_start)
    {
        trap();
    }
    else if (number_start)
    {
        // Count the decimal digits that follow the prefix letter. At most five characters
        // are inspected, and the scan never goes past the end of the source buffer.
        u64 i = start_index + 1;
        u32 decimal_digit_count = 0;
        u64 top = min<u64>(i + 5, src.length);
        while (i < top)
        {
            u8 ch = src.pointer[i];
            u32 is_digit = is_decimal_digit(ch);
            decimal_digit_count += is_digit;
            if (!is_digit)
            {
                // Digits followed by a letter (e.g. `u8x`) do not form a type name
                if (is_alphabetic(ch))
                {
                    decimal_digit_count = 0;
                }
                break;
            }
            i += 1;
        }

        if (decimal_digit_count)
        {
            // Step over the 'u' / 's' / 'f' prefix
            parser->i += 1;
            if (integer_start)
            {
                u64 signedness = s_start;
                u64 bit_size = 0;
                u64 current_i = parser->i;
                assert(src.pointer[current_i] >= '0' & src.pointer[current_i] <= '9');
                switch (decimal_digit_count)
                {
                    case 1:
                        bit_size = src.pointer[current_i] - '0';
                        break;
                    case 2:
                        bit_size = (src.pointer[current_i] - '0') * 10 + (src.pointer[current_i + 1] - '0');
                        break;
                    default:
                        // Three or more digits can never name a width in 1..64
                        fail();
                }
                parser->i += decimal_digit_count;
                assert(parser->i >= src.length || !is_decimal_digit(src.pointer[parser->i]));
                // Only widths 1..64 are registered as builtin integer types
                if ((bit_size == 0) | (bit_size > 64))
                {
                    fail();
                }
                auto* result = unit->get_integer_type(bit_size, signedness);
                return result;
            }
            else
            {
                // Float types are not implemented yet
                trap();
            }
        }
        else
        {
            fail();
        }
    }
    trap();
}
// Converts a run of hexadecimal digits (without the "0x" prefix) to its value.
// An empty string yields 0. A non-hex character, or a value that does not fit in 64 bits,
// calls fail().
fn u64 parse_hex(String string)
{
    u64 value = 0;
    for (u8 ch : string)
    {
        u8 nibble = 0;
        auto is_decimal = (ch >= '0') & (ch <= '9');
        auto is_lower_hex = (ch >= 'a') & (ch <= 'f');
        auto is_upper_hex = (ch >= 'A') & (ch <= 'F');
        if (is_decimal)
        {
            nibble = ch - '0';
        }
        else if (is_lower_hex)
        {
            nibble = ch - 'a' + 10;
        }
        else if (is_upper_hex)
        {
            nibble = ch - 'A' + 10;
        }
        else
        {
            fail();
        }
        // Shifting would drop the top nibble if it is already occupied: reject the literal
        // instead of silently truncating it
        if (value >> 60)
        {
            fail();
        }
        value = (value << 4) | (nibble & 0x0f);
    }
    return value;
}
// Parses an integer literal at the parser position and returns a CONSTANT_INT node whose
// bit width is taken from `type`.
// Implemented forms: a lone `0` and hexadecimal `0x...`. Octal, binary and literals that do
// not start with '0' trap. A malformed literal calls fail().
[[nodiscard]] fn Node* parse_constant_integer(Parser* parser, Unit* unit, String src, SemaType* type)
{
    auto starting_ch = src[parser->i];
    if (starting_ch == '0')
    {
        auto follow_up_index = parser->i + 1;
        auto follow_up_character = src[follow_up_index];
        auto is_hex_start = follow_up_character == 'x';
        auto is_octal_start = follow_up_character == 'o';
        auto is_bin_start = follow_up_character == 'b';
        auto is_prefixed_start = is_hex_start | is_octal_start | is_bin_start;
        auto follow_up_alpha = is_alphabetic(follow_up_character);
        auto follow_up_digit = is_decimal_digit(follow_up_character);
        // get_next_ch_safe takes an index, so pass the follow-up character's position
        // (not its value) to look at the character after it
        auto is_valid_after_zero = is_space(follow_up_character, get_next_ch_safe(src, follow_up_index)) | (!follow_up_digit and !follow_up_alpha);
        if (is_prefixed_start) {
            enum class IntegerPrefix {
                hexadecimal,
                octal,
                binary,
            };
            IntegerPrefix prefix;
            switch (follow_up_character) {
                case 'x': prefix = IntegerPrefix::hexadecimal; break;
                case 'o': prefix = IntegerPrefix::octal; break;
                case 'b': prefix = IntegerPrefix::binary; break;
                default: fail();
            };
            // Step over the "0x" / "0o" / "0b" prefix
            parser->i += 2;
            auto start = parser->i;
            switch (prefix) {
                case IntegerPrefix::hexadecimal:
                    {
                        while (is_hex_digit(src[parser->i])) {
                            parser->i += 1;
                        }
                        // A prefix with no digits after it is not a number
                        if (parser->i == start) {
                            fail();
                        }
                        auto slice = src.slice(start, parser->i);
                        auto number = parse_hex(slice);
                        auto* constant_int = Node::add(unit);
                        constant_int->id = Node::Id::CONSTANT_INT;
                        constant_int->data_type = { .id = NodeDataType::Id::INTEGER, .bit_count = type->get_bit_count() };
                        constant_int->constant_int = number;
                        // TODO: is this fine?
                        constant_int->reallocate_edges(unit, 1);
                        return constant_int;
                    }
                case IntegerPrefix::octal:
                    trap();
                case IntegerPrefix::binary:
                    trap();
            }
        } else if (is_valid_after_zero) {
            parser->i += 1;
            auto* constant_int = Node::add(unit);
            constant_int->id = Node::Id::CONSTANT_INT;
            constant_int->data_type = { .id = NodeDataType::Id::INTEGER, .bit_count = type->get_bit_count() };
            constant_int->reallocate_edges(unit, 1);
            constant_int->constant_int = 0;
            return constant_int;
        } else {
            fail();
        }
        trap();
    }
    else
    {
        // Decimal literals that do not start with '0' are not implemented yet
        trap();
    }
}
// Analyzes one operand of an expression.
// Only integer literals are implemented: the literal is typed as `type` (or as the unsigned
// 64-bit integer when `type` is null) and the new node receives the function's next node
// number. Every other operand form traps.
[[nodiscard]] fn Node* analyze_single_expression(Analyzer* analyzer, Parser* parser, Unit* unit, String src, SemaType* type, Side side)
{
    unused(side);

    enum class Unary
    {
        NONE,
        ONE_COMPLEMENT,
        NEGATION,
    };
    auto unary_operation = Unary::NONE;

    const u8 first_ch = src[parser->i];
    const auto starts_number = is_decimal_digit(first_ch);
    const auto starts_identifier = is_identifier_start(first_ch);

    switch (first_ch)
    {
        // Character literals, string literals, unary operators, '#' forms, composite and
        // array initializers are not implemented yet
        case '\'':
        case '"':
        case '-':
        case '~':
        case '#':
        case composite_initialization_start:
        case array_expression_start:
            trap();
        default:
            assert(starts_number | starts_identifier);
            break;
    }

    // auto line = get_line(parser);
    // auto column = get_column(parser);
    if (starts_number)
    {
        SemaType* integer_type = type;
        if (!integer_type)
        {
            switch (unary_operation)
            {
                case Unary::NONE:
                    integer_type = unit->get_integer_type(64, 0);
                    break;
                case Unary::ONE_COMPLEMENT:
                    fail();
                case Unary::NEGATION:
                    fail();
            }
        }
        if (integer_type->id != SemaTypeId::INTEGER)
        {
            fail();
        }

        Node* literal = parse_constant_integer(parser, unit, src, integer_type);
        literal->gvn = analyzer->function->node_count;
        analyzer->function->node_count += 1;
        return literal;
    }

    if (starts_identifier)
    {
        trap();
    }
    fail();
}
// Analyzes an expression and returns its node.
// Only a single operand followed by ';', ',', ')' or ']' is implemented. Parenthesized
// expressions, operators and a missing expected type all trap.
[[nodiscard]] fn Node* analyze_expression(Analyzer* analyzer, Parser* parser, Unit* unit, String src, SemaType* type, Side side)
{
    enum class CurrentOperation
    {
        NONE,
    };

    auto current_operation = CurrentOperation::NONE;
    SemaType* iteration_type = type;
    Node* previous_node = 0;
    u64 iterations = 0;

    for (;;)
    {
        if (iterations == 0 && iteration_type == 0)
        {
            trap();
        }
        // u32 line = get_line(parser);
        // u32 column = get_column(parser);
        if (src[parser->i] == '(')
        {
            trap();
        }
        Node* current_node = analyze_single_expression(analyzer, parser, unit, src, iteration_type, side);
        skip_space(parser, src);

        switch (current_operation)
        {
            case CurrentOperation::NONE:
                previous_node = current_node;
                break;
        }

        const u8 terminator = src[parser->i];
        const u8 ends_expression = (terminator == end_of_statement) | (terminator == end_of_argument) | (terminator == parenthesis_close) | (terminator == bracket_close);
        if (!ends_expression)
        {
            // Binary operators are not implemented yet
            trap();
        }
        return previous_node;

        // Unreachable until operators are implemented
        iterations += 1;
    }
}
// Analyzes a brace-delimited block of statements.
// Only `return <expression>;` is implemented; it emits a RETURN node whose inputs are the
// function's root node and the returned value. Any other statement traps.
fn void analyze_local_block(Analyzer* analyzer, Parser* parser, Unit* unit, String src)
{
    expect_character(parser, src, block_start);

    for (;;)
    {
        skip_space(parser, src);
        const u8 statement_start_ch = src[parser->i];
        if (statement_start_ch == block_end)
        {
            break;
        }
        if (!is_identifier_start(statement_start_ch))
        {
            trap();
        }

        String keyword = parse_identifier(parser, src);
        if (!keyword.equal(strlit("return")))
        {
            trap();
        }

        skip_space(parser, src);
        auto* returned = analyze_expression(analyzer, parser, unit, src, analyzer->function->prototype.original_return_type, Side::right);
        expect_character(parser, src, ';');

        Function* owner = analyzer->function;
        Node* ret_node = Node::add_from_function(unit, owner);
        ret_node->id = Node::Id::RETURN;
        ret_node->data_type = { .id = NodeDataType::Id::CONTROL };
        ret_node->reallocate_edges(unit, 4);
        ret_node->input_count = 2;
        ret_node->set_input(unit, owner, owner->root_node, 0);
        ret_node->set_input(unit, owner, returned, 1);
    }

    expect_character(parser, src, block_end);
}
// Classes that systemv_classify can assign to each slot of a SystemVClassification.
// NOTE(review): the names appear to follow the System V AMD64 ABI parameter classes — confirm.
typedef enum SystemVClass
{
SYSTEMV_CLASS_NONE,
SYSTEMV_CLASS_MEMORY,
SYSTEMV_CLASS_INTEGER,
SYSTEMV_CLASS_SSE,
SYSTEMV_CLASS_SSEUP,
} SystemVClass;
// Result of systemv_classify: two class slots. analyze_function treats v[0] as the low part
// and v[1] as the high part of the value.
struct SystemVClassification
{
SystemVClass v[2];
};
typedef struct SystemVClassification SystemVClassification;
// Pair of register counters; analyze_function starts it at 6 general-purpose and 8 SSE
// registers when lowering arguments.
struct SystemVRegisterCount
{
u32 gp_registers;
u32 sse_registers;
};
typedef struct SystemVRegisterCount SystemVRegisterCount;
// Classifies `type` located at `base_offset`.
// The slot is 0 for offsets below 8 and 1 otherwise; the other slot is set to NONE.
// Only integers of 8, 16, 32 or 64 bits are implemented (class INTEGER); everything else traps.
fn SystemVClassification systemv_classify(SemaType* type, u64 base_offset)
{
    const u32 slot = base_offset >= 8;

    SystemVClassification classes;
    classes.v[slot] = SYSTEMV_CLASS_MEMORY;
    classes.v[slot ^ 1] = SYSTEMV_CLASS_NONE;

    if (type->id != SemaTypeId::INTEGER)
    {
        trap();
    }

    const u8 bit_count = type->get_bit_count();
    const u8 is_supported_width = (bit_count == 8) | (bit_count == 16) | (bit_count == 32) | (bit_count == 64);
    if (!is_supported_width)
    {
        trap();
    }

    classes.v[slot] = SYSTEMV_CLASS_INTEGER;
    return classes;
}
// Returns 1 when `type` ends at or before byte `start`, i.e. it has no bytes from `start` on.
// Otherwise returns 0 for non-aggregate types; arrays, structs and unions are not
// implemented and trap. `end` is currently unused.
fn u8 contains_no_user_data(SemaType* type, u64 start, u64 end)
{
    unused(end);

    if (start >= type->size)
    {
        return 1;
    }

    switch (type->id)
    {
        case SemaTypeId::ARRAY:
        case SemaTypeId::STRUCT:
        case SemaTypeId::UNION:
        case SemaTypeId::COUNT:
            trap();
        default:
            return 0;
    }
}
// Returns the type used to pass the eight-byte chunk of `type` at `offset`.
// Only the case of an 8/16/32/64-bit integer at offset 0 with no data after it is
// implemented, and it returns `type` itself. Every other case traps.
fn SemaType* systemv_get_int_type_at_offset(SemaType* type, u64 offset, SemaType* source_type, u64 source_offset)
{
    unused(source_type);

    if (type->id != SemaTypeId::INTEGER)
    {
        trap();
    }

    const u8 bit_count = type->get_bit_count();
    const u8 is_supported_width = (bit_count == 8) | (bit_count == 16) | (bit_count == 32) | (bit_count == 64);
    if (!is_supported_width)
    {
        trap();
    }
    if (offset != 0)
    {
        trap();
    }

    // Range from the end of the integer to the end of the eight-byte chunk
    const u64 tail_start = source_offset + type->size;
    const u64 tail_end = source_offset + 8;
    if (!contains_no_user_data(type, tail_start, tail_end))
    {
        trap();
    }
    return type;
}
// Parses and analyzes one function definition starting at the `fn` keyword:
//   fn [function attributes] name [symbol attributes] ( ) return_type { body }
// It registers a Function in `unit`, creates its ROOT node and analyzes the body.
// Only the C calling convention, an empty argument list and a non-extern symbol are
// implemented; the unimplemented paths trap and malformed input calls fail().
fn void analyze_function(Parser* parser, Unit* unit, String src)
{
expect_character(parser, src, 'f');
expect_character(parser, src, 'n');
skip_space(parser, src);
// Optional bracketed function attribute list, e.g. [cc(.c)]
u64 has_function_attributes = src.pointer[parser->i] == function_attribute_start;
parser->i += has_function_attributes;
// Used when no `cc` attribute is given
CallingConvention calling_convention = CALLING_CONVENTION_CUSTOM;
if (has_function_attributes)
{
// One bit per attribute already seen, so duplicates can be rejected
u64 mask = 0;
while (1)
{
skip_space(parser, src);
if (src.pointer[parser->i] == function_attribute_end)
{
break;
}
String attribute_candidate = parse_identifier(parser, src);
u64 attribute_i;
for (attribute_i = 0; attribute_i < array_length(function_attributes); attribute_i += 1)
{
String function_attribute_string = function_attributes[attribute_i];
if (attribute_candidate.equal(function_attribute_string))
{
if (mask & (1 << attribute_i))
{
fail();
}
auto function_attribute = static_cast<FunctionAttribute>(attribute_i);
mask |= (1 << attribute_i);
switch (function_attribute)
{
case FUNCTION_ATTRIBUTE_CC:
{
// Syntax: cc(.<name>) where <name> is an entry of calling_conventions
skip_space(parser, src);
expect_character(parser, src, '(');
skip_space(parser, src);
expect_character(parser, src, '.');
String candidate_cc = parse_identifier(parser, src);
skip_space(parser, src);
expect_character(parser, src, ')');
u64 cc_i;
for (cc_i = 0; cc_i < array_length(calling_conventions); cc_i += 1)
{
String calling_convention_string = calling_conventions[cc_i];
if (calling_convention_string.equal(candidate_cc))
{
calling_convention = static_cast<CallingConvention>(cc_i);
break;
}
}
// The loop ran to completion: unknown calling convention name
if (cc_i == array_length(calling_conventions))
{
fail();
}
} break;
default:
trap();
}
break;
}
}
// The loop ran to completion: unknown attribute name
if (attribute_i == array_length(function_attributes))
{
fail();
}
skip_space(parser, src);
// Only the closing bracket may follow an attribute here; there is no separator handling
u8 after_ch = src.pointer[parser->i];
switch (after_ch)
{
case function_attribute_end: break;
default: fail();
}
}
expect_character(parser, src, function_attribute_end);
skip_space(parser, src);
}
Hash name_hash = parse_and_intern_identifier(parser, unit, src);
skip_space(parser, src);
// Optional bracketed symbol attribute list after the name, e.g. [export]
u64 has_global_attributes = src.pointer[parser->i] == symbol_attribute_start;
parser->i += has_global_attributes;
GlobalSymbolAttributes symbol_attributes = {};
if (has_global_attributes)
{
u64 mask = 0;
while (1)
{
skip_space(parser, src);
if (src.pointer[parser->i] == symbol_attribute_end)
{
break;
}
String candidate_attribute = parse_identifier(parser, src);
skip_space(parser, src);
// The separator is consumed before the attribute name is looked up
switch (src.pointer[parser->i])
{
case symbol_attribute_end:
break;
case ',':
parser->i += 1;
break;
default:
fail();
}
u64 attribute_i;
for (attribute_i = 0; attribute_i < array_length(global_symbol_attributes); attribute_i += 1)
{
String attribute_string = global_symbol_attributes[attribute_i];
if (attribute_string.equal(candidate_attribute))
{
if (mask & (1 << attribute_i))
{
fail();
}
mask |= 1 << attribute_i;
auto attribute = static_cast<GlobalSymbolAttribute>(attribute_i);
switch (attribute)
{
case GLOBAL_SYMBOL_ATTRIBUTE_EXPORT:
symbol_attributes.exported = 1;
break;
case GLOBAL_SYMBOL_ATTRIBUTE_EXTERN:
symbol_attributes.external = 1;
break;
default:
trap();
}
break;
}
}
if (attribute_i == array_length(global_symbol_attributes))
{
fail();
}
}
expect_character(parser, src, symbol_attribute_end);
skip_space(parser, src);
}
// A symbol cannot be both exported and extern
if (symbol_attributes.exported & symbol_attributes.external)
{
fail();
}
// Argument list: only `()` is accepted for now
expect_character(parser, src, function_argument_start);
while (1)
{
skip_space(parser, src);
if (src.pointer[parser->i] == function_argument_end)
{
break;
}
// TODO: function arguments in function definition
trap();
}
expect_character(parser, src, function_argument_end);
skip_space(parser, src);
PinnedArray<SemaType*> original_argument_types = {};
SemaType* original_return_type = analyze_type(parser, unit, src);
skip_space(parser, src);
switch (calling_convention)
{
case CALLING_CONVENTION_C:
{
// First process the return type ABI
AbiInfo return_type_abi = {};
{
SystemVClassification return_type_classes = systemv_classify(original_return_type, 0);
// Sanity checks: slot 1 may only be MEMORY if slot 0 is, and SSEUP must follow SSE
assert(return_type_classes.v[1] != SYSTEMV_CLASS_MEMORY | return_type_classes.v[0] == SYSTEMV_CLASS_MEMORY);
assert(return_type_classes.v[1] != SYSTEMV_CLASS_SSEUP | return_type_classes.v[0] == SYSTEMV_CLASS_SSE);
SemaType* low_part = 0;
switch (return_type_classes.v[0])
{
case SYSTEMV_CLASS_INTEGER:
{
SemaType* result_type = systemv_get_int_type_at_offset(original_return_type, 0, original_return_type, 0);
// Integer return types narrower than 32 bits are not handled yet
if (return_type_classes.v[1] == SYSTEMV_CLASS_NONE & original_return_type->get_bit_count() < 32)
{
trap();
}
low_part = result_type;
} break;
default:
trap();
}
assert(low_part);
SemaType* high_part = 0;
switch (return_type_classes.v[1])
{
case SYSTEMV_CLASS_NONE:
break;
case SYSTEMV_CLASS_MEMORY:
trap();
case SYSTEMV_CLASS_INTEGER:
trap();
case SYSTEMV_CLASS_SSE:
trap();
case SYSTEMV_CLASS_SSEUP:
trap();
}
if (high_part)
{
trap();
}
else
{
// TODO:
u8 is_type = 1;
if (is_type)
{
// The ABI type equals the source type: the value is returned directly
if (low_part == original_return_type)
{
return_type_abi =
{
.kind = ABI_INFO_DIRECT,
};
}
else
{
trap();
}
}
else
{
trap();
}
}
}
// Now process the ABI for argument types
PinnedArray<AbiInfo> argument_type_abis = {};
// u32 abi_argument_type_count = 0;
{
SystemVRegisterCount available_registers = {
.gp_registers = 6,
.sse_registers = 8,
};
// An indirect return consumes one general-purpose register
available_registers.gp_registers -= return_type_abi.kind == ABI_INFO_INDIRECT;
// TODO: return by reference
u8 return_by_reference = 0;
if (return_by_reference)
{
trap();
}
for (u32 original_argument_index = 0; original_argument_index < original_argument_types.length; original_argument_index += 1)
{
trap();
}
}
switch (return_type_abi.kind)
{
case ABI_INFO_IGNORE: case ABI_INFO_DIRECT:
break;
case ABI_INFO_DIRECT_PAIR:
trap();
case ABI_INFO_DIRECT_COERCE:
trap();
case ABI_INFO_DIRECT_COERCE_INT:
trap();
case ABI_INFO_DIRECT_SPLIT_STRUCT_I32:
trap();
case ABI_INFO_EXPAND_COERCE:
trap();
case ABI_INFO_INDIRECT:
trap();
case ABI_INFO_EXPAND:
trap();
}
// assert(abi_argument_type_count == 0);
// TODO: reserve memory for them
// Slice<Node::DataType> abi_argument_types = {};
for (u32 i = 0; i < argument_type_abis.length; i += 1)
{
trap();
}
// Register the function and fill in its symbol and prototype
auto* function = unit->functions.add_one();
*function = {
.symbol = {
.name = name_hash,
.id = Symbol::Id::function,
.linkage = symbol_attributes.external ? Symbol::Linkage::external : Symbol::Linkage::internal,
},
.root_node = 0,
.parameters = unit->arena->allocate_many<Node*>(argument_type_abis.length),
.prototype = {
.argument_type_abis = argument_type_abis.pointer,
.original_argument_types = original_argument_types.pointer,
.original_return_type = original_return_type,
.return_type_abi = return_type_abi,
.original_argument_count = original_argument_types.length,
.varags = 0,
},
.node_count = 0,
.parameter_count = (u16)argument_type_abis.length,
};
Node* root_node = Node::add_from_function(unit, function);
root_node->id = Node::Id::ROOT;
root_node->data_type = {
.id = NodeDataType::Id::TUPLE,
};
root_node->reallocate_edges(unit, 4);
// TODO: revisit
// auto* control_node = root_node->project(unit, function, {
// .type = { .id = NodeDataType::Id::CONTROL },
// });
// auto* memory_node = root_node->project(unit, function, {});
// auto* pointer_node = root_node->project(unit, function, {});
// function->parameters[0] = control_node;
// function->parameters[1] = memory_node;
// function->parameters[2] = pointer_node;
for (u32 argument_i = 0; argument_i < argument_type_abis.length; argument_i += 1)
{
trap();
}
// TODO: callgraph
// TODO: revisit
// Node* ret_node = Node::add_from_function(unit, function);
// ret_node->id = Node::Id::RETURN;
// ret_node->data_type = { .id = NodeDataType::Id::CONTROL };
// ret_node->reallocate_edges(unit, 4);
// ret_node->input_count = 2;
// ret_node->set_input(unit, function, root_node, 0);
// Non-extern functions have a body to analyze; extern declarations are not implemented
switch (symbol_attributes.external)
{
case 0:
{
Analyzer analyzer = {};
analyzer.function = function;
analyze_local_block(&analyzer, parser, unit, src);
} break;
case 1:
trap();
}
} break;
case CALLING_CONVENTION_CUSTOM:
trap();
break;
case CALLING_CONVENTION_COUNT:
trap();
break;
}
}
// Reads `file` and analyzes its top-level declarations one after another until the end of
// the source. Only function definitions (`fn ...`) are implemented: a declaration starting
// with '>' traps and anything else calls fail().
fn void unit_file_analyze(Unit* unit, File* file)
{
    unit_file_read(unit, file);
    String src = file->source_code;
    Parser parser = {};

    for (skip_space(&parser, src); parser.i < src.length; skip_space(&parser, src))
    {
        // u32 line = get_line(&parser);
        // u32 column = get_column(&parser);
        const u64 declaration_index = parser.i;
        const u8 first_ch = src.pointer[declaration_index];
        if (first_ch == '>')
        {
            trap();
        }

        const u8 is_fn_keyword = (first_ch == 'f') && (get_next_ch_safe(src, declaration_index) == 'n');
        if (is_fn_keyword)
        {
            analyze_function(&parser, unit, src);
        }
        else
        {
            fail();
        }
    }
}
// The single compiler instance; its arena is created in entry_point.
global Instance instance;

// Program entry: creates the instance arena, sets up one compilation unit and analyzes the
// hard-coded test source file.
extern "C" void entry_point()
{
    instance.arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(4));
    Unit* unit = instance_add_unit(&instance);
    // unit_initialize assigns the whole Unit, which resets unit->files. It therefore has to
    // run before the file is registered, otherwise the unit forgets the file it was given.
    unit_initialize(unit);
    File* file = unit_add_file(unit, strlit("tests/first/main.nat"));
    unit_file_analyze(unit, file);
}