From e07d65cd54581ec280a9c9359012ba938f115c51 Mon Sep 17 00:00:00 2001
From: David Gonzalez Martin
Date: Thu, 20 Jun 2024 21:41:20 +0200
Subject: [PATCH] First iteration

---
 .gitignore | 1 +
 bootstrap/entry.S | 15 +
 bootstrap/main.cpp | 2933 ++++++++++++++++++++++++++++++++++++++++++
 run.sh | 23 +
 tests/first/main.nat | 4 +
 5 files changed, 2976 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 bootstrap/entry.S
 create mode 100644 bootstrap/main.cpp
 create mode 100755 run.sh
 create mode 100644 tests/first/main.nat

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..567609b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/bootstrap/entry.S b/bootstrap/entry.S
new file mode 100644
index 0000000..b6a70df
--- /dev/null
+++ b/bootstrap/entry.S
@@ -0,0 +1,15 @@
+.section .text
+.extern entry_point
+.global _start
+_start:
+    xor %ebp, %ebp /* zero the frame pointer register */
+    popq %rdi /* first argument: the word on top of the initial stack (presumably argc - confirm) */
+    mov %rsp, %rsi /* second argument: the stack pointer after the pop (presumably argv - confirm) */
+    and $~0xf, %rsp /* round the stack pointer down to a 16-byte boundary */
+    pushq %rsp /* two 8-byte pushes keep the 16-byte alignment before the call */
+    pushq $0
+    call entry_point
+    mov $231, %eax /* 231 is exit_group in the SyscallX86_64 table of main.cpp */
+    xor %edi, %edi /* exit status 0; the value returned by entry_point is not used */
+    syscall
+
diff --git a/bootstrap/main.cpp b/bootstrap/main.cpp
new file mode 100644
index 0000000..9ecdbff
--- /dev/null
+++ b/bootstrap/main.cpp
@@ -0,0 +1,2933 @@
+// NOTE(review): the header names of the five includes below are missing from this copy of the patch
+#include
+#include
+#include
+#include
+#include
+// Short aliases for the fixed-width integer and floating point types
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
+
+typedef float f32;
+typedef double f64;
+
+typedef u32 Hash;
+
+// `fn` and `global` both expand to `static`: internal linkage at file scope, static member inside a struct
+#define fn static
+#define global static
+// Traps when the condition is false. Expands to a bare `if` statement, so do not use it as the
+// unbraced body of an if/else
+#define assert(x) if (__builtin_expect(!(x), 0)) { trap(); }
+#define forceinline __attribute__((always_inline))
+#define expect(x, b) __builtin_expect(x, b)
+#define trap() __builtin_trap()
+// Element count of a true array (not a pointer). Fully parenthesised so that the division cannot
+// re-associate with operators surrounding the macro invocation
+#define array_length(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define page_size (0x1000)
+#define unused(x) (void)(x)
+// Size helpers. KB and MB compute in the type of `n`; GB and TB widen to u64 first
+#define KB(n) ((n) * 1024)
+#define MB(n) ((n) * 1024 * 1024)
+#define GB(n) ((u64)(n) * 1024 * 1024 * 1024)
+#define TB(n) ((u64)(n) * 1024 * 1024 * 1024 * 1024)
+
+// Singly linked list node that points at an externally stored run of `count` elements
+template
+struct DynamicList
+{
+    T* pointer;
+    u64 count;
+    DynamicList* next;
+};
+
+// Singly linked list node with an inline array of `count` slots, `length` of which are in use
+template
+struct StaticList
+{
+    u64 length;
+    StaticList* next;
+    T array[count];
+};
+
+// Freestanding byte-wise memcpy: copies `size` bytes from `src` to `dst` and returns `dst`
+extern "C" void* memcpy(void* __restrict dst, void* __restrict src, u64 size)
+{
+    auto* destination = (u8*)dst;
+    auto* source = (u8*)src;
+
+    for (u64 i = 0; i < size; i += 1)
+    {
+        destination[i] = source[i];
+    }
+
+    return dst;
+}
+
+// Freestanding byte-wise memset: stores `n` into `size` bytes at `dst` and returns `dst`
+extern "C" void* memset(void* dst, u8 n, u64 size)
+{
+    auto* destination = (u8*)dst;
+    for (u64 i = 0; i < size; i += 1)
+    {
+        destination[i] = n;
+    }
+
+    return dst;
+}
+
+// Freestanding memcmp: returns 0 when the first `n` bytes match, otherwise the difference
+// between the first pair of differing bytes
+extern "C" int memcmp(const void* left, const void* right, u64 n)
+{
+    const u8 *l=(const u8*)left, *r=(const u8*)right;
+    for (; n && *l == *r; n--, l++, r++);
+    return n ? *l - *r : 0;
+}
+
+// Returns 1 when the `count` elements at `a` and `b` are bytewise identical, 0 otherwise
+template
+forceinline fn u8 mem_equal_range(T* a, T* b, u64 count)
+{
+    return memcmp(a, b, count * sizeof(T)) == 0;
+}
+
+template
+forceinline fn T min(T a, T b)
+{
+    return a < b ? a : b;
+}
+
+template
+forceinline fn T max(T a, T b)
+{
+    return a > b ? a : b;
+}
+
+// Non-owning pointer + length view over contiguous elements
+template
+struct Slice
+{
+    T* pointer;
+    u64 length;
+
+    // Bounds-checked element access (traps when `index` is out of range)
+    T& operator[](u64 index)
+    {
+        assert(index < length);
+        return pointer[index];
+    }
+
+    // Sub-view covering [start, end). The bounds are not validated here
+    Slice slice(u64 start, u64 end)
+    {
+        return {
+            .pointer = pointer + start,
+            .length = end - start,
+        };
+    }
+
+    // Returns 1 when both views have the same length and bytewise identical contents
+    forceinline u8 equal(Slice other)
+    {
+        if (length == other.length)
+        {
+            return mem_equal_range(pointer, other.pointer, length);
+        }
+        else
+        {
+            return 0;
+        }
+    }
+
+    forceinline T* begin()
+    {
+        return pointer;
+    }
+
+    forceinline T* end()
+    {
+        return pointer + length;
+    }
+};
+
+using String = Slice;
+// String view over a string literal; the terminating NUL is not counted in the length
+#define strlit(s) String{ .pointer = (u8*)s, .length = sizeof(s) - 1, }
+// One-byte String view over the lvalue `ch`
+#define ch_to_str(ch) String{ .pointer = &ch, .length = 1 }
+
+global auto constexpr fnv_offset = 14695981039346656037ull;
+global auto constexpr fnv_prime = 1099511628211ull;
+
+// Hashes the bytes by xor-ing each one into a 64-bit state and multiplying by fnv_prime;
+// the 64-bit result is truncated to Hash (u32)
+fn Hash hash_bytes(String bytes)
+{
+    u64 result = fnv_offset;
+    for (u64 i = 0; i < bytes.length; i += 1)
+    {
+        result ^= bytes.pointer[i];
+        result *= fnv_prime;
+    }
+
+    return (Hash)result;
+}
+
+// fn forceinline long syscall0(long n)
+// {
+//     unsigned long ret;
+//     __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n) : "rcx", "r11", "memory");
+//     return ret;
+// }
+
+// Raw x86-64 `syscall` wrappers. The value returned is whatever the kernel leaves in rax;
+// nothing here translates it into an errno-style result
+fn forceinline long syscall1(long n, long a1)
+{
+    unsigned long ret;
+    __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1) : "rcx", "r11", "memory");
+    return ret;
+}
+
+fn forceinline long syscall2(long n, long a1, long a2)
+{
+    unsigned long ret;
+    __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2)
+                        : "rcx", "r11", "memory");
+    return ret;
+}
+
+fn forceinline long syscall3(long n, long a1, long a2, long a3)
+{
+    unsigned long ret;
+    __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
+                        "d"(a3) : "rcx", "r11", "memory");
+    return ret;
+}
+
+// fn forceinline long syscall4(long n, long a1, long a2, long a3, long a4)
+// {
+//     unsigned long ret;
+//     register long r10 __asm__("r10") = a4;
+//     __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
+//                         "d"(a3), "r"(r10): "rcx", "r11", "memory");
+//     return ret;
+// }
+
+// fn forceinline long syscall5(long n, long a1, long a2, long a3, long a4, long a5)
+// {
+//     unsigned long ret;
+//     register long r10 __asm__("r10") = a4;
+//     register long r8 __asm__("r8") = a5;
+//     __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
+//                         "d"(a3), "r"(r10), "r"(r8) : "rcx", "r11", "memory");
+//     return ret;
+// }
+
+// Arguments four to six travel in r10, r8 and r9
+fn forceinline long syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
+{
+    unsigned long ret;
+    register long r10 __asm__("r10") = a4;
+    register long r8 __asm__("r8") = a5;
+    register long r9 __asm__("r9") = a6;
+    __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
+                        "d"(a3), "r"(r10), "r"(r8), "r"(r9) : "rcx", "r11", "memory");
+    return ret;
+}
+
+// Returns 1 when the first `size` bytes at `a` and `b` are equal, 0 otherwise
+fn u8 memeq(u8* a, u8* b, u64 size)
+{
+    for (u64 i = 0; i < size; i += 1)
+    {
+        if (a[i] != b[i])
+        {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+// System call numbers passed as the first argument of the syscallN wrappers
+enum class SyscallX86_64 : u64 {
+    read = 0,
+    write = 1,
+    open = 2,
+    close = 3,
+    stat = 4,
+    fstat = 5,
+    lstat = 6,
+    poll = 7,
+    lseek = 8,
+    mmap = 9,
+    mprotect = 10,
+    munmap = 11,
+    brk = 12,
+    rt_sigaction = 13,
+    rt_sigprocmask = 14,
+    rt_sigreturn = 15,
+    ioctl = 16,
+    pread64 = 17,
+    pwrite64 = 18,
+    readv = 19,
+    writev = 20,
+    access = 21,
+    pipe = 22,
+    select = 23,
+    sched_yield = 24,
+    mremap = 25,
+    msync = 26,
+    mincore = 27,
+    madvise = 28,
+    shmget = 29,
+    shmat = 30,
+    shmctl = 31,
+    dup = 32,
+    dup2 = 33,
+    pause = 34,
+    nanosleep = 35,
+    getitimer = 36,
+    alarm = 37,
+    setitimer = 38,
+    getpid = 39,
+    sendfile = 40,
+    socket = 41,
+    connect = 42,
+    accept = 43,
+    sendto = 44,
+    recvfrom = 45,
+    sendmsg = 46,
+    recvmsg = 47,
+    shutdown = 48,
+    bind = 49,
+    listen = 50,
+    getsockname = 51,
+    getpeername = 52,
+    socketpair = 53,
+    setsockopt = 54,
+    getsockopt = 55,
+    clone = 56,
+    fork = 57,
+    vfork = 58,
+    execve = 59,
+    exit = 60,
+    wait4 = 61,
+    kill = 62,
+    uname = 63,
+    semget = 64,
+    semop = 65,
+    semctl = 66,
+    shmdt = 67,
+    msgget = 68,
+    msgsnd = 69,
+    msgrcv = 70,
+    msgctl = 71,
+    fcntl = 72,
+    flock = 73,
+    fsync = 74,
+    fdatasync = 75,
+    truncate = 76,
+    ftruncate = 77,
+    getdents = 78,
+    getcwd = 79,
+    chdir = 80,
+    fchdir = 81,
+    rename = 82,
+    mkdir = 83,
+    rmdir = 84,
+    creat = 85,
+    link = 86,
+    unlink = 87,
+    symlink = 88,
+    readlink = 89,
+    chmod = 90,
+    fchmod = 91,
+    chown = 92,
+    fchown = 93,
+    lchown = 94,
+    umask = 95,
+    gettimeofday = 96,
+    getrlimit = 97,
+    getrusage = 98,
+    sysinfo = 99,
+    times = 100,
+    ptrace = 101,
+    getuid = 102,
+    syslog = 103,
+    getgid = 104,
+    setuid = 105,
+    setgid = 106,
+    geteuid = 107,
+    getegid = 108,
+    setpgid = 109,
+    getppid = 110,
+    getpgrp = 111,
+    setsid = 112,
+    setreuid = 113,
+    setregid = 114,
+    getgroups = 115,
+    setgroups = 116,
+    setresuid = 117,
+    getresuid = 118,
+    setresgid = 119,
+    getresgid = 120,
+    getpgid = 121,
+    setfsuid = 122,
+    setfsgid = 123,
+    getsid = 124,
+    capget = 125,
+    capset = 126,
+    rt_sigpending = 127,
+    rt_sigtimedwait = 128,
+    rt_sigqueueinfo = 129,
+    rt_sigsuspend = 130,
+    sigaltstack = 131,
+    utime = 132,
+    mknod = 133,
+    uselib = 134,
+    personality = 135,
+    ustat = 136,
+    statfs = 137,
+    fstatfs = 138,
+    sysfs = 139,
+    getpriority = 140,
+    setpriority = 141,
+    sched_setparam = 142,
+    sched_getparam = 143,
+    sched_setscheduler = 144,
+    sched_getscheduler = 145,
+    sched_get_priority_max = 146,
+    sched_get_priority_min = 147,
+    sched_rr_get_interval = 148,
+    mlock = 149,
+    munlock = 150,
+    mlockall = 151,
+    munlockall = 152,
+    vhangup = 153,
+    modify_ldt = 154,
+    pivot_root = 155,
+    _sysctl = 156,
+    prctl = 157,
+    arch_prctl = 158,
+    adjtimex = 159,
+    setrlimit = 160,
+    chroot = 161,
+    sync = 162,
+    acct = 163,
+    settimeofday = 164,
+    mount = 165,
+    umount2 = 166,
+    swapon = 167,
+    swapoff = 168,
+    reboot = 169,
+    sethostname = 170,
+    setdomainname = 171,
+    iopl = 172,
+    ioperm = 173,
+    create_module = 174,
+    init_module = 175,
+    delete_module = 176,
+    get_kernel_syms = 177,
+    query_module = 178,
+    quotactl = 179,
+    nfsservctl = 180,
+    getpmsg = 181,
+    putpmsg = 182,
+    afs_syscall = 183,
+    tuxcall = 184,
+    security = 185,
+    gettid = 186,
+    readahead = 187,
+    setxattr = 188,
+    lsetxattr = 189,
+    fsetxattr = 190,
+    getxattr = 191,
+    lgetxattr = 192,
+    fgetxattr = 193,
+    listxattr = 194,
+    llistxattr = 195,
+    flistxattr = 196,
+    removexattr = 197,
+    lremovexattr = 198,
+    fremovexattr = 199,
+    tkill = 200,
+    time = 201,
+    futex = 202,
+    sched_setaffinity = 203,
+    sched_getaffinity = 204,
+    set_thread_area = 205,
+    io_setup = 206,
+    io_destroy = 207,
+    io_getevents = 208,
+    io_submit = 209,
+    io_cancel = 210,
+    get_thread_area = 211,
+    lookup_dcookie = 212,
+    epoll_create = 213,
+    epoll_ctl_old = 214,
+    epoll_wait_old = 215,
+    remap_file_pages = 216,
+    getdents64 = 217,
+    set_tid_address = 218,
+    restart_syscall = 219,
+    semtimedop = 220,
+    fadvise64 = 221,
+    timer_create = 222,
+    timer_settime = 223,
+    timer_gettime = 224,
+    timer_getoverrun = 225,
+    timer_delete = 226,
+    clock_settime = 227,
+    clock_gettime = 228,
+    clock_getres = 229,
+    clock_nanosleep = 230,
+    exit_group = 231,
+    epoll_wait = 232,
+    epoll_ctl = 233,
+    tgkill = 234,
+    utimes = 235,
+    vserver = 236,
+    mbind = 237,
+    set_mempolicy = 238,
+    get_mempolicy = 239,
+    mq_open = 240,
+    mq_unlink = 241,
+    mq_timedsend = 242,
+    mq_timedreceive = 243,
+    mq_notify = 244,
+    mq_getsetattr = 245,
+    kexec_load = 246,
+    waitid = 247,
+    add_key = 248,
+    request_key = 249,
+    keyctl = 250,
+    ioprio_set = 251,
+    ioprio_get = 252,
+    inotify_init = 253,
+    inotify_add_watch = 254,
+    inotify_rm_watch = 255,
+    migrate_pages = 256,
+    openat = 257,
+    mkdirat = 258,
+    mknodat = 259,
+    fchownat = 260,
+    futimesat = 261,
+    fstatat64 = 262,
+    unlinkat = 263,
+    renameat = 264,
+    linkat = 265,
+    symlinkat = 266,
+    readlinkat = 267,
+    fchmodat = 268,
+    faccessat = 269,
+    pselect6 = 270,
+    ppoll = 271,
+    unshare = 272,
+    set_robust_list = 273,
+    get_robust_list = 274,
+    splice = 275,
+    tee = 276,
+    sync_file_range = 277,
+    vmsplice = 278,
+    move_pages = 279,
+    utimensat = 280,
+    epoll_pwait = 281,
+    signalfd = 282,
+    timerfd_create = 283,
+    eventfd = 284,
+    fallocate = 285,
+    timerfd_settime = 286,
+    timerfd_gettime = 287,
+    accept4 = 288,
+    signalfd4 = 289,
+    eventfd2 = 290,
+    epoll_create1 = 291,
+    dup3 = 292,
+    pipe2 = 293,
+    inotify_init1 = 294,
+    preadv = 295,
+    pwritev = 296,
+    rt_tgsigqueueinfo = 297,
+    perf_event_open = 298,
+    recvmmsg = 299,
+    fanotify_init = 300,
+    fanotify_mark = 301,
+    prlimit64 = 302,
+    name_to_handle_at = 303,
+    open_by_handle_at = 304,
+    clock_adjtime = 305,
+    syncfs = 306,
+    sendmmsg = 307,
+    setns = 308,
+    getcpu = 309,
+    process_vm_readv = 310,
+    process_vm_writev = 311,
+    kcmp = 312,
+    finit_module = 313,
+    sched_setattr = 314,
+    sched_getattr = 315,
+    renameat2 = 316,
+    seccomp = 317,
+    getrandom = 318,
+    memfd_create = 319,
+    kexec_file_load = 320,
+    bpf = 321,
+    execveat = 322,
+    userfaultfd = 323,
+    membarrier = 324,
+    mlock2 = 325,
+    copy_file_range = 326,
+    preadv2 = 327,
+    pwritev2 = 328,
+    pkey_mprotect = 329,
+    pkey_alloc = 330,
+    pkey_free = 331,
+    statx = 332,
+    io_pgetevents = 333,
+    rseq = 334,
+    pidfd_send_signal = 424,
+    io_uring_setup = 425,
+    io_uring_enter = 426,
+    io_uring_register = 427,
+    open_tree = 428,
+    move_mount = 429,
+    fsopen = 430,
+    fsconfig = 431,
+    fsmount = 432,
+    fspick = 433,
+    pidfd_open = 434,
+    clone3 = 435,
+    close_range = 436,
+    openat2 = 437,
+    pidfd_getfd = 438,
+    faccessat2 = 439,
+    process_madvise = 440,
+    epoll_pwait2 = 441,
+    mount_setattr = 442,
+    quotactl_fd = 443,
+    landlock_create_ruleset = 444,
+    landlock_add_rule = 445,
+    landlock_restrict_self = 446,
+    memfd_secret = 447,
+    process_mrelease = 448,
+    futex_waitv = 449,
+    set_mempolicy_home_node = 450,
+    cachestat = 451,
+    fchmodat2 = 452,
+    map_shadow_stack = 453,
+    futex_wake = 454,
+    futex_wait = 455,
+    futex_requeue = 456,
+};
+
+// Thin typed wrappers over the raw syscalls. They return the raw kernel result unchanged,
+// so a failure is a negative errno value rather than -1 / MAP_FAILED with errno set
+fn void* syscall_mmap(void* address, size_t length, int protection_flags, int map_flags, int fd, __off_t offset)
+{
+    return (void*) syscall6(static_cast(SyscallX86_64::mmap), (unsigned long)address, length, protection_flags, map_flags, fd, offset);
+}
+
+fn int syscall_mprotect(void *address, size_t length, int protection_flags)
+{
+    return syscall3(static_cast(SyscallX86_64::mprotect), (unsigned long)address, length, protection_flags);
+}
+
+fn int syscall_open(const char *file_path, int flags, int mode)
+{
+    return syscall3(static_cast(SyscallX86_64::open), (unsigned long)file_path, flags, mode);
+}
+
+fn int syscall_fstat(int fd, struct stat *buffer)
+{
+    return syscall2(static_cast(SyscallX86_64::fstat), fd, (unsigned long)buffer);
+}
+
+fn ssize_t syscall_read(int fd, void* buffer, size_t bytes)
+{
+    return syscall3(static_cast(SyscallX86_64::read), fd, (unsigned long)buffer, bytes);
+}
+
+fn ssize_t syscall_write(int fd, const void *buffer, size_t bytes)
+{
+    return syscall3(static_cast(SyscallX86_64::write), fd, (unsigned long)buffer, bytes);
+}
+
+// Terminates the process with `status`. 231 is SyscallX86_64::exit_group; the trap only runs
+// if the syscall returns
+[[noreturn]] [[gnu::cold]] fn void syscall_exit(int status)
+{
+    (void)syscall1(231, status);
+    trap();
+}
+
+// Aborts through trap(); the exit call after it is only reached if the trap returns
+[[noreturn]] [[gnu::cold]] fn void fail()
+{
+    trap();
+    syscall_exit(1);
+}
+
+// Reserves `size` bytes of inaccessible (PROT_NONE) anonymous address space, optionally asking
+// for huge pages. Traps when the mapping fails
+fn void* reserve(u64 size, u8 huge_pages)
+{
+    int protection_flags = PROT_NONE;
+    int map_flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE | (huge_pages ? MAP_HUGETLB : 0);
+    void* result = syscall_mmap(0, size, protection_flags, map_flags, -1, 0);
+    // The raw syscall reports failure as -errno, i.e. any value in [-4095, -1]. Comparing
+    // against MAP_FAILED ((void*)-1) alone would let every other error code through
+    assert((u64)result < (u64)-4095);
+    return result;
+}
+
+// Makes `size` bytes at `address` readable and writable; traps when mprotect fails
+fn void commit(void* address, u64 size)
+{
+    int result = syscall_mprotect(address, size, PROT_READ | PROT_WRITE);
+    assert(result == 0);
+}
+
+// Rounds `value` up to a multiple of `alignment`. The mask arithmetic is only correct when
+// `alignment` is a power of two
+fn u64 align_forward(u64 value, u64 alignment)
+{
+    u64 mask = alignment - 1;
+    u64 result = (value + mask) & ~mask;
+    return result;
+}
+
+// Bump allocator that lives at the start of its own reserved address range. `commited` is the
+// number of bytes made accessible so far and `commit_position` the offset of the next free byte
+struct Arena
+{
+    u64 reserved_size;
+    u64 commited;
+    u64 commit_position;
+    u64 granularity;
+    u8 reserved[4 * 8];
+
+    global auto constexpr minimum_granularity = KB(4);
+    global auto constexpr middle_granularity = MB(2);
+    global auto constexpr page_granularity = page_size;
+    global auto constexpr default_size = GB(4);
+
+    // Reserves `reserved_size` bytes, commits the first `initial_size` of them (which must be a
+    // multiple of `granularity`) and places the Arena header at the start of the range.
+    // Huge pages are requested whenever `granularity` differs from minimum_granularity
+    fn Arena* init(u64 reserved_size, u64 granularity, u64 initial_size)
+    {
+        assert(initial_size % granularity == 0);
+        Arena* arena = (Arena*)reserve(reserved_size, granularity != minimum_granularity);
+        commit(arena, initial_size);
+        *arena = {
+            .reserved_size = reserved_size,
+            .commited = initial_size,
+            .commit_position = sizeof(Arena),
+            .granularity = granularity,
+        };
+        return arena;
+    }
+
+    fn Arena* init_default(u64 initial_size)
+    {
+        return init(default_size, minimum_granularity, initial_size);
+    }
+
+    // Returns `size` bytes aligned to `alignment` (a power of two). When the request does not
+    // fit in the committed window, more of the reservation is committed in whole `granularity`
+    // steps; running past the reservation traps
+    void* allocate_bytes(u64 size, u64 alignment)
+    {
+        u64 aligned_offset = align_forward(commit_position, alignment);
+        u64 aligned_size_after = aligned_offset + size;
+        if (aligned_size_after > commited)
+        {
+            u64 new_commited = align_forward(aligned_size_after, granularity);
+            assert(new_commited <= reserved_size);
+            commit((u8*)this + commited, new_commited - commited);
+            commited = new_commited;
+        }
+
+        void* result = (u8*)this + aligned_offset;
+        commit_position = aligned_size_after;
+        return result;
+    }
+
+    template
+    T* allocate_many(u64 count)
+    {
+        return (T*)allocate_bytes(sizeof(T) * count, alignof(T));
+    }
+
+    template
+    T* allocate_one()
+    {
+        return allocate_many(1);
+    }
+
+    // Allocates `count` elements and returns them as a slice. The return type used to be `T*`,
+    // which cannot be initialised from a { pointer, length } pair
+    template
+    Slice<T> allocate_slice(u64 count)
+    {
+        return {
+            .pointer = allocate_many(count),
+            .length = count,
+        };
+    }
+};
+static_assert(sizeof(Arena) == 64, "Arena must be cache aligned");
+
+// Reinterprets the bytes of `source` as a Destination; both types must have the same size
+template
+fn forceinline Destination transmute(Source source)
+{
+    static_assert(sizeof(Source) == sizeof(Destination));
+    return *(Destination*)&source;
+}
+
+// Reads the whole file at `path` into memory taken from `arena` and returns it as a String.
+// `path.pointer` is handed to open as a C string, so it must be NUL-terminated.
+// Any failure (open, fstat, short or failed read) traps
+fn String file_read(Arena* arena, String path)
+{
+    String result = {};
+    int file_descriptor = syscall_open((char*)path.pointer, 0, 0);
+    // The raw syscall returns -errno on failure, which is not necessarily -1
+    assert(file_descriptor >= 0);
+
+    struct stat stat_buffer;
+    int stat_result = syscall_fstat(file_descriptor, &stat_buffer);
+    assert(stat_result == 0);
+
+    u64 file_size = stat_buffer.st_size;
+
+    result = {
+        .pointer = (u8*)arena->allocate_bytes(file_size, 64),
+        .length = file_size,
+    };
+
+    // TODO: big files
+    ssize_t read_result = syscall_read(file_descriptor, result.pointer, result.length);
+    assert(read_result >= 0);
+    assert((u64)read_result == file_size);
+
+    // The contents are in memory now; release the descriptor instead of leaking it
+    (void)syscall1((long)SyscallX86_64::close, file_descriptor);
+
+    return result;
+}
+
+// Writes `message` to file descriptor 1; traps on an error or a short write
+fn void print(String message)
+{
+    ssize_t result = syscall_write(1, message.pointer, message.length);
+    assert(result >= 0);
+    assert((u64)result == message.length);
+}
+
+template struct PinnedArray;
+fn void generic_pinned_array_ensure_capacity(PinnedArray* array, u32 additional_T, u32 size_of_T);
+fn u8* generic_pinned_array_add_with_capacity(PinnedArray* array, u32 additional_T, u32 size_of_T);
+
+
+// Growable array whose storage is one reserved address range that is committed on demand.
+// The element-size independent work is done by the generic_pinned_array_* functions
+template
+struct PinnedArray
+{
+    T* pointer;
+    u32 length;
+    u32 committed;
+
+    global constexpr auto granularity = page_size;
+    global constexpr auto reserved_size = ((u64)GB(4) - granularity);
+
+    forceinline T& operator[](u32 index)
+    {
+        assert(index < length);
+        return pointer[index];
+    }
+
+    // Makes sure `additional` more elements fit in committed memory
+    forceinline void ensure_capacity(u32 additional)
+    {
+        auto generic_array = (PinnedArray*)(this);
+        generic_pinned_array_ensure_capacity(generic_array, additional, sizeof(T));
+    }
+
+    // Appends `additional` elements without committing memory and returns a view over them
+    forceinline Slice add_with_capacity(u32 additional)
+    {
+        auto generic_array = (PinnedArray*)(this);
+        auto pointer = generic_pinned_array_add_with_capacity(generic_array, additional, sizeof(T));
+        return {
+            .pointer = (T*)pointer,
+            .length = additional,
+        };
+    }
+
+// generic_pinned_array_ensure_capacity(array, additional_T, size_of_T);
+// u8* result = generic_pinned_array_add_with_capacity(array, additional_T, size_of_T);
+    // Commits memory as needed, then appends `additional` elements and returns a view over them
+    forceinline Slice add(u32 additional)
+    {
+        ensure_capacity(additional);
+        auto slice = add_with_capacity(additional);
+        return slice;
+    }
+
+    forceinline T* add_one()
+    {
+        return add(1).pointer;
+    }
+};
+
+// Current length of the array in bytes
+forceinline fn u32 generic_pinned_array_length(PinnedArray* array, u32 size_of_T)
+{
+    u32 current_length_bytes = array->length * size_of_T;
+    return current_length_bytes;
+}
+
+// Reserves the backing range on first use and commits the pages needed to hold `additional_T`
+// more elements of `size_of_T` bytes each
+fn void generic_pinned_array_ensure_capacity(PinnedArray* array, u32 additional_T, u32 size_of_T)
+{
+    if (array->committed == 0)
+    {
+        assert(array->length == 0);
+        assert(array->pointer == 0);
+        array->pointer = static_cast(reserve(PinnedArray::reserved_size, 0));
+    }
+
+    // 64-bit arithmetic: the reservation is just under 4 GiB, so byte counts can exceed a u32
+    u64 additional_bytes = (u64)additional_T * size_of_T;
+    u64 current_length_bytes = generic_pinned_array_length(array, size_of_T);
+    u64 granularity_aligned_commit_bytes = align_forward(current_length_bytes, PinnedArray::granularity);
+    u64 new_length_bytes = current_length_bytes + additional_bytes;
+
+    if (granularity_aligned_commit_bytes < new_length_bytes)
+    {
+        assert(new_length_bytes <= PinnedArray::reserved_size);
+        u64 new_granularity_aligned_commit_bytes = align_forward(new_length_bytes, PinnedArray::granularity);
+        u8* commit_pointer = array->pointer + granularity_aligned_commit_bytes;
+        // Only the pages between the old and the new aligned end need committing; this used to
+        // add the two sizes, committing far more than required and skewing `committed`
+        u64 commit_bytes = new_granularity_aligned_commit_bytes - granularity_aligned_commit_bytes;
+        commit(commit_pointer, commit_bytes);
+        array->committed += commit_bytes / PinnedArray::granularity;
+    }
+}
+
+// Grows the length by `additional_T` elements and returns the address of the first new element.
+// The memory must already be committed
+fn u8* generic_pinned_array_add_with_capacity(PinnedArray* array, u32 additional_T, u32 size_of_T)
+{
+    u32 current_length_bytes = generic_pinned_array_length(array, size_of_T);
+    assert(current_length_bytes < PinnedArray::reserved_size);
+    // New elements start at the old end of the array, not at its base
+    u8* pointer = array->pointer + current_length_bytes;
+    array->length += additional_T;
+    return pointer;
+}
+
+template struct PinnedHashmap;
+
+// Result of a get-or-put: pointers to the stored key and value, and whether the key was
+// already present
+template
+struct GetOrPut
+{
+    K* key;
+    V* value;
+    u8 existing;
+};
+fn GetOrPut generic_pinned_hashmap_get_or_put(PinnedHashmap* hashmap, u8* new_key_pointer, u32 key_size, u8* new_value_pointer, u32 value_size);
+
+
+// Key/value store backed by two reserved ranges (one for keys, one for values) that are
+// committed on demand. Lookup is a linear scan over the keys
+template
+struct PinnedHashmap
+{
+    K* keys;
+    V* values;
+    u32 length;
+    u16 key_page_capacity;
+    u16 value_page_capacity;
+
+    global constexpr auto invalid_index = ~0u;
+    global constexpr auto granularity = PinnedArray::granularity;
+    global constexpr auto reserved_size = PinnedArray::reserved_size;
+
+    static_assert(granularity % sizeof(K) == 0, "");
+    static_assert(granularity % sizeof(V) == 0, "");
+
+    forceinline GetOrPut get_or_put(K key, V value)
+    {
+        auto* generic_hashmap = (PinnedHashmap*)(this);
+        auto generic_get_or_put = generic_pinned_hashmap_get_or_put(generic_hashmap, (u8*)&key, sizeof(K), (u8*)&value, sizeof(V));
+        return *(GetOrPut*)&generic_get_or_put;
+    }
+};
+
+// Returns the index of the entry whose key bytes equal `key_pointer`, or invalid_index when the
+// key is not present
+fn u32 generic_pinned_hashmap_get_index(PinnedHashmap* hashmap, u8* key_pointer, u32 key_size)
+{
+    u32 index = hashmap->invalid_index;
+
+    for (u32 i = 0; i < hashmap->length; i += 1)
+    {
+        u8* it_key_pointer = &hashmap->keys[i * key_size];
+        if (memeq(it_key_pointer, key_pointer, key_size))
+        {
+            index = (it_key_pointer - hashmap->keys) / key_size;
+            break;
+        }
+    }
+
+    return index;
+}
+
+// Reserves both ranges on first use and commits enough pages in each to hold
+// `additional_elements` more entries
+fn void generic_pinned_hashmap_ensure_capacity(PinnedHashmap* hashmap, u32 key_size, u32 value_size, u32 additional_elements)
+{
+    if (additional_elements != 0)
+    {
+        if (hashmap->key_page_capacity == 0)
+        {
+            assert(hashmap->value_page_capacity == 0);
+            hashmap->keys = (u8*)reserve(hashmap->reserved_size, 0);
+            hashmap->values = (u8*)reserve(hashmap->reserved_size, 0);
+        }
+
+        u32 target_element_capacity = hashmap->length + additional_elements;
+
+        {
+            u32 key_byte_capacity = hashmap->key_page_capacity * hashmap->granularity;
+            u32 target_byte_capacity = target_element_capacity * key_size;
+            if (key_byte_capacity < target_byte_capacity)
+            {
+                u32 aligned_target_byte_capacity = align_forward(target_byte_capacity, hashmap->granularity);
+                void* commit_pointer = hashmap->keys + key_byte_capacity;
+                u32 commit_size = aligned_target_byte_capacity - key_byte_capacity;
+                commit(commit_pointer, commit_size);
+                hashmap->key_page_capacity = aligned_target_byte_capacity / hashmap->granularity;
+            }
+        }
+
+        {
+            u32 value_byte_capacity = hashmap->value_page_capacity * hashmap->granularity;
+            u32 target_byte_capacity = target_element_capacity * value_size;
+            if (value_byte_capacity < target_byte_capacity)
+            {
+                u32 aligned_target_byte_capacity = align_forward(target_byte_capacity, hashmap->granularity);
+                void* commit_pointer = hashmap->values + value_byte_capacity;
+                u32 commit_size = aligned_target_byte_capacity - value_byte_capacity;
+                commit(commit_pointer, commit_size);
+                hashmap->value_page_capacity = aligned_target_byte_capacity / hashmap->granularity;
+            }
+        }
+    }
+}
+
+// Looks the key up. When it is present the stored entry is returned with `existing` set and the
+// new value is not stored; otherwise key and value are appended and `existing` is 0
+fn GetOrPut generic_pinned_hashmap_get_or_put(PinnedHashmap* hashmap, u8* new_key_pointer, u32 key_size, u8* new_value_pointer, u32 value_size)
+{
+    u32 index = generic_pinned_hashmap_get_index(hashmap, new_key_pointer, key_size);
+    if (index != hashmap->invalid_index)
+    {
+        return {
+            .key = &hashmap->keys[index * key_size],
+            .value = &hashmap->values[index * value_size],
+            .existing = 1,
+        };
+    }
+    else
+    {
+        generic_pinned_hashmap_ensure_capacity(hashmap, key_size, value_size, 1);
+        u32 new_index = hashmap->length;
+        hashmap->length += 1;
+        u8* key_pointer = &hashmap->keys[new_index * key_size];
+        u8* value_pointer = &hashmap->values[new_index * value_size];
+        memcpy(key_pointer, new_key_pointer, key_size);
+        memcpy(value_pointer, new_value_pointer, value_size);
+
+        return {
+            .key = key_pointer,
+            .value = value_pointer,
+            .existing = 0,
+        };
+    }
+}
+
+typedef enum FileStatus
+{
+    FILE_STATUS_ADDED = 0,
+    FILE_STATUS_QUEUED = 1,
+    FILE_STATUS_READ = 2,
+    FILE_STATUS_ANALYZING = 3,
+} FileStatus;
+
+struct File
+{
+    String path;
+    String source_code;
+    FileStatus status;
+};
+
+struct SemaType;
+// Tagged type handle: `id` selects the kind and `bits` carries the payload (a SemaType pointer
+// for Id::sema).
+// NOTE(review): this is a union, so the `bits` and `id` bit-fields share storage instead of
+// sitting side by side - confirm whether a struct was intended
+union Type
+{
+    enum Id
+    {
+        sema,
+        backend,
+    };
+
+    u64 bits:57;
+    Id id:1;
+
+    forceinline SemaType* get_sema()
+    {
+        assert(id == Id::sema);
+        return (SemaType*)(bits);
+    }
+
+    forceinline u8 is_resolved();
+};
+static_assert(sizeof(Type) == 8);
+
+enum class SemaTypeId: u8
+{
+    VOID,
+    NORETURN,
+    POINTER,
+    INTEGER,
+    ARRAY,
+    STRUCT,
+    UNION,
+    COUNT,
+};
+global auto constexpr type_id_bit_count = 3;
+static_assert(static_cast(SemaTypeId::COUNT) < (1 << type_id_bit_count), "Type bit count for id must be respected");
+
+global auto constexpr type_flags_bit_count = 32 - (type_id_bit_count + 1);
+
+// Semantic type record. For integers `flags` holds the bit count in its low bits and the
+// signedness in its top bit (bit type_flags_bit_count - 1)
+struct SemaType
+{
+    u64 size;
+    u64 alignment;
+    SemaTypeId id : type_id_bit_count;
+    u32 resolved: 1;
+    u32 flags: type_flags_bit_count;
+    u32 name;
+
+    // Bit width of an integer type: `flags` with the signedness bit masked off
+    u8 get_bit_count()
+    {
+        assert(id == SemaTypeId::INTEGER);
+        u32 bit_count_mask = (1 << (type_flags_bit_count - 1)) - 1;
+        u8 bit_count = flags & bit_count_mask;
+        assert(bit_count <= size * 8);
+        assert(bit_count <= 64);
+        return bit_count;
+    }
+
+};
+static_assert(sizeof(SemaType) == sizeof(u64) * 3, "Type must be 24 bytes");
+// Backend types always count as resolved; sema types report their own `resolved` bit
+forceinline u8 Type::is_resolved()
+{
+    // Short-circuit on purpose: get_sema() asserts id == Id::sema, so it must not be evaluated
+    // for backend types (the bitwise | and & used before evaluated it unconditionally)
+    return (id == Id::backend) || ((id == Id::sema) && get_sema()->resolved);
+}
+
+
+struct Symbol
+{
+    enum class Id: u8
+    {
+        variable,
+        function,
+    };
+
+    enum class Linkage: u8
+    {
+        internal,
+        external,
+    };
+
+    u32 name;
+    Id id: 1;
+    Linkage linkage: 1;
+};
+
+typedef enum AbiInfoKind : u8
+{
+    ABI_INFO_IGNORE,
+    ABI_INFO_DIRECT,
+    ABI_INFO_DIRECT_PAIR,
+    ABI_INFO_DIRECT_COERCE,
+    ABI_INFO_DIRECT_COERCE_INT,
+    ABI_INFO_DIRECT_SPLIT_STRUCT_I32,
+    ABI_INFO_EXPAND_COERCE,
+    ABI_INFO_INDIRECT,
+    ABI_INFO_EXPAND,
+} AbiInfoKind;
+
+enum class Side : u8
+{
+    left,
+    right,
+};
+
+// Data type attached to an IR node.
+// NOTE(review): a 5-bit `bit_count` holds at most 31, while the builtin integer types go up to
+// 64 bits - confirm the intended encoding
+struct NodeDataType
+{
+    enum class Id : u8
+    {
+        VOID,
+        INTEGER,
+        TUPLE,
+        CONTROL,
+        MEMORY,
+        POINTER,
+    };
+    Id id;
+    u8 bit_count:5;
+};
+
+union AbiInfoPayload
+{
+    NodeDataType direct;
+    NodeDataType direct_pair[2];
+    NodeDataType direct_coerce;
+    struct
+    {
+        NodeDataType type;
+        u32 alignment;
+    } indirect;
+};
+typedef union AbiInfoPayload AbiInfoPayload;
+
+struct AbiInfoAttributes
+{
+    u8 by_reg: 1;
+    u8 zero_extend: 1;
+    u8 sign_extend: 1;
+    u8 realign: 1;
+    u8 by_value: 1;
+};
+typedef struct AbiInfoAttributes AbiInfoAttributes;
+
+struct AbiInfo
+{
+    AbiInfoPayload payload;
+    u16 indices[2];
+    AbiInfoAttributes attributes;
+    AbiInfoKind kind;
+};
+
+struct FunctionPrototype
+{
+    AbiInfo* argument_type_abis; // The count for this array is "original_argument_count", not "abi_argument_count"
+    SemaType** original_argument_types;
+    // TODO: are these needed?
+    // Node::DataType* abi_argument_types;
+    // u32 abi_argument_count;
+    SemaType* original_return_type;
+    AbiInfo return_type_abi;
+    u32 original_argument_count;
+    // TODO: is this needed?
+    // Node::DataType abi_return_type;
+    u8 varags:1;
+};
+
+struct Function;
+
+// Layout of Unit::builtin_types: void, noreturn and the opaque pointer come first, then 64
+// unsigned integer types followed by 64 signed ones, starting at integer_type_offset
+global auto constexpr void_type_index = 0;
+global auto constexpr noreturn_type_index = 1;
+global auto constexpr opaque_pointer_type_index = 2;
+// global auto constexpr f32_type_offset = 3;
+// global auto constexpr f64_type_offset = 4;
+global auto constexpr integer_type_offset = 5;
+global auto constexpr integer_type_count = 64 * 2;
+global auto constexpr builtin_type_count = integer_type_count + integer_type_offset + 1;
+
+
+// State of one compilation unit: its files and functions, the arenas it allocates from, the
+// interned identifiers and the table of builtin types
+struct Unit
+{
+    PinnedArray files;
+    PinnedArray functions;
+    Arena* arena;
+    Arena* node_arena;
+    Arena* type_arena;
+    PinnedHashmap identifiers;
+    SemaType* builtin_types;
+    u64 generate_debug_information : 1;
+
+    // Builtin integer type with `bit_count` bits (1 to 64); `signedness` is 0 for unsigned
+    // and 1 for signed
+    SemaType* get_integer_type(u8 bit_count, u8 signedness)
+    {
+        auto index = integer_type_offset + signedness * 64 + bit_count - 1;
+        return &builtin_types[index];
+    }
+};
+
+
+struct Node;
+
+struct Function
+{
+    Symbol symbol;
+    Node* root_node;
+    Node** parameters;
+    FunctionPrototype prototype;
+    u32 node_count;
+    u16 parameter_count;
+};
+
+struct ProjectionData
+{
+    NodeDataType type;
+    u16 index;
+};
+// This is a node in the "sea of nodes" sense:
+// 
https://en.wikipedia.org/wiki/Sea_of_nodes
+// `edges` is one array holding the `input_count` inputs first and the `output_count` outputs
+// right after them; `capacity` is the number of slots allocated for both together
+struct Node
+{
+    enum class Id: u8
+    {
+        ROOT,
+        PROJECTION,
+        RETURN,
+        CONSTANT_INT,
+    };
+
+    static_assert(sizeof(NodeDataType) <= 2);
+
+    Node** edges;
+    u32 gvn;
+    u16 input_count;
+    u16 output_count;
+    u16 capacity;
+    NodeDataType data_type;
+    Id id;
+
+    union
+    {
+        struct
+        {
+            u32 index;
+        } projection;
+        u64 constant_int;
+    };
+
+    forceinline Slice get_inputs()
+    {
+        return {
+            .pointer = edges,
+            .length = input_count,
+        };
+    }
+
+    forceinline Slice get_outputs()
+    {
+        return {
+            .pointer = edges + input_count,
+            .length = output_count,
+        };
+    }
+
+    // Allocates a zero-initialised node from the unit's node arena
+    [[nodiscard]] fn Node* add(Unit* unit)
+    {
+        Node* node = unit->node_arena->allocate_one();
+        *node = {};
+        return node;
+    }
+
+    // Allocates a node and gives it the next `gvn` number of `function`
+    [[nodiscard]] fn Node* add_from_function(Unit* unit, Function* function)
+    {
+        u32 gvn = function->node_count;
+        function->node_count += 1;
+        Node* node = unit->node_arena->allocate_one();
+        *node = {
+            .gvn = gvn,
+        };
+        return node;
+    }
+
+    // Creates a PROJECTION node of type `data.type` whose single input is this node, which
+    // must be a TUPLE, and records `data.index` on it
+    Node* project(Unit* unit, Function* function, ProjectionData data)
+    {
+        assert(data_type.id == NodeDataType::Id::TUPLE);
+
+        Node* projection = Node::add_from_function(unit, function);
+        assert(projection != this);
+        projection->id = Node::Id::PROJECTION;
+        projection->data_type = data.type;
+        projection->reallocate_edges(unit, 4);
+        projection->input_count = 1;
+        projection->set_input(unit, function, this, 0);
+        projection->projection.index = data.index;
+
+        return projection;
+    }
+
+    // Stores `input` in input slot `slot` and, when it is not null, registers this node as one
+    // of its outputs
+    void set_input(Unit* unit, Function* function, Node* input, u16 slot)
+    {
+        assert(slot < input_count);
+        remove_output(unit, function, slot);
+        *get_input_slot(slot) = input;
+        if (input)
+        {
+            add_output(unit, function, input);
+        }
+    }
+
+    Node** get_output_slot(u16 slot)
+    {
+        assert(slot < output_count);
+        return edges + input_count + slot;
+    }
+
+    Node** get_input_slot(u16 slot)
+    {
+        assert(slot < input_count);
+        return edges + slot;
+    }
+
+    // Appends this node to the outputs of `input`. Traps when `input` has no free edge slot
+    // (growing the edge array is not implemented here)
+    void add_output(Unit* unit, Function* function, Node* input)
+    {
+        unused(unit);
+        unused(function);
+
+        if (input->output_count + input->input_count >= input->capacity)
+        {
+            trap();
+        }
+
+        auto index = input->output_count;
+        input->output_count += 1;
+        *input->get_output_slot(index) = this;
+    }
+
+    // Traps when output slot `slot` exists and is occupied; otherwise does nothing
+    void remove_output(Unit* unit, Function* function, u16 slot)
+    {
+        unused(unit);
+        unused(function);
+        if (slot < output_count)
+        {
+            Node** output_slot = get_output_slot(slot);
+            if (*output_slot)
+            {
+                trap();
+            }
+        }
+    }
+
+    // Replaces the edge array with a larger one of `new_capacity` slots taken from the unit's
+    // arena, keeping the existing edges and zeroing the remaining slots
+    void reallocate_edges(Unit* unit, u16 new_capacity)
+    {
+        auto old_capacity = capacity;
+        assert(new_capacity > old_capacity);
+        auto length = output_count + input_count;
+        Node** new_edges = unit->arena->allocate_many(new_capacity);
+        memcpy(new_edges, edges, length * sizeof(Node*));
+        // The slots hold Node pointers: sizing the clear with sizeof(Node) wrote past the end
+        // of the new array
+        memset(new_edges + length, 0, (new_capacity - length) * sizeof(Node*));
+        capacity = new_capacity;
+        edges = new_edges;
+        // TODO: free
+
+    }
+};
+
+// Rounds `n` up to a power of two by smearing its highest set bit into every lower position.
+// Powers of two map to themselves; 0 maps to 0
+fn u64 round_up_to_next_power_of_2(u64 n)
+{
+    n -= 1;
+    n |= n >> 1;
+    n |= n >> 2;
+    n |= n >> 4;
+    n |= n >> 8;
+    n |= n >> 16;
+    n |= n >> 32;
+    n += 1;
+    return n;
+}
+
+// Registers `identifier` in the unit's identifier map under its hash and returns that hash
+fn Hash intern_identifier(Unit* unit, String identifier)
+{
+    Hash hash = hash_bytes(identifier);
+    (void)unit->identifiers.get_or_put(hash, identifier);
+    return hash;
+}
+
+// Names of the builtin integer types: u1..u64 first, then s1..s64
+global String integer_names[] =
+{
+    strlit("u1"),
+    strlit("u2"),
+    strlit("u3"),
+    strlit("u4"),
+    strlit("u5"),
+    strlit("u6"),
+    strlit("u7"),
+    strlit("u8"),
+    strlit("u9"),
+    strlit("u10"),
+    strlit("u11"),
+    strlit("u12"),
+    strlit("u13"),
+    strlit("u14"),
+    strlit("u15"),
+    strlit("u16"),
+    strlit("u17"),
+    strlit("u18"),
+    strlit("u19"),
+    strlit("u20"),
+    strlit("u21"),
+    strlit("u22"),
+    strlit("u23"),
+    strlit("u24"),
+    strlit("u25"),
+    strlit("u26"),
+    strlit("u27"),
+    strlit("u28"),
+    strlit("u29"),
+    strlit("u30"),
+    strlit("u31"),
+    strlit("u32"),
+    strlit("u33"),
+    strlit("u34"),
+    strlit("u35"),
+    strlit("u36"),
+    strlit("u37"),
+    strlit("u38"),
+    strlit("u39"),
+    strlit("u40"),
+    strlit("u41"),
+    strlit("u42"),
+    strlit("u43"),
+    strlit("u44"),
+    strlit("u45"),
+    strlit("u46"),
+    strlit("u47"),
+    strlit("u48"),
+    strlit("u49"),
+    strlit("u50"),
+    strlit("u51"),
+    strlit("u52"),
+    strlit("u53"),
+    strlit("u54"),
+    strlit("u55"),
+    strlit("u56"),
+    strlit("u57"),
+    strlit("u58"),
+    strlit("u59"),
+    strlit("u60"),
+    strlit("u61"),
+    strlit("u62"),
+    strlit("u63"),
+    strlit("u64"),
+    strlit("s1"),
+    strlit("s2"),
+    strlit("s3"),
+    strlit("s4"),
+    strlit("s5"),
+    strlit("s6"),
+    strlit("s7"),
+    strlit("s8"),
+    strlit("s9"),
+    strlit("s10"),
+    strlit("s11"),
+    strlit("s12"),
+    strlit("s13"),
+    strlit("s14"),
+    strlit("s15"),
+    strlit("s16"),
+    strlit("s17"),
+    strlit("s18"),
+    strlit("s19"),
+    strlit("s20"),
+    strlit("s21"),
+    strlit("s22"),
+    strlit("s23"),
+    strlit("s24"),
+    strlit("s25"),
+    strlit("s26"),
+    strlit("s27"),
+    strlit("s28"),
+    strlit("s29"),
+    strlit("s30"),
+    strlit("s31"),
+    strlit("s32"),
+    strlit("s33"),
+    strlit("s34"),
+    strlit("s35"),
+    strlit("s36"),
+    strlit("s37"),
+    strlit("s38"),
+    strlit("s39"),
+    strlit("s40"),
+    strlit("s41"),
+    strlit("s42"),
+    strlit("s43"),
+    strlit("s44"),
+    strlit("s45"),
+    strlit("s46"),
+    strlit("s47"),
+    strlit("s48"),
+    strlit("s49"),
+    strlit("s50"),
+    strlit("s51"),
+    strlit("s52"),
+    strlit("s53"),
+    strlit("s54"),
+    strlit("s55"),
+    strlit("s56"),
+    strlit("s57"),
+    strlit("s58"),
+    strlit("s59"),
+    strlit("s60"),
+    strlit("s61"),
+    strlit("s62"),
+    strlit("s63"),
+    strlit("s64"),
+};
+
+// Creates the unit's arenas and fills in the builtin type table: void, noreturn, the opaque
+// pointer, then the unsigned and signed integer types of every width from 1 to 64 bits
+fn void unit_initialize(Unit* unit)
+{
+    Arena* type_arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(64));
+    SemaType* builtin_types = type_arena->allocate_many(builtin_type_count);
+
+    *unit = {
+        .arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(4)),
+        .node_arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(64)),
+        .type_arena = type_arena,
+        .builtin_types = builtin_types,
+    };
+
+    builtin_types[void_type_index] = {
+        .size = 0,
+        .alignment = 1,
+        .id = SemaTypeId::VOID,
+        .resolved = 1,
+        .name = intern_identifier(unit, strlit("void")),
+    };
+    builtin_types[noreturn_type_index] = {
+        .size = 0,
+        .alignment = 1,
+        .id = SemaTypeId::NORETURN,
+        .resolved = 1,
+        .name = intern_identifier(unit, strlit("noreturn")),
+    };
+    builtin_types[opaque_pointer_type_index] = {
+        .size = 8,
+        .alignment = 8,
+        .id = SemaTypeId::POINTER,
+        .resolved = 1,
+        .name = intern_identifier(unit, strlit("*any")),
+    };
+    // TODO: float types
+
+    // Unsigned integers u1..u64
+    u64 i;
+    for (i = integer_type_offset; i < integer_type_offset + 64; i += 1)
+    {
+        u64 bit_count = i - integer_type_offset + 1;
+        // Both bounds must hold; with a bitwise | this check could never fail
+        assert(bit_count >= 1 && bit_count <= 64);
+        auto aligned_bit_count = round_up_to_next_power_of_2(bit_count);
+        auto byte_count = max(aligned_bit_count / 8, 1);
+        assert(byte_count <= bit_count);
+        assert(byte_count == 1 || byte_count == 2 || byte_count == 4 || byte_count == 8);
+        builtin_types[i] =
+        {
+            .size = byte_count,
+            .alignment = byte_count,
+            .id = SemaTypeId::INTEGER,
+            .resolved = 1,
+            .flags = static_cast(bit_count),
+            .name = intern_identifier(unit, integer_names[bit_count - 1]),
+        };
+    }
+
+    // Signed integers s1..s64, stored right after the unsigned ones
+    for (; i < integer_type_offset + integer_type_count; i += 1)
+    {
+        u64 bit_count = i - (integer_type_offset + 64 - 1);
+        assert(bit_count >= 1 && bit_count <= 64);
+        auto aligned_bit_count = round_up_to_next_power_of_2(bit_count);
+        auto byte_count = max(aligned_bit_count / 8, 1);
+        assert(byte_count <= bit_count);
+        assert(byte_count == 1 || byte_count == 2 || byte_count == 4 || byte_count == 8);
+        builtin_types[i] =
+        {
+            .size = byte_count,
+            .alignment = byte_count,
+            .id = SemaTypeId::INTEGER,
+            .resolved = 1,
+            .flags = static_cast(bit_count | (1 << (type_flags_bit_count - 1))), // Signedness bit
+            .name = intern_identifier(unit, integer_names[bit_count + 63]),
+        };
+    }
+}
+
+
+static_assert(array_length(integer_names) == 128, "Integer name array must have 128 entries");
+
+struct Instance
+{
+    Arena* arena;
+};
+typedef struct Instance Instance;
+
+fn Unit* 
instance_add_unit(Instance* instance) +{ + Unit* unit = instance->arena->allocate_one(); + *unit = { + }; + return unit; +} + +struct Parser +{ + u64 i; + u32 line; + u32 column; +}; +typedef struct Parser Parser; + +fn u64 safe_flag(u64 value, u64 flag) +{ + u64 result = value & ((u64)0 - flag); + return result; +} + +fn u8 get_next_ch_safe(String file, u64 index) +{ + u64 next_index = index + 1; + u64 is_in_range = next_index < file.length; + u64 safe_index = safe_flag(next_index, is_in_range); + u8 unsafe_result = file.pointer[safe_index]; + u64 safe_result = safe_flag(unsafe_result, is_in_range); + assert(safe_result < 256); + return (u8)safe_result; +} + +fn u32 is_space(u8 ch, u8 next_ch) +{ + u32 is_comment = (ch == '/') & (next_ch == '/'); + u32 is_whitespace = ch == ' '; + u32 is_vertical_tab = ch == 0x0b; + u32 is_horizontal_tab = ch == '\t'; + u32 is_line_feed = ch == '\n'; + u32 is_carry_return = ch == '\r'; + u32 result = (((is_vertical_tab | is_horizontal_tab) | (is_line_feed | is_carry_return)) | (is_comment | is_whitespace)); + return result; +} + +fn void skip_space(Parser* parser, String src) +{ + u64 original_i = parser->i; + + if (original_i != src.length) + { + if (is_space(src.pointer[original_i], get_next_ch_safe(src, original_i))) + { + while (parser->i < src.length) + { + u64 index = parser->i; + u8 ch = src.pointer[index]; + u64 new_line = ch == '\n'; + parser->line += new_line; + + if (new_line) + { + parser->column = index + 1; + } + + if (!is_space(ch, get_next_ch_safe(src, parser->i))) + { + break; + } + + u32 is_comment = src.pointer[index] == '/'; + parser->i += is_comment + is_comment; + if (is_comment) + { + while (parser->i < src.length) + { + if (src.pointer[parser->i] == '\n') + { + break; + } + + parser->i += 1; + } + + continue; + } + + parser->i += 1; + } + } + } +} + +fn u64 is_lower(u8 ch) +{ + return (ch >= 'a') & (ch <= 'z'); +} + +fn u64 is_upper(u8 ch) +{ + return (ch >= 'A') & (ch <= 'Z'); +} + +fn u64 is_alphabetic(u8 
ch) +{ + return is_lower(ch) | is_upper(ch); +} + +fn u64 is_decimal_digit(u8 ch) +{ + return (ch >= '0') & (ch <= '9'); +} + +fn u64 is_hex_digit(u8 ch) +{ + return (is_decimal_digit(ch) | ((ch == 'a' | ch == 'A') | (ch == 'b' | ch == 'B'))) | (((ch == 'c' | ch == 'C') | (ch == 'd' | ch == 'D')) | ((ch == 'e' | ch == 'E') | (ch == 'f' | ch == 'F'))); +} + + +fn u64 is_identifier_start(u8 ch) +{ + u64 alphabetic = is_alphabetic(ch); + u64 is_underscore = ch == '_'; + return alphabetic | is_underscore; +} + +fn u64 is_identifier_ch(u8 ch) +{ + u64 identifier_start = is_identifier_start(ch); + u64 decimal = is_decimal_digit(ch); + return identifier_start | decimal; +} + +fn void expect_character(Parser* parser, String src, u8 expected_ch) +{ + u64 index = parser->i; + if (expect(index < src.length, 1)) + { + u8 ch = src.pointer[index]; + u64 matches = ch == expected_ch; + expect(matches, 1); + parser->i += matches; + if (!matches) + { + print(strlit("expected character '")); + print(ch_to_str(expected_ch)); + print(strlit("', but found '")); + print(ch_to_str(ch)); + print(strlit("'\n")); + fail(); + } + } + else + { + print(strlit("expected character '")); + print(ch_to_str(expected_ch)); + print(strlit("', but found end of file\n")); + fail(); + } +} + +fn String parse_identifier(Parser* parser, String src) +{ + u64 identifier_start_index = parser->i; + u64 is_string_literal = src.pointer[identifier_start_index] == '"'; + parser->i += is_string_literal; + u8 identifier_start_ch = src.pointer[parser->i]; + u64 is_valid_identifier_start = is_identifier_start(identifier_start_ch); + parser->i += is_valid_identifier_start; + + if (expect(is_valid_identifier_start, 1)) + { + while (parser->i < src.length) + { + u8 ch = src.pointer[parser->i]; + u64 is_identifier = is_identifier_ch(ch); + expect(is_identifier, 1); + parser->i += is_identifier; + + if (!is_identifier) + { + if (expect(is_string_literal, 0)) + { + expect_character(parser, src, '"'); + } + + String result = 
src.slice(identifier_start_index, parser->i - is_string_literal); + return result; + } + } + + fail(); + } + else + { + fail(); + } +} + +typedef enum Keyword : u32 +{ + KEYWORD_COUNT, + KEYWORD_INVALID = ~0u, +} Keyword; + +// TODO: +// fn Keyword parse_keyword(String identifier) +// { +// Keyword result = KEYWORD_INVALID; +// return result; +// } + +fn Hash parse_and_intern_identifier(Parser* parser, Unit* unit, String src) +{ + String identifier = parse_identifier(parser, src); + // Keyword keyword_index = parse_keyword(identifier); + // if (expect(keyword_index != KEYWORD_INVALID, 0)) + // { + // fail(); + // } + + if (expect(identifier.equal(strlit("_")), 0)) + { + return 0; + } + + Hash result = intern_identifier(unit, identifier); + return result; +} + + +// fn u32 get_line(Parser* parser) +// { +// return parser->line + 1; +// } +// +// fn u32 get_column(Parser* parser) +// { +// return parser->i - parser->column + 1; +// } + +fn File* unit_add_file(Unit* unit, String file_path) +{ + auto* file = unit->files.add_one(); + *file = { + .path = file_path, + }; + return file; +} + +fn void unit_file_read(Unit* unit, File* file) +{ + assert(file->status == FILE_STATUS_ADDED || file->status == FILE_STATUS_QUEUED); + file->source_code = file_read(unit->arena, file->path); + file->status = FILE_STATUS_READ; +} + +global constexpr auto brace_open = '{'; +global constexpr auto brace_close = '}'; + +global constexpr auto parenthesis_open = '('; +global constexpr auto parenthesis_close = ')'; + +global constexpr auto bracket_open = '['; +global constexpr auto bracket_close = ']'; + +global constexpr auto pointer_sign = '*'; +global constexpr auto end_of_statement = ';'; +global constexpr auto end_of_argument = ','; + +global constexpr auto function_argument_start = parenthesis_open; +global constexpr auto function_argument_end = parenthesis_close; + +global constexpr auto function_attribute_start = bracket_open; +global constexpr auto function_attribute_end = 
bracket_close; + +global constexpr auto symbol_attribute_start = bracket_open; +global constexpr auto symbol_attribute_end = bracket_close; + +global constexpr auto block_start = brace_open; +global constexpr auto block_end = brace_close; + +global constexpr auto array_expression_start = bracket_open; +// global constexpr auto array_expression_end = bracket_close; + +global constexpr auto composite_initialization_start = brace_open; +// global constexpr auto composite_initialization_end = brace_close; + +global String function_attributes[] = +{ + strlit("cc"), +}; + +typedef enum FunctionAttribute +{ + FUNCTION_ATTRIBUTE_CC, + FUNCTION_ATTRIBUTE_COUNT, +} FunctionAttribute; + +static_assert(array_length(function_attributes) == FUNCTION_ATTRIBUTE_COUNT, ""); + +global String calling_conventions[] = +{ + strlit("c"), + strlit("custom"), +}; + +typedef enum CallingConvention +{ + CALLING_CONVENTION_C, + CALLING_CONVENTION_CUSTOM, + CALLING_CONVENTION_COUNT, +} CallingConvention; + +static_assert(array_length(calling_conventions) == CALLING_CONVENTION_COUNT, ""); + +typedef enum GlobalSymbolAttribute +{ + GLOBAL_SYMBOL_ATTRIBUTE_EXPORT, + GLOBAL_SYMBOL_ATTRIBUTE_EXTERN, + GLOBAL_SYMBOL_ATTRIBUTE_COUNT, +} GlobalSymbolAttribute; + +global String global_symbol_attributes[] = +{ + strlit("export"), + strlit("extern"), +}; + +struct GlobalSymbolAttributes +{ + u8 exported: 1; + u8 external: 1; +}; +typedef struct GlobalSymbolAttributes GlobalSymbolAttributes; + +static_assert(array_length(global_symbol_attributes) == GLOBAL_SYMBOL_ATTRIBUTE_COUNT, ""); + +struct Analyzer +{ + Function* function; +}; + +fn SemaType* analyze_type(Parser* parser, Unit* unit, String src) +{ + u64 start_index = parser->i; + u8 start_ch = src.pointer[start_index]; + u32 array_start = start_ch == array_expression_start; + u32 u_start = start_ch == 'u'; + u32 s_start = start_ch == 's'; + u32 float_start = start_ch == 'f'; + u32 void_start = start_ch == 'v'; + u32 pointer_start = start_ch == 
pointer_sign; + u32 integer_start = u_start | s_start; + u32 number_start = integer_start | float_start; + + if (void_start) + { + trap(); + } + else if (array_start) + { + trap(); + } + else if (pointer_start) + { + trap(); + } + else if (number_start) + { + u64 expected_digit_start = start_index + 1; + u64 i = expected_digit_start; + u32 decimal_digit_count = 0; + u64 top = i + 5; + + while (i < top) + { + u8 ch = src.pointer[i]; + u32 is_digit = is_decimal_digit(ch); + decimal_digit_count += is_digit; + if (!is_digit) + { + u32 is_alpha = is_alphabetic(ch); + if (is_alpha) + { + decimal_digit_count = 0; + } + break; + } + + i += 1; + } + + + if (decimal_digit_count) + { + parser->i += 1; + + if (integer_start) + { + u64 signedness = s_start; + u64 bit_size; + u64 current_i = parser->i; + assert(src.pointer[current_i] >= '0' & src.pointer[current_i] <= '9'); + switch (decimal_digit_count) { + case 0: + fail(); + case 1: + bit_size = src.pointer[current_i] - '0'; + break; + case 2: + bit_size = (src.pointer[current_i] - '0') * 10 + (src.pointer[current_i + 1] - '0'); + break; + default: + fail(); + } + parser->i += decimal_digit_count; + + assert(!is_decimal_digit(src.pointer[parser->i])); + + if (bit_size) + { + auto* result = unit->get_integer_type(bit_size, signedness); + return result; + } + else + { + fail(); + } + } + else if (float_start) + { + trap(); + } + else + { + trap(); + } + } + else + { + fail(); + } + } + + trap(); +} + +fn u64 parse_hex(String string) +{ + u64 value = 0; + for (u8 ch : string) + { + u8 byte; + auto is_decimal = (ch >= '0') & (ch <= '9'); + auto is_lower_hex = (ch >= 'a') & (ch <= 'f'); + auto is_upper_hex = (ch >= 'A') & (ch <= 'F'); + if (is_decimal) + { + byte = ch - '0'; + } + else if (is_lower_hex) + { + byte = ch - 'a' + 10; + } + else if (is_upper_hex) + { + byte = ch - 'A' + 10; + } + else + { + fail(); + } + + value = (value << 4) | (byte & 0x0f); + } + + return value; +} + +[[nodiscard]] fn Node* 
parse_constant_integer(Parser* parser, Unit* unit, String src, SemaType* type) +{ + auto starting_ch = src[parser->i]; + if (starting_ch == '0') + { + auto follow_up_character = src[parser->i + 1]; + auto is_hex_start = follow_up_character == 'x'; + auto is_octal_start = follow_up_character == 'o'; + auto is_bin_start = follow_up_character == 'b'; + auto is_prefixed_start = is_hex_start | is_octal_start | is_bin_start; + auto follow_up_alpha = is_alphabetic(follow_up_character); + auto follow_up_digit = is_decimal_digit(follow_up_character); + auto is_valid_after_zero = is_space(follow_up_character, get_next_ch_safe(src, follow_up_character)) | (!follow_up_digit and !follow_up_alpha); + + if (is_prefixed_start) { + enum class IntegerPrefix { + hexadecimal, + octal, + binary, + }; + IntegerPrefix prefix; + switch (follow_up_character) { + case 'x': prefix = IntegerPrefix::hexadecimal; break; + case 'o': prefix = IntegerPrefix::octal; break; + case 'b': prefix = IntegerPrefix::binary; break; + default: fail(); + }; + + parser->i += 2; + + auto start = parser->i; + + switch (prefix) { + case IntegerPrefix::hexadecimal: + { + while (is_hex_digit(src[parser->i])) { + parser->i += 1; + } + + auto slice = src.slice(start, parser->i); + auto number = parse_hex(slice); + + auto* constant_int = Node::add(unit); + constant_int->id = Node::Id::CONSTANT_INT; + constant_int->data_type = { .id = NodeDataType::Id::INTEGER, .bit_count = type->get_bit_count() }; + constant_int->constant_int = number; + // TODO: is this fine? 
+ constant_int->reallocate_edges(unit, 1); + return constant_int; + } + case IntegerPrefix::octal: + trap(); + case IntegerPrefix::binary: + trap(); + } + } else if (is_valid_after_zero) { + parser->i += 1; + + auto* constant_int = Node::add(unit); + constant_int->id = Node::Id::CONSTANT_INT; + constant_int->data_type = { .id = NodeDataType::Id::INTEGER, .bit_count = type->get_bit_count() }; + constant_int->reallocate_edges(unit, 1); + constant_int->constant_int = 0; + return constant_int; + } else { + fail(); + } + + trap(); + } + else + { + trap(); + } +} + +[[nodiscard]] fn Node* analyze_single_expression(Analyzer* analyzer, Parser* parser, Unit* unit, String src, SemaType* type, Side side) +{ + unused(side); + enum class Unary + { + NONE, + ONE_COMPLEMENT, + NEGATION, + }; + auto unary_operation = Unary::NONE; + + auto original_starting_ch_index = parser->i; + u8 original_starting_ch = src[original_starting_ch_index]; + + switch (src[parser->i]) + { + case '\'': + trap(); + case '"': + trap(); + case '-': + trap(); + case '~': + trap(); + case '#': + trap(); + case composite_initialization_start: + trap(); + case array_expression_start: + trap(); + default: + assert(is_decimal_digit(original_starting_ch) | is_identifier_start(original_starting_ch)); + break; + } + + auto starting_ch_index = parser->i; + u8 starting_ch = src[starting_ch_index]; + auto is_digit = is_decimal_digit(starting_ch); + auto is_identifier = is_identifier_start(starting_ch); + + // auto line = get_line(parser); + // auto column = get_column(parser); + + if (is_digit) + { + SemaType* integer_type; + if (type) + { + integer_type = type; + } + else + { + switch (unary_operation) + { + case Unary::NONE: + integer_type = unit->get_integer_type(64, 0); + break; + case Unary::ONE_COMPLEMENT: + fail(); + case Unary::NEGATION: + fail(); + } + } + + if (integer_type->id != SemaTypeId::INTEGER) + { + fail(); + } + + Node* constant_int = parse_constant_integer(parser, unit, src, integer_type); + 
constant_int->gvn = analyzer->function->node_count; + analyzer->function->node_count += 1; + + return constant_int; + } + else if (is_identifier) + { + trap(); + } + else + { + fail(); + } +} + +[[nodiscard]] fn Node* analyze_expression(Analyzer* analyzer, Parser* parser, Unit* unit, String src, SemaType* type, Side side) +{ + enum class CurrentOperation + { + NONE, + }; + + u64 iterations = 0; + SemaType* iteration_type = type; + auto current_operation = CurrentOperation::NONE; + Node* previous_node = 0; + + while (1) + { + if ((iterations == 0) & !iteration_type) + { + trap(); + } + + // u32 line = get_line(parser); + // u32 column = get_column(parser); + Node* current_node; + if (src[parser->i] == '(') + { + trap(); + } + else + { + current_node = analyze_single_expression(analyzer, parser, unit, src, iteration_type, side); + } + + skip_space(parser, src); + + switch (current_operation) + { + case CurrentOperation::NONE: + previous_node = current_node; + break; + } + + auto original_index = parser->i; + u8 original = src[original_index]; + + switch (original) + { + case end_of_statement: + case end_of_argument: + case parenthesis_close: + case bracket_close: + return previous_node; + default: + trap(); + } + + iterations += 1; + } +} + +fn void analyze_local_block(Analyzer* analyzer, Parser* parser, Unit* unit, String src) +{ + expect_character(parser, src, block_start); + while (1) + { + skip_space(parser, src); + + if (src[parser->i] == block_end) + { + break; + } + + auto statement_start_index = parser->i; + u8 statement_start_ch = src[statement_start_index]; + + if (is_identifier_start(statement_start_ch)) + { + String identifier = parse_identifier(parser, src); + if (identifier.equal(strlit("return"))) + { + skip_space(parser, src); + + auto* return_value = analyze_expression(analyzer, parser, unit, src, analyzer->function->prototype.original_return_type, Side::right); + expect_character(parser, src, ';'); + + Function* function = analyzer->function; + + 
Node* ret_node = Node::add_from_function(unit, function); + ret_node->id = Node::Id::RETURN; + ret_node->data_type = { .id = NodeDataType::Id::CONTROL }; + ret_node->reallocate_edges(unit, 4); + ret_node->input_count = 2; + ret_node->set_input(unit, function, function->root_node, 0); + ret_node->set_input(unit, function, return_value, 1); + } + else + { + trap(); + } + } + else + { + trap(); + } + } + + expect_character(parser, src, block_end); +} + +typedef enum SystemVClass +{ + SYSTEMV_CLASS_NONE, + SYSTEMV_CLASS_MEMORY, + SYSTEMV_CLASS_INTEGER, + SYSTEMV_CLASS_SSE, + SYSTEMV_CLASS_SSEUP, +} SystemVClass; + +struct SystemVClassification +{ + SystemVClass v[2]; +}; +typedef struct SystemVClassification SystemVClassification; + +struct SystemVRegisterCount +{ + u32 gp_registers; + u32 sse_registers; +}; +typedef struct SystemVRegisterCount SystemVRegisterCount; + +fn SystemVClassification systemv_classify(SemaType* type, u64 base_offset) +{ + SystemVClassification result; + u32 is_memory = base_offset >= 8; + u32 current_index = is_memory; + result.v[current_index] = SYSTEMV_CLASS_MEMORY; + result.v[!current_index] = SYSTEMV_CLASS_NONE; + + switch (type->id) + { + case SemaTypeId::VOID: + trap(); + case SemaTypeId::NORETURN: + trap(); + case SemaTypeId::POINTER: + trap(); + case SemaTypeId::INTEGER: + { + u8 bit_count = type->get_bit_count(); + switch (bit_count) + { + case 8: case 16: case 32: case 64: + result.v[current_index] = SYSTEMV_CLASS_INTEGER; + break; + default: + trap(); + } + } break; + case SemaTypeId::COUNT: + trap(); + default: + trap(); + } + + return result; +} + +fn u8 contains_no_user_data(SemaType* type, u64 start, u64 end) +{ + unused(end); + if (type->size <= start) + { + return 1; + } + + switch (type->id) + { + case SemaTypeId::ARRAY: + trap(); + case SemaTypeId::STRUCT: + trap(); + case SemaTypeId::UNION: + trap(); + default: + return 0; + case SemaTypeId::COUNT: + trap(); + } +} + +fn SemaType* systemv_get_int_type_at_offset(SemaType* 
type, u64 offset, SemaType* source_type, u64 source_offset) +{ + unused(source_type); + + switch (type->id) + { + case SemaTypeId::VOID: + trap(); + case SemaTypeId::NORETURN: + trap(); + case SemaTypeId::POINTER: + trap(); + case SemaTypeId::INTEGER: + { + u8 bit_count = type->get_bit_count(); + switch (bit_count) + { + case 8: case 16: case 32: case 64: + if (offset == 0) + { + u64 start = source_offset + type->size; + u64 end = source_offset + 8; + if (contains_no_user_data(type, start, end)) + { + return type; + } + trap(); + } + else + { + trap(); + } + default: + trap(); + } + trap(); + } break; + case SemaTypeId::COUNT: + trap(); + case SemaTypeId::ARRAY: + trap(); + case SemaTypeId::STRUCT: + trap(); + case SemaTypeId::UNION: + trap(); + } +} + +fn void analyze_function(Parser* parser, Unit* unit, String src) +{ + expect_character(parser, src, 'f'); + expect_character(parser, src, 'n'); + + skip_space(parser, src); + + u64 has_function_attributes = src.pointer[parser->i] == function_attribute_start; + parser->i += has_function_attributes; + + CallingConvention calling_convention = CALLING_CONVENTION_CUSTOM; + + if (has_function_attributes) + { + u64 mask = 0; + + while (1) + { + skip_space(parser, src); + + if (src.pointer[parser->i] == function_attribute_end) + { + break; + } + + String attribute_candidate = parse_identifier(parser, src); + + u64 attribute_i; + for (attribute_i = 0; attribute_i < array_length(function_attributes); attribute_i += 1) + { + String function_attribute_string = function_attributes[attribute_i]; + if (attribute_candidate.equal(function_attribute_string)) + { + if (mask & (1 << attribute_i)) + { + fail(); + } + + auto function_attribute = static_cast(attribute_i); + mask |= (1 << attribute_i); + + switch (function_attribute) + { + case FUNCTION_ATTRIBUTE_CC: + { + skip_space(parser, src); + expect_character(parser, src, '('); + skip_space(parser, src); + expect_character(parser, src, '.'); + String candidate_cc = 
parse_identifier(parser, src); + skip_space(parser, src); + expect_character(parser, src, ')'); + + u64 cc_i; + for (cc_i = 0; cc_i < array_length(calling_conventions); cc_i += 1) + { + String calling_convention_string = calling_conventions[cc_i]; + + if (calling_convention_string.equal(candidate_cc)) + { + calling_convention = static_cast(cc_i); + break; + } + } + + if (cc_i == array_length(calling_conventions)) + { + fail(); + } + } break; + default: + trap(); + } + + break; + } + } + + if (attribute_i == array_length(function_attributes)) + { + fail(); + } + + skip_space(parser, src); + + u8 after_ch = src.pointer[parser->i]; + switch (after_ch) + { + case function_attribute_end: break; + default: fail(); + } + } + + expect_character(parser, src, function_attribute_end); + + skip_space(parser, src); + } + + Hash name_hash = parse_and_intern_identifier(parser, unit, src); + + skip_space(parser, src); + + u64 has_global_attributes = src.pointer[parser->i] == symbol_attribute_start; + parser->i += has_global_attributes; + + GlobalSymbolAttributes symbol_attributes = {}; + if (has_global_attributes) + { + u64 mask = 0; + + while (1) + { + skip_space(parser, src); + + if (src.pointer[parser->i] == symbol_attribute_end) + { + break; + } + + String candidate_attribute = parse_identifier(parser, src); + skip_space(parser, src); + switch (src.pointer[parser->i]) + { + case symbol_attribute_end: + break; + case ',': + parser->i += 1; + break; + default: + fail(); + } + + u64 attribute_i; + for (attribute_i = 0; attribute_i < array_length(global_symbol_attributes); attribute_i += 1) + { + String attribute_string = global_symbol_attributes[attribute_i]; + if (attribute_string.equal(candidate_attribute)) + { + if (mask & (1 << attribute_i)) + { + fail(); + } + + mask |= 1 << attribute_i; + + auto attribute = static_cast(attribute_i); + + switch (attribute) + { + case GLOBAL_SYMBOL_ATTRIBUTE_EXPORT: + symbol_attributes.exported = 1; + break; + case 
GLOBAL_SYMBOL_ATTRIBUTE_EXTERN: + symbol_attributes.external = 1; + break; + default: + trap(); + } + break; + } + } + + if (attribute_i == array_length(global_symbol_attributes)) + { + fail(); + } + } + + expect_character(parser, src, symbol_attribute_end); + + skip_space(parser, src); + } + + if (symbol_attributes.exported & symbol_attributes.external) + { + fail(); + } + + expect_character(parser, src, function_argument_start); + + while (1) + { + skip_space(parser, src); + + if (src.pointer[parser->i] == function_argument_end) + { + break; + } + + // TODO: function arguments in function definition + trap(); + } + + expect_character(parser, src, function_argument_end); + + skip_space(parser, src); + PinnedArray original_argument_types = {}; + + SemaType* original_return_type = analyze_type(parser, unit, src); + + skip_space(parser, src); + + switch (calling_convention) + { + case CALLING_CONVENTION_C: + { + // First process the return type ABI + AbiInfo return_type_abi = {}; + { + SystemVClassification return_type_classes = systemv_classify(original_return_type, 0); + assert(return_type_classes.v[1] != SYSTEMV_CLASS_MEMORY | return_type_classes.v[0] == SYSTEMV_CLASS_MEMORY); + assert(return_type_classes.v[1] != SYSTEMV_CLASS_SSEUP | return_type_classes.v[0] == SYSTEMV_CLASS_SSE); + SemaType* low_part = 0; + switch (return_type_classes.v[0]) + { + case SYSTEMV_CLASS_INTEGER: + { + SemaType* result_type = systemv_get_int_type_at_offset(original_return_type, 0, original_return_type, 0); + if (return_type_classes.v[1] == SYSTEMV_CLASS_NONE & original_return_type->get_bit_count() < 32) + { + trap(); + } + + low_part = result_type; + } break; + default: + trap(); + } + assert(low_part); + + SemaType* high_part = 0; + switch (return_type_classes.v[1]) + { + case SYSTEMV_CLASS_NONE: + break; + case SYSTEMV_CLASS_MEMORY: + trap(); + case SYSTEMV_CLASS_INTEGER: + trap(); + case SYSTEMV_CLASS_SSE: + trap(); + case SYSTEMV_CLASS_SSEUP: + trap(); + } + + if (high_part) + { + 
trap(); + } + else + { + // TODO: + u8 is_type = 1; + if (is_type) + { + if (low_part == original_return_type) + { + return_type_abi = + { + .kind = ABI_INFO_DIRECT, + }; + } + else + { + trap(); + } + } + else + { + trap(); + } + } + } + + // Now process the ABI for argument types + + PinnedArray argument_type_abis = {}; + // u32 abi_argument_type_count = 0; + { + SystemVRegisterCount available_registers = { + .gp_registers = 6, + .sse_registers = 8, + }; + + available_registers.gp_registers -= return_type_abi.kind == ABI_INFO_INDIRECT; + + // TODO: return by reference + u8 return_by_reference = 0; + if (return_by_reference) + { + trap(); + } + + for (u32 original_argument_index = 0; original_argument_index < original_argument_types.length; original_argument_index += 1) + { + trap(); + } + } + + switch (return_type_abi.kind) + { + case ABI_INFO_IGNORE: case ABI_INFO_DIRECT: + break; + case ABI_INFO_DIRECT_PAIR: + trap(); + case ABI_INFO_DIRECT_COERCE: + trap(); + case ABI_INFO_DIRECT_COERCE_INT: + trap(); + case ABI_INFO_DIRECT_SPLIT_STRUCT_I32: + trap(); + case ABI_INFO_EXPAND_COERCE: + trap(); + case ABI_INFO_INDIRECT: + trap(); + case ABI_INFO_EXPAND: + trap(); + } + + // assert(abi_argument_type_count == 0); + // TODO: reserve memory for them + // Slice abi_argument_types = {}; + for (u32 i = 0; i < argument_type_abis.length; i += 1) + { + trap(); + } + + auto* function = unit->functions.add_one(); + *function = { + .symbol = { + .name = name_hash, + .id = Symbol::Id::function, + .linkage = symbol_attributes.external ? 
Symbol::Linkage::external : Symbol::Linkage::internal, + }, + .root_node = 0, + .parameters = unit->arena->allocate_many(argument_type_abis.length), + .prototype = { + .argument_type_abis = argument_type_abis.pointer, + .original_argument_types = original_argument_types.pointer, + .original_return_type = original_return_type, + .return_type_abi = return_type_abi, + .original_argument_count = original_argument_types.length, + .varags = 0, + }, + .node_count = 0, + .parameter_count = (u16)argument_type_abis.length, + }; + + Node* root_node = Node::add_from_function(unit, function); + root_node->id = Node::Id::ROOT; + root_node->data_type = { + .id = NodeDataType::Id::TUPLE, + }; + root_node->reallocate_edges(unit, 4); + // TODO: revisit + + // auto* control_node = root_node->project(unit, function, { + // .type = { .id = NodeDataType::Id::CONTROL }, + // }); + // auto* memory_node = root_node->project(unit, function, {}); + // auto* pointer_node = root_node->project(unit, function, {}); + // function->parameters[0] = control_node; + // function->parameters[1] = memory_node; + // function->parameters[2] = pointer_node; + + for (u32 argument_i = 0; argument_i < argument_type_abis.length; argument_i += 1) + { + trap(); + } + + // TODO: callgraph + + // TODO: revisit + // Node* ret_node = Node::add_from_function(unit, function); + // ret_node->id = Node::Id::RETURN; + // ret_node->data_type = { .id = NodeDataType::Id::CONTROL }; + // ret_node->reallocate_edges(unit, 4); + // ret_node->input_count = 2; + // ret_node->set_input(unit, function, root_node, 0); + + switch (symbol_attributes.external) + { + case 0: + { + Analyzer analyzer = {}; + analyzer.function = function; + analyze_local_block(&analyzer, parser, unit, src); + } break; + case 1: + trap(); + } + } break; + case CALLING_CONVENTION_CUSTOM: + trap(); + break; + case CALLING_CONVENTION_COUNT: + trap(); + break; + } +} + +fn void unit_file_analyze(Unit* unit, File* file) +{ + unit_file_read(unit, file); + + 
Parser parser = {}; + String src = file->source_code; + + while (1) + { + skip_space(&parser, src); + + if (parser.i >= src.length) + { + break; + } + + // u32 line = get_line(&parser); + // u32 column = get_column(&parser); + u64 declaration_start_index = parser.i; + u8 declaration_start_ch = src.pointer[declaration_start_index]; + + switch (declaration_start_ch) + { + case '>': + trap(); + break; + case 'f': + if (get_next_ch_safe(src, declaration_start_index) == 'n') + { + analyze_function(&parser, unit, src); + } + else + { + fail(); + } + break; + default: + fail(); + } + } +} + +global Instance instance; + +extern "C" void entry_point() +{ + instance.arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(4)); + Unit* unit = instance_add_unit(&instance); + File* file = unit_add_file(unit, strlit("tests/first/main.nat")); + unit_initialize(unit); + unit_file_analyze(unit, file); +} diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..45adfce --- /dev/null +++ b/run.sh @@ -0,0 +1,23 @@ +#!/bin/sh +set -ex +mkdir -p build +time clang++ \ +-o build/hatch \ +bootstrap/main.cpp \ +bootstrap/entry.S \ +-g \ +-std=gnu++23 \ +-Wall \ +-Wextra \ +-Wpedantic \ +-Wno-nested-anon-types \ +-pedantic \ +-ffreestanding \ +-nostdlib \ +-static \ +-fno-exceptions \ +-fno-stack-protector \ +-ferror-limit=1 \ +`#-ftime-report` \ +-MJ build/compile_commands.json +gf2 -ex r build/hatch diff --git a/tests/first/main.nat b/tests/first/main.nat new file mode 100644 index 0000000..425cd5a --- /dev/null +++ b/tests/first/main.nat @@ -0,0 +1,4 @@ +fn[cc(.c)] main [export] () s32 +{ + return 0; +}