#include <fcntl.h>
#include <stdint.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/mman.h>

// Short fixed-width aliases used throughout the file.
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;
typedef float f32;
typedef double f64;

// Identifier hashes are the low 32 bits of a 64-bit FNV-1a style hash (see hash_bytes).
typedef u32 Hash;

#define fn static
#define global static
// Traps (instead of printing) when the condition does not hold.
#define assert(x) if (__builtin_expect(!(x), 0)) { trap(); }
#define forceinline __attribute__((always_inline))
#define expect(x, b) __builtin_expect(x, b)
#define trap() __builtin_trap()
// Fully parenthesized so the macro can be used inside larger expressions
// (e.g. `total / array_length(a)`).
#define array_length(arr) (sizeof(arr) / sizeof((arr)[0]))
#define page_size (0x1000)
#define unused(x) (void)(x)

#define KB(n) ((n) * 1024)
#define MB(n) ((n) * 1024 * 1024)
#define GB(n) ((u64)(n) * 1024 * 1024 * 1024)
#define TB(n) ((u64)(n) * 1024 * 1024 * 1024 * 1024)

// Singly linked chunk holding a pointer/count pair.
template<typename T>
struct DynamicList
{
    T* pointer;
    u64 count;
    DynamicList* next;
};

// Singly linked chunk with inline storage for `count` elements.
template<typename T, u64 count>
struct StaticList
{
    u64 length;
    StaticList* next;
    T array[count];
};

// Byte-wise copy of `size` bytes from `src` to `dst`; returns `dst`.
// The source is const-qualified to match the standard memcpy prototype;
// callers passing non-const pointers are unaffected.
extern "C" void* memcpy(void* __restrict dst, const void* __restrict src, u64 size)
{
    auto* destination = (u8*)dst;
    auto* source = (const u8*)src;

    for (u64 i = 0; i < size; i += 1)
    {
        destination[i] = source[i];
    }

    return dst;
}

// Fills `size` bytes at `dst` with the byte `n`; returns `dst`.
extern "C" void* memset(void* dst, u8 n, u64 size)
{
    auto* destination = (u8*)dst;

    for (u64 i = 0; i < size; i += 1)
    {
        destination[i] = n;
    }

    return dst;
}

// Compares `n` bytes; returns 0 when equal, otherwise the difference of the
// first pair of bytes that differ.
extern "C" int memcmp(const void* left, const void* right, u64 n)
{
    const u8 *l = (const u8*)left, *r = (const u8*)right;
    for (; n && *l == *r; n--, l++, r++);
    return n ? *l - *r : 0;
}

// Returns 1 when the first `count` elements of `a` and `b` are bytewise equal.
template<typename T>
forceinline fn u8 mem_equal_range(T* a, T* b, u64 count)
{
    return memcmp(a, b, count * sizeof(T)) == 0;
}

template <typename T>
forceinline fn T min(T a, T b)
{
    return a < b ? a : b;
}

template <typename T>
forceinline fn T max(T a, T b)
{
    return a > b ? a : b;
}

// Non-owning pointer + length view.
template<typename T>
struct Slice
{
    T* pointer;
    u64 length;

    // Bounds-checked element access (traps when out of range).
    T& operator[](u64 index)
    {
        assert(index < length);
        return pointer[index];
    }

    // Returns the sub-view [start, end). Traps on an inverted or out-of-range
    // window instead of silently producing a view outside this slice.
    Slice slice(u64 start, u64 end)
    {
        assert(start <= end);
        assert(end <= length);
        return {
            .pointer = pointer + start,
            .length = end - start,
        };
    }

    // Returns 1 when both slices have the same length and the same bytes.
    forceinline u8 equal(Slice other)
    {
        if (length == other.length)
        {
            return mem_equal_range(pointer, other.pointer, length);
        }
        else
        {
            return 0;
        }
    }

    forceinline T* begin()
    {
        return pointer;
    }

    forceinline T* end()
    {
        return pointer + length;
    }
};

using String = Slice<u8>;
// Builds a String over a string literal, excluding the terminating NUL.
#define strlit(s) String{ .pointer = (u8*)s, .length = sizeof(s) - 1, }
// Builds a one-byte String that points at the lvalue `ch`.
#define ch_to_str(ch) String{ .pointer = &(ch), .length = 1 }

global auto constexpr fnv_offset = 14695981039346656037ull;
global auto constexpr fnv_prime = 1099511628211ull;

// XOR-then-multiply hash over the bytes, accumulated in 64 bits and truncated to Hash.
fn Hash hash_bytes(String bytes)
{
    u64 result = fnv_offset;
    for (u64 i = 0; i < bytes.length; i += 1)
    {
        result ^= bytes.pointer[i];
        result *= fnv_prime;
    }

    return (Hash)result;
}

// fn forceinline long syscall0(long n)
// {
//     unsigned long ret;
//     __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n) : "rcx", "r11", "memory");
//     return ret;
// }

// Raw `syscall` instruction wrappers: number in rax, arguments in rdi, rsi, rdx.
fn forceinline long syscall1(long n, long a1)
{
    unsigned long ret;
    __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1) : "rcx", "r11", "memory");
    return ret;
}

fn forceinline long syscall2(long n, long a1, long a2)
{
    unsigned long ret;
    __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2) : "rcx", "r11", "memory");
    return ret;
}

fn forceinline long syscall3(long n, long a1, long a2, long a3)
{
    unsigned long ret;
    __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2), "d"(a3) : "rcx", "r11", "memory");
    return ret;
}

// fn forceinline long syscall4(long n, long a1, long a2, long a3, long a4)
// {
//     unsigned long ret;
//     register long r10 __asm__("r10") = a4;
//     __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2),
//             "d"(a3), "r"(r10): "rcx", "r11", "memory");
//     return ret;
// }

// fn forceinline long syscall5(long n, long
a1, long a2, long a3, long a4, long a5) // { // unsigned long ret; // register long r10 __asm__("r10") = a4; // register long r8 __asm__("r8") = a5; // __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2), // "d"(a3), "r"(r10), "r"(r8) : "rcx", "r11", "memory"); // return ret; // } fn forceinline long syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6) { unsigned long ret; register long r10 __asm__("r10") = a4; register long r8 __asm__("r8") = a5; register long r9 __asm__("r9") = a6; __asm__ __volatile__ ("syscall" : "=a"(ret) : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10), "r"(r8), "r"(r9) : "rcx", "r11", "memory"); return ret; } fn u8 memeq(u8* a, u8* b, u64 size) { for (u64 i = 0; i < size; i += 1) { if (a[i] != b[i]) { return 0; } } return 1; } enum class SyscallX86_64 : u64 { read = 0, write = 1, open = 2, close = 3, stat = 4, fstat = 5, lstat = 6, poll = 7, lseek = 8, mmap = 9, mprotect = 10, munmap = 11, brk = 12, rt_sigaction = 13, rt_sigprocmask = 14, rt_sigreturn = 15, ioctl = 16, pread64 = 17, pwrite64 = 18, readv = 19, writev = 20, access = 21, pipe = 22, select = 23, sched_yield = 24, mremap = 25, msync = 26, mincore = 27, madvise = 28, shmget = 29, shmat = 30, shmctl = 31, dup = 32, dup2 = 33, pause = 34, nanosleep = 35, getitimer = 36, alarm = 37, setitimer = 38, getpid = 39, sendfile = 40, socket = 41, connect = 42, accept = 43, sendto = 44, recvfrom = 45, sendmsg = 46, recvmsg = 47, shutdown = 48, bind = 49, listen = 50, getsockname = 51, getpeername = 52, socketpair = 53, setsockopt = 54, getsockopt = 55, clone = 56, fork = 57, vfork = 58, execve = 59, exit = 60, wait4 = 61, kill = 62, uname = 63, semget = 64, semop = 65, semctl = 66, shmdt = 67, msgget = 68, msgsnd = 69, msgrcv = 70, msgctl = 71, fcntl = 72, flock = 73, fsync = 74, fdatasync = 75, truncate = 76, ftruncate = 77, getdents = 78, getcwd = 79, chdir = 80, fchdir = 81, rename = 82, mkdir = 83, rmdir = 84, creat = 85, link = 86, unlink = 87, symlink = 
88, readlink = 89, chmod = 90, fchmod = 91, chown = 92, fchown = 93, lchown = 94, umask = 95, gettimeofday = 96, getrlimit = 97, getrusage = 98, sysinfo = 99, times = 100, ptrace = 101, getuid = 102, syslog = 103, getgid = 104, setuid = 105, setgid = 106, geteuid = 107, getegid = 108, setpgid = 109, getppid = 110, getpgrp = 111, setsid = 112, setreuid = 113, setregid = 114, getgroups = 115, setgroups = 116, setresuid = 117, getresuid = 118, setresgid = 119, getresgid = 120, getpgid = 121, setfsuid = 122, setfsgid = 123, getsid = 124, capget = 125, capset = 126, rt_sigpending = 127, rt_sigtimedwait = 128, rt_sigqueueinfo = 129, rt_sigsuspend = 130, sigaltstack = 131, utime = 132, mknod = 133, uselib = 134, personality = 135, ustat = 136, statfs = 137, fstatfs = 138, sysfs = 139, getpriority = 140, setpriority = 141, sched_setparam = 142, sched_getparam = 143, sched_setscheduler = 144, sched_getscheduler = 145, sched_get_priority_max = 146, sched_get_priority_min = 147, sched_rr_get_interval = 148, mlock = 149, munlock = 150, mlockall = 151, munlockall = 152, vhangup = 153, modify_ldt = 154, pivot_root = 155, _sysctl = 156, prctl = 157, arch_prctl = 158, adjtimex = 159, setrlimit = 160, chroot = 161, sync = 162, acct = 163, settimeofday = 164, mount = 165, umount2 = 166, swapon = 167, swapoff = 168, reboot = 169, sethostname = 170, setdomainname = 171, iopl = 172, ioperm = 173, create_module = 174, init_module = 175, delete_module = 176, get_kernel_syms = 177, query_module = 178, quotactl = 179, nfsservctl = 180, getpmsg = 181, putpmsg = 182, afs_syscall = 183, tuxcall = 184, security = 185, gettid = 186, readahead = 187, setxattr = 188, lsetxattr = 189, fsetxattr = 190, getxattr = 191, lgetxattr = 192, fgetxattr = 193, listxattr = 194, llistxattr = 195, flistxattr = 196, removexattr = 197, lremovexattr = 198, fremovexattr = 199, tkill = 200, time = 201, futex = 202, sched_setaffinity = 203, sched_getaffinity = 204, set_thread_area = 205, io_setup = 206, io_destroy = 
207, io_getevents = 208, io_submit = 209, io_cancel = 210, get_thread_area = 211, lookup_dcookie = 212, epoll_create = 213, epoll_ctl_old = 214, epoll_wait_old = 215, remap_file_pages = 216, getdents64 = 217, set_tid_address = 218, restart_syscall = 219, semtimedop = 220, fadvise64 = 221, timer_create = 222, timer_settime = 223, timer_gettime = 224, timer_getoverrun = 225, timer_delete = 226, clock_settime = 227, clock_gettime = 228, clock_getres = 229, clock_nanosleep = 230, exit_group = 231, epoll_wait = 232, epoll_ctl = 233, tgkill = 234, utimes = 235, vserver = 236, mbind = 237, set_mempolicy = 238, get_mempolicy = 239, mq_open = 240, mq_unlink = 241, mq_timedsend = 242, mq_timedreceive = 243, mq_notify = 244, mq_getsetattr = 245, kexec_load = 246, waitid = 247, add_key = 248, request_key = 249, keyctl = 250, ioprio_set = 251, ioprio_get = 252, inotify_init = 253, inotify_add_watch = 254, inotify_rm_watch = 255, migrate_pages = 256, openat = 257, mkdirat = 258, mknodat = 259, fchownat = 260, futimesat = 261, fstatat64 = 262, unlinkat = 263, renameat = 264, linkat = 265, symlinkat = 266, readlinkat = 267, fchmodat = 268, faccessat = 269, pselect6 = 270, ppoll = 271, unshare = 272, set_robust_list = 273, get_robust_list = 274, splice = 275, tee = 276, sync_file_range = 277, vmsplice = 278, move_pages = 279, utimensat = 280, epoll_pwait = 281, signalfd = 282, timerfd_create = 283, eventfd = 284, fallocate = 285, timerfd_settime = 286, timerfd_gettime = 287, accept4 = 288, signalfd4 = 289, eventfd2 = 290, epoll_create1 = 291, dup3 = 292, pipe2 = 293, inotify_init1 = 294, preadv = 295, pwritev = 296, rt_tgsigqueueinfo = 297, perf_event_open = 298, recvmmsg = 299, fanotify_init = 300, fanotify_mark = 301, prlimit64 = 302, name_to_handle_at = 303, open_by_handle_at = 304, clock_adjtime = 305, syncfs = 306, sendmmsg = 307, setns = 308, getcpu = 309, process_vm_readv = 310, process_vm_writev = 311, kcmp = 312, finit_module = 313, sched_setattr = 314, sched_getattr = 
315, renameat2 = 316, seccomp = 317, getrandom = 318, memfd_create = 319, kexec_file_load = 320, bpf = 321, execveat = 322, userfaultfd = 323, membarrier = 324, mlock2 = 325, copy_file_range = 326, preadv2 = 327, pwritev2 = 328, pkey_mprotect = 329, pkey_alloc = 330, pkey_free = 331, statx = 332, io_pgetevents = 333, rseq = 334, pidfd_send_signal = 424, io_uring_setup = 425, io_uring_enter = 426, io_uring_register = 427, open_tree = 428, move_mount = 429, fsopen = 430, fsconfig = 431, fsmount = 432, fspick = 433, pidfd_open = 434, clone3 = 435, close_range = 436, openat2 = 437, pidfd_getfd = 438, faccessat2 = 439, process_madvise = 440, epoll_pwait2 = 441, mount_setattr = 442, quotactl_fd = 443, landlock_create_ruleset = 444, landlock_add_rule = 445, landlock_restrict_self = 446, memfd_secret = 447, process_mrelease = 448, futex_waitv = 449, set_mempolicy_home_node = 450, cachestat = 451, fchmodat2 = 452, map_shadow_stack = 453, futex_wake = 454, futex_wait = 455, futex_requeue = 456, }; fn void* syscall_mmap(void* address, size_t length, int protection_flags, int map_flags, int fd, __off_t offset) { return (void*) syscall6(static_cast<long>(SyscallX86_64::mmap), (unsigned long)address, length, protection_flags, map_flags, fd, offset); } fn int syscall_mprotect(void *address, size_t length, int protection_flags) { return syscall3(static_cast<long>(SyscallX86_64::mprotect), (unsigned long)address, length, protection_flags); } fn int syscall_open(const char *file_path, int flags, int mode) { return syscall3(static_cast<long>(SyscallX86_64::open), (unsigned long)file_path, flags, mode); } fn int syscall_fstat(int fd, struct stat *buffer) { return syscall2(static_cast<long>(SyscallX86_64::fstat), fd, (unsigned long)buffer); } fn ssize_t syscall_read(int fd, void* buffer, size_t bytes) { return syscall3(static_cast<long>(SyscallX86_64::read), fd, (unsigned long)buffer, bytes); } fn ssize_t syscall_write(int fd, const void *buffer, size_t bytes) { return 
syscall3(static_cast<long>(SyscallX86_64::write), fd, (unsigned long)buffer, bytes); } [[noreturn]] [[gnu::cold]] fn void syscall_exit(int status) { (void)syscall1(231, status); trap(); } [[noreturn]] [[gnu::cold]] fn void fail() { trap(); syscall_exit(1); } fn void* reserve(u64 size, u8 huge_pages) { int protection_flags = PROT_NONE; int map_flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE | (huge_pages ? MAP_HUGETLB : 0); void* result = syscall_mmap(0, size, protection_flags, map_flags, -1, 0); assert(result != MAP_FAILED); return result; } fn void commit(void* address, u64 size) { int result = syscall_mprotect(address, size, PROT_READ | PROT_WRITE); assert(result == 0); } fn u64 align_forward(u64 value, u64 alignment) { u64 mask = alignment - 1; u64 result = (value + mask) & ~mask; return result; } struct Arena { u64 reserved_size; u64 commited; u64 commit_position; u64 granularity; u8 reserved[4 * 8]; global auto constexpr minimum_granularity = KB(4); global auto constexpr middle_granularity = MB(2); global auto constexpr page_granularity = page_size; global auto constexpr default_size = GB(4); fn Arena* init(u64 reserved_size, u64 granularity, u64 initial_size) { assert(initial_size % granularity == 0); Arena* arena = (Arena*)reserve(reserved_size, granularity != minimum_granularity); commit(arena, initial_size); *arena = { .reserved_size = reserved_size, .commited = initial_size, .commit_position = sizeof(Arena), .granularity = granularity, }; return arena; } fn Arena* init_default(u64 initial_size) { return init(default_size, minimum_granularity, initial_size); } void* allocate_bytes(u64 size, u64 alignment) { u64 aligned_offset = align_forward(commit_position, alignment); u64 aligned_size_after = aligned_offset + size; if (aligned_size_after <= commited) { void* result = (u8*)this + aligned_offset; commit_position = aligned_size_after; return result; } else { trap(); } } template<typename T> T* allocate_many(u64 count) { return 
(T*)allocate_bytes(sizeof(T) * count, alignof(T)); } template<typename T> T* allocate_one() { return allocate_many<T>(1); } template<typename T> T* allocate_slice(u64 count) { return { .pointer = allocate_many<T>(count), .length = count, }; } }; static_assert(sizeof(Arena) == 64, "Arena must be cache aligned"); template<typename Destination, typename Source> fn forceinline Destination transmute(Source source) { static_assert(sizeof(Source) == sizeof(Destination)); return *(Destination*)&source; } fn String file_read(Arena* arena, String path) { String result = {}; int file_descriptor = syscall_open((char*)path.pointer, 0, 0); assert(file_descriptor != -1); struct stat stat_buffer; int stat_result = syscall_fstat(file_descriptor, &stat_buffer); assert(stat_result == 0); u64 file_size = stat_buffer.st_size; result = { .pointer = (u8*)arena->allocate_bytes(file_size, 64), .length = file_size, }; // TODO: big files ssize_t read_result = syscall_read(file_descriptor, result.pointer, result.length); assert(read_result >= 0); assert((u64)read_result == file_size); return result; } fn void print(String message) { ssize_t result = syscall_write(1, message.pointer, message.length); assert(result >= 0); assert((u64)result == message.length); } template<typename T> struct PinnedArray; fn void generic_pinned_array_ensure_capacity(PinnedArray<u8>* array, u32 additional_T, u32 size_of_T); fn u8* generic_pinned_array_add_with_capacity(PinnedArray<u8>* array, u32 additional_T, u32 size_of_T); template <typename T> struct PinnedArray { T* pointer; u32 length; u32 committed; global constexpr auto granularity = page_size; global constexpr auto reserved_size = ((u64)GB(4) - granularity); forceinline T& operator[](u32 index) { assert(index < length); return pointer[index]; } forceinline void ensure_capacity(u32 additional) { auto generic_array = (PinnedArray<u8>*)(this); generic_pinned_array_ensure_capacity(generic_array, additional, sizeof(T)); } forceinline Slice<T> 
add_with_capacity(u32 additional) { auto generic_array = (PinnedArray<u8>*)(this); auto pointer = generic_pinned_array_add_with_capacity(generic_array, additional, sizeof(T)); return { .pointer = (T*)pointer, .length = additional, }; } // generic_pinned_array_ensure_capacity(array, additional_T, size_of_T); // u8* result = generic_pinned_array_add_with_capacity(array, additional_T, size_of_T); forceinline Slice<T> add(u32 additional) { ensure_capacity(additional); auto slice = add_with_capacity(additional); return slice; } forceinline T* add_one() { return add(1).pointer; } }; forceinline fn u32 generic_pinned_array_length(PinnedArray<u8>* array, u32 size_of_T) { u32 current_length_bytes = array->length * size_of_T; return current_length_bytes; } fn void generic_pinned_array_ensure_capacity(PinnedArray<u8>* array, u32 additional_T, u32 size_of_T) { if (array->committed == 0) { assert(array->length == 0); assert(array->pointer == 0); array->pointer = static_cast<u8*>(reserve(PinnedArray<u8>::reserved_size, 0)); } u32 additional_bytes = additional_T * size_of_T; u32 current_length_bytes = generic_pinned_array_length(array, size_of_T); u64 granularity_aligned_commit_bytes = align_forward(current_length_bytes, PinnedArray<u8>::granularity); u64 new_length_bytes = current_length_bytes + additional_bytes; if (granularity_aligned_commit_bytes < new_length_bytes) { assert(new_length_bytes <= PinnedArray<u8>::reserved_size); u64 new_granularity_aligned_commit_bytes = align_forward(new_length_bytes, PinnedArray<u8>::granularity); u8* commit_pointer = array->pointer + granularity_aligned_commit_bytes; u64 commit_bytes = new_granularity_aligned_commit_bytes + granularity_aligned_commit_bytes; commit(commit_pointer, commit_bytes); array->committed += commit_bytes / PinnedArray<u8>::granularity; } } fn u8* generic_pinned_array_add_with_capacity(PinnedArray<u8>* array, u32 additional_T, u32 size_of_T) { u32 current_length_bytes = generic_pinned_array_length(array, size_of_T); 
assert(current_length_bytes < PinnedArray<u8>::reserved_size); u8* pointer = array->pointer; array->length += additional_T; return pointer; } template <typename K, typename V> struct PinnedHashmap; template <typename K, typename V> struct GetOrPut { K* key; V* value; u8 existing; }; fn GetOrPut<u8, u8> generic_pinned_hashmap_get_or_put(PinnedHashmap<u8, u8>* hashmap, u8* new_key_pointer, u32 key_size, u8* new_value_pointer, u32 value_size); template<typename K, typename V> struct PinnedHashmap { K* keys; V* values; u32 length; u16 key_page_capacity; u16 value_page_capacity; global constexpr auto invalid_index = ~0u; global constexpr auto granularity = PinnedArray<V>::granularity; global constexpr auto reserved_size = PinnedArray<V>::reserved_size; static_assert(granularity % sizeof(K) == 0, ""); static_assert(granularity % sizeof(V) == 0, ""); forceinline GetOrPut<K, V> get_or_put(K key, V value) { auto* generic_hashmap = (PinnedHashmap<u8, u8>*)(this); auto generic_get_or_put = generic_pinned_hashmap_get_or_put(generic_hashmap, (u8*)&key, sizeof(K), (u8*)&value, sizeof(V)); return *(GetOrPut<K, V>*)&generic_get_or_put; } }; // Returns the generic value pointer if the key is present fn u32 generic_pinned_hashmap_get_index(PinnedHashmap<u8, u8>* hashmap, u8* key_pointer, u32 key_size) { u32 index = hashmap->invalid_index; for (u32 i = 0; i < hashmap->length; i += 1) { u8* it_key_pointer = &hashmap->keys[i * key_size]; if (memeq(it_key_pointer, key_pointer, key_size)) { index = (it_key_pointer - hashmap->keys) / key_size; break; } } return index; } fn void generic_pinned_hashmap_ensure_capacity(PinnedHashmap<u8, u8>* hashmap, u32 key_size, u32 value_size, u32 additional_elements) { if (additional_elements != 0) { if (hashmap->key_page_capacity == 0) { assert(hashmap->value_page_capacity == 0); hashmap->keys = (u8*)reserve(hashmap->reserved_size, 0); hashmap->values = (u8*)reserve(hashmap->reserved_size, 0); } u32 target_element_capacity = hashmap->length + 
additional_elements; { u32 key_byte_capacity = hashmap->key_page_capacity * hashmap->granularity; u32 target_byte_capacity = target_element_capacity * key_size; if (key_byte_capacity < target_byte_capacity) { u32 aligned_target_byte_capacity = align_forward(target_byte_capacity, hashmap->granularity); void* commit_pointer = hashmap->keys + key_byte_capacity; u32 commit_size = aligned_target_byte_capacity - key_byte_capacity; commit(commit_pointer, commit_size); hashmap->key_page_capacity = aligned_target_byte_capacity / hashmap->granularity; } } { u32 value_byte_capacity = hashmap->value_page_capacity * hashmap->granularity; u32 target_byte_capacity = target_element_capacity * value_size; if (value_byte_capacity < target_byte_capacity) { u32 aligned_target_byte_capacity = align_forward(target_byte_capacity, hashmap->granularity); void* commit_pointer = hashmap->values + value_byte_capacity; u32 commit_size = aligned_target_byte_capacity - value_byte_capacity; commit(commit_pointer, commit_size); hashmap->value_page_capacity = aligned_target_byte_capacity / hashmap->granularity; } } } } fn GetOrPut<u8, u8> generic_pinned_hashmap_get_or_put(PinnedHashmap<u8, u8>* hashmap, u8* new_key_pointer, u32 key_size, u8* new_value_pointer, u32 value_size) { u32 index = generic_pinned_hashmap_get_index(hashmap, new_key_pointer, key_size); if (index != hashmap->invalid_index) { trap(); } else { generic_pinned_hashmap_ensure_capacity(hashmap, key_size, value_size, 1); u32 new_index = hashmap->length; hashmap->length += 1; u8* key_pointer = &hashmap->keys[new_index * key_size]; u8* value_pointer = &hashmap->values[new_index * value_size]; memcpy(key_pointer, new_key_pointer, key_size); memcpy(value_pointer, new_value_pointer, value_size); return { .key = key_pointer, .value = value_pointer, .existing = 0, }; } } typedef enum FileStatus { FILE_STATUS_ADDED = 0, FILE_STATUS_QUEUED = 1, FILE_STATUS_READ = 2, FILE_STATUS_ANALYZING = 3, } FileStatus; struct File { String path; String 
source_code; FileStatus status; }; struct SemaType; union Type { enum Id { sema, backend, }; u64 bits:57; Id id:1; forceinline SemaType* get_sema() { assert(id == Id::sema); return (SemaType*)(bits); } forceinline u8 is_resolved(); }; static_assert(sizeof(Type) == 8); enum class SemaTypeId: u8 { VOID, NORETURN, POINTER, INTEGER, ARRAY, STRUCT, UNION, COUNT, }; global auto constexpr type_id_bit_count = 3; static_assert(static_cast<u8>(SemaTypeId::COUNT) < (1 << type_id_bit_count), "Type bit count for id must be respected"); global auto constexpr type_flags_bit_count = 32 - (type_id_bit_count + 1); struct SemaType { u64 size; u64 alignment; SemaTypeId id : type_id_bit_count; u32 resolved: 1; u32 flags: type_flags_bit_count; u32 name; u8 get_bit_count() { assert(id == SemaTypeId::INTEGER); u32 bit_count_mask = (1 << (type_flags_bit_count - 1)) - 1; u8 bit_count = flags & bit_count_mask; assert(bit_count <= size * 8); assert(bit_count <= 64); return bit_count; } }; static_assert(sizeof(SemaType) == sizeof(u64) * 3, "Type must be 24 bytes"); forceinline u8 Type::is_resolved() { return (id == Id::backend) | ((id == Id::sema) & get_sema()->resolved); } struct Symbol { enum class Id: u8 { variable, function, }; enum class Linkage: u8 { internal, external, }; u32 name; Id id: 1; Linkage linkage: 1; }; typedef enum AbiInfoKind : u8 { ABI_INFO_IGNORE, ABI_INFO_DIRECT, ABI_INFO_DIRECT_PAIR, ABI_INFO_DIRECT_COERCE, ABI_INFO_DIRECT_COERCE_INT, ABI_INFO_DIRECT_SPLIT_STRUCT_I32, ABI_INFO_EXPAND_COERCE, ABI_INFO_INDIRECT, ABI_INFO_EXPAND, } AbiInfoKind; enum class Side : u8 { left, right, }; struct NodeDataType { enum class Id : u8 { VOID, INTEGER, TUPLE, CONTROL, MEMORY, POINTER, }; Id id; u8 bit_count:5; }; union AbiInfoPayload { NodeDataType direct; NodeDataType direct_pair[2]; NodeDataType direct_coerce; struct { NodeDataType type; u32 alignment; } indirect; }; typedef union AbiInfoPayload AbiInfoPayload; struct AbiInfoAttributes { u8 by_reg: 1; u8 zero_extend: 1; u8 
sign_extend: 1; u8 realign: 1; u8 by_value: 1; }; typedef struct AbiInfoAttributes AbiInfoAttributes; struct AbiInfo { AbiInfoPayload payload; u16 indices[2]; AbiInfoAttributes attributes; AbiInfoKind kind; }; struct FunctionPrototype { AbiInfo* argument_type_abis; // The count for this array is "original_argument_count", not "abi_argument_count" SemaType** original_argument_types; // TODO: are these needed? // Node::DataType* abi_argument_types; // u32 abi_argument_count; SemaType* original_return_type; AbiInfo return_type_abi; u32 original_argument_count; // TODO: is this needed? // Node::DataType abi_return_type; u8 varags:1; }; struct Function; global auto constexpr void_type_index = 0; global auto constexpr noreturn_type_index = 1; global auto constexpr opaque_pointer_type_index = 2; // global auto constexpr f32_type_offset = 3; // global auto constexpr f64_type_offset = 4; global auto constexpr integer_type_offset = 5; global auto constexpr integer_type_count = 64 * 2; global auto constexpr builtin_type_count = integer_type_count + integer_type_offset + 1; struct Unit { PinnedArray<File> files; PinnedArray<Function> functions; Arena* arena; Arena* node_arena; Arena* type_arena; PinnedHashmap<Hash, String> identifiers; SemaType* builtin_types; u64 generate_debug_information : 1; SemaType* get_integer_type(u8 bit_count, u8 signedness) { auto index = integer_type_offset + signedness * 64 + bit_count - 1; return &builtin_types[index]; } }; struct Node; struct Function { Symbol symbol; Node* root_node; Node** parameters; FunctionPrototype prototype; u32 node_count; u16 parameter_count; }; struct ProjectionData { NodeDataType type; u16 index; }; // This is a node in the "sea of nodes" sense: // https://en.wikipedia.org/wiki/Sea_of_nodes struct Node { enum class Id: u8 { ROOT, PROJECTION, RETURN, CONSTANT_INT, }; static_assert(sizeof(NodeDataType) <= 2); Node** edges; u32 gvn; u16 input_count; u16 output_count; u16 capacity; NodeDataType data_type; Id id; union { 
struct { u32 index; } projection; u64 constant_int; }; forceinline Slice<Node*> get_inputs() { return { .pointer = edges, .length = input_count, }; } forceinline Slice<Node*> get_outputs() { return { .pointer = edges + input_count, .length = output_count, }; } [[nodiscard]] fn Node* add(Unit* unit) { Node* node = unit->node_arena->allocate_one<Node>(); *node = {}; return node; } [[nodiscard]] fn Node* add_from_function(Unit* unit, Function* function) { u32 gvn = function->node_count; function->node_count += 1; Node* node = unit->node_arena->allocate_one<Node>(); *node = { .gvn = gvn, }; return node; } Node* project(Unit* unit, Function* function, ProjectionData data) { assert(data_type.id == NodeDataType::Id::TUPLE); Node* projection = Node::add_from_function(unit, function); assert(projection != this); projection->id = Node::Id::PROJECTION; projection->data_type = data.type; projection->reallocate_edges(unit, 4); projection->input_count = 1; projection->set_input(unit, function, this, 0); projection->projection.index = data.index; return projection; } void set_input(Unit* unit, Function* function, Node* input, u16 slot) { assert(slot < input_count); remove_output(unit, function, slot); *get_input_slot(slot) = input; if (input) { add_output(unit, function, input); } } Node** get_output_slot(u16 slot) { assert(slot < output_count); return edges + input_count + slot; } Node** get_input_slot(u16 slot) { assert(slot < input_count); return edges + slot; } void add_output(Unit* unit, Function* function, Node* input) { unused(unit); unused(function); if (input->output_count + input->input_count >= input->capacity) { trap(); } auto index = input->output_count; input->output_count += 1; *input->get_output_slot(index) = this; } void remove_output(Unit* unit, Function* function, u16 slot) { unused(unit); unused(function); if (slot < output_count) { Node** output_slot = get_output_slot(slot); if (*output_slot) { trap(); } } } void reallocate_edges(Unit* unit, u16 new_capacity) 
{ auto old_capacity = capacity; assert(new_capacity > old_capacity); auto length = output_count + input_count; Node** new_edges = unit->arena->allocate_many<Node*>(new_capacity); memcpy(new_edges, edges, length * sizeof(Node*)); memset(new_edges + length, 0, (new_capacity - length) * sizeof(Node)); capacity = new_capacity; edges = new_edges; // TODO: free } }; fn u64 round_up_to_next_power_of_2(u64 n) { n -= 1; n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; n |= n >> 16; n |= n >> 32; n += 1; return n; } fn Hash intern_identifier(Unit* unit, String identifier) { Hash hash = hash_bytes(identifier); (void)unit->identifiers.get_or_put(hash, identifier); return hash; } global String integer_names[] = { strlit("u1"), strlit("u2"), strlit("u3"), strlit("u4"), strlit("u5"), strlit("u6"), strlit("u7"), strlit("u8"), strlit("u9"), strlit("u10"), strlit("u11"), strlit("u12"), strlit("u13"), strlit("u14"), strlit("u15"), strlit("u16"), strlit("u17"), strlit("u18"), strlit("u19"), strlit("u20"), strlit("u21"), strlit("u22"), strlit("u23"), strlit("u24"), strlit("u25"), strlit("u26"), strlit("u27"), strlit("u28"), strlit("u29"), strlit("u30"), strlit("u31"), strlit("u32"), strlit("u33"), strlit("u34"), strlit("u35"), strlit("u36"), strlit("u37"), strlit("u38"), strlit("u39"), strlit("u40"), strlit("u41"), strlit("u42"), strlit("u43"), strlit("u44"), strlit("u45"), strlit("u46"), strlit("u47"), strlit("u48"), strlit("u49"), strlit("u50"), strlit("u51"), strlit("u52"), strlit("u53"), strlit("u54"), strlit("u55"), strlit("u56"), strlit("u57"), strlit("u58"), strlit("u59"), strlit("u60"), strlit("u61"), strlit("u62"), strlit("u63"), strlit("u64"), strlit("s1"), strlit("s2"), strlit("s3"), strlit("s4"), strlit("s5"), strlit("s6"), strlit("s7"), strlit("s8"), strlit("s9"), strlit("s10"), strlit("s11"), strlit("s12"), strlit("s13"), strlit("s14"), strlit("s15"), strlit("s16"), strlit("s17"), strlit("s18"), strlit("s19"), strlit("s20"), strlit("s21"), strlit("s22"), strlit("s23"), 
strlit("s24"), strlit("s25"), strlit("s26"), strlit("s27"), strlit("s28"), strlit("s29"), strlit("s30"), strlit("s31"), strlit("s32"), strlit("s33"), strlit("s34"), strlit("s35"), strlit("s36"), strlit("s37"), strlit("s38"), strlit("s39"), strlit("s40"), strlit("s41"), strlit("s42"), strlit("s43"), strlit("s44"), strlit("s45"), strlit("s46"), strlit("s47"), strlit("s48"), strlit("s49"), strlit("s50"), strlit("s51"), strlit("s52"), strlit("s53"), strlit("s54"), strlit("s55"), strlit("s56"), strlit("s57"), strlit("s58"), strlit("s59"), strlit("s60"), strlit("s61"), strlit("s62"), strlit("s63"), strlit("s64"), }; fn void unit_initialize(Unit* unit) { Arena* type_arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(64)); SemaType* builtin_types = type_arena->allocate_many<SemaType>(builtin_type_count); *unit = { .arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(4)), .node_arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(64)), .type_arena = type_arena, .builtin_types = builtin_types, }; builtin_types[void_type_index] = { .size = 0, .alignment = 1, .id = SemaTypeId::VOID, .resolved = 1, .name = intern_identifier(unit, strlit("void")), }; builtin_types[noreturn_type_index] = { .size = 0, .alignment = 1, .id = SemaTypeId::NORETURN, .resolved = 1, .name = intern_identifier(unit, strlit("noreturn")), }; builtin_types[opaque_pointer_type_index] = { .size = 8, .alignment = 8, .id = SemaTypeId::POINTER, .resolved = 1, .name = intern_identifier(unit, strlit("*any")), }; // TODO: float types u64 i; for (i = integer_type_offset; i < integer_type_offset + 64; i += 1) { u64 bit_count = i - integer_type_offset + 1; assert(bit_count >= 1 | bit_count <= 64); auto aligned_bit_count = round_up_to_next_power_of_2(bit_count); auto byte_count = max<u64>(aligned_bit_count / 8, 1); assert(byte_count <= bit_count); assert(byte_count == 1 | byte_count == 2 | byte_count == 4 | byte_count == 8); builtin_types[i] = { .size = 
byte_count, .alignment = byte_count, .id = SemaTypeId::INTEGER, .resolved = 1, .flags = static_cast<u32>(bit_count), .name = intern_identifier(unit, integer_names[bit_count - 1]), }; } for (; i < integer_type_offset + integer_type_count; i += 1) { u64 bit_count = i - (integer_type_offset + 64 - 1); assert(bit_count >= 1 | bit_count <= 64); auto aligned_bit_count = round_up_to_next_power_of_2(bit_count); auto byte_count = max<u64>(aligned_bit_count / 8, 1); assert(byte_count <= bit_count); assert(byte_count == 1 | byte_count == 2 | byte_count == 4 | byte_count == 8); builtin_types[i] = { .size = byte_count, .alignment = byte_count, .id = SemaTypeId::INTEGER, .resolved = 1, .flags = static_cast<u32>(bit_count | (1 << (type_flags_bit_count - 1))), // Signedness bit .name = intern_identifier(unit, integer_names[bit_count + 63]), }; } } static_assert(array_length(integer_names) == 128, "Integer name array must be 128 bytes"); struct Instance { Arena* arena; }; typedef struct Instance Instance; fn Unit* instance_add_unit(Instance* instance) { Unit* unit = instance->arena->allocate_one<Unit>(); *unit = { }; return unit; } struct Parser { u64 i; u32 line; u32 column; }; typedef struct Parser Parser; fn u64 safe_flag(u64 value, u64 flag) { u64 result = value & ((u64)0 - flag); return result; } fn u8 get_next_ch_safe(String file, u64 index) { u64 next_index = index + 1; u64 is_in_range = next_index < file.length; u64 safe_index = safe_flag(next_index, is_in_range); u8 unsafe_result = file.pointer[safe_index]; u64 safe_result = safe_flag(unsafe_result, is_in_range); assert(safe_result < 256); return (u8)safe_result; } fn u32 is_space(u8 ch, u8 next_ch) { u32 is_comment = (ch == '/') & (next_ch == '/'); u32 is_whitespace = ch == ' '; u32 is_vertical_tab = ch == 0x0b; u32 is_horizontal_tab = ch == '\t'; u32 is_line_feed = ch == '\n'; u32 is_carry_return = ch == '\r'; u32 result = (((is_vertical_tab | is_horizontal_tab) | (is_line_feed | is_carry_return)) | (is_comment | 
is_whitespace)); return result; } fn void skip_space(Parser* parser, String src) { u64 original_i = parser->i; if (original_i != src.length) { if (is_space(src.pointer[original_i], get_next_ch_safe(src, original_i))) { while (parser->i < src.length) { u64 index = parser->i; u8 ch = src.pointer[index]; u64 new_line = ch == '\n'; parser->line += new_line; if (new_line) { parser->column = index + 1; } if (!is_space(ch, get_next_ch_safe(src, parser->i))) { break; } u32 is_comment = src.pointer[index] == '/'; parser->i += is_comment + is_comment; if (is_comment) { while (parser->i < src.length) { if (src.pointer[parser->i] == '\n') { break; } parser->i += 1; } continue; } parser->i += 1; } } } } fn u64 is_lower(u8 ch) { return (ch >= 'a') & (ch <= 'z'); } fn u64 is_upper(u8 ch) { return (ch >= 'A') & (ch <= 'Z'); } fn u64 is_alphabetic(u8 ch) { return is_lower(ch) | is_upper(ch); } fn u64 is_decimal_digit(u8 ch) { return (ch >= '0') & (ch <= '9'); } fn u64 is_hex_digit(u8 ch) { return (is_decimal_digit(ch) | ((ch == 'a' | ch == 'A') | (ch == 'b' | ch == 'B'))) | (((ch == 'c' | ch == 'C') | (ch == 'd' | ch == 'D')) | ((ch == 'e' | ch == 'E') | (ch == 'f' | ch == 'F'))); } fn u64 is_identifier_start(u8 ch) { u64 alphabetic = is_alphabetic(ch); u64 is_underscore = ch == '_'; return alphabetic | is_underscore; } fn u64 is_identifier_ch(u8 ch) { u64 identifier_start = is_identifier_start(ch); u64 decimal = is_decimal_digit(ch); return identifier_start | decimal; } fn void expect_character(Parser* parser, String src, u8 expected_ch) { u64 index = parser->i; if (expect(index < src.length, 1)) { u8 ch = src.pointer[index]; u64 matches = ch == expected_ch; expect(matches, 1); parser->i += matches; if (!matches) { print(strlit("expected character '")); print(ch_to_str(expected_ch)); print(strlit("', but found '")); print(ch_to_str(ch)); print(strlit("'\n")); fail(); } } else { print(strlit("expected character '")); print(ch_to_str(expected_ch)); print(strlit("', but found end of 
file\n")); fail(); } } fn String parse_identifier(Parser* parser, String src) { u64 identifier_start_index = parser->i; u64 is_string_literal = src.pointer[identifier_start_index] == '"'; parser->i += is_string_literal; u8 identifier_start_ch = src.pointer[parser->i]; u64 is_valid_identifier_start = is_identifier_start(identifier_start_ch); parser->i += is_valid_identifier_start; if (expect(is_valid_identifier_start, 1)) { while (parser->i < src.length) { u8 ch = src.pointer[parser->i]; u64 is_identifier = is_identifier_ch(ch); expect(is_identifier, 1); parser->i += is_identifier; if (!is_identifier) { if (expect(is_string_literal, 0)) { expect_character(parser, src, '"'); } String result = src.slice(identifier_start_index, parser->i - is_string_literal); return result; } } fail(); } else { fail(); } } typedef enum Keyword : u32 { KEYWORD_COUNT, KEYWORD_INVALID = ~0u, } Keyword; // TODO: // fn Keyword parse_keyword(String identifier) // { // Keyword result = KEYWORD_INVALID; // return result; // } fn Hash parse_and_intern_identifier(Parser* parser, Unit* unit, String src) { String identifier = parse_identifier(parser, src); // Keyword keyword_index = parse_keyword(identifier); // if (expect(keyword_index != KEYWORD_INVALID, 0)) // { // fail(); // } if (expect(identifier.equal(strlit("_")), 0)) { return 0; } Hash result = intern_identifier(unit, identifier); return result; } // fn u32 get_line(Parser* parser) // { // return parser->line + 1; // } // // fn u32 get_column(Parser* parser) // { // return parser->i - parser->column + 1; // } fn File* unit_add_file(Unit* unit, String file_path) { auto* file = unit->files.add_one(); *file = { .path = file_path, }; return file; } fn void unit_file_read(Unit* unit, File* file) { assert(file->status == FILE_STATUS_ADDED || file->status == FILE_STATUS_QUEUED); file->source_code = file_read(unit->arena, file->path); file->status = FILE_STATUS_READ; } global constexpr auto brace_open = '{'; global constexpr auto brace_close = 
'}'; global constexpr auto parenthesis_open = '('; global constexpr auto parenthesis_close = ')'; global constexpr auto bracket_open = '['; global constexpr auto bracket_close = ']'; global constexpr auto pointer_sign = '*'; global constexpr auto end_of_statement = ';'; global constexpr auto end_of_argument = ','; global constexpr auto function_argument_start = parenthesis_open; global constexpr auto function_argument_end = parenthesis_close; global constexpr auto function_attribute_start = bracket_open; global constexpr auto function_attribute_end = bracket_close; global constexpr auto symbol_attribute_start = bracket_open; global constexpr auto symbol_attribute_end = bracket_close; global constexpr auto block_start = brace_open; global constexpr auto block_end = brace_close; global constexpr auto array_expression_start = bracket_open; // global constexpr auto array_expression_end = bracket_close; global constexpr auto composite_initialization_start = brace_open; // global constexpr auto composite_initialization_end = brace_close; global String function_attributes[] = { strlit("cc"), }; typedef enum FunctionAttribute { FUNCTION_ATTRIBUTE_CC, FUNCTION_ATTRIBUTE_COUNT, } FunctionAttribute; static_assert(array_length(function_attributes) == FUNCTION_ATTRIBUTE_COUNT, ""); global String calling_conventions[] = { strlit("c"), strlit("custom"), }; typedef enum CallingConvention { CALLING_CONVENTION_C, CALLING_CONVENTION_CUSTOM, CALLING_CONVENTION_COUNT, } CallingConvention; static_assert(array_length(calling_conventions) == CALLING_CONVENTION_COUNT, ""); typedef enum GlobalSymbolAttribute { GLOBAL_SYMBOL_ATTRIBUTE_EXPORT, GLOBAL_SYMBOL_ATTRIBUTE_EXTERN, GLOBAL_SYMBOL_ATTRIBUTE_COUNT, } GlobalSymbolAttribute; global String global_symbol_attributes[] = { strlit("export"), strlit("extern"), }; struct GlobalSymbolAttributes { u8 exported: 1; u8 external: 1; }; typedef struct GlobalSymbolAttributes GlobalSymbolAttributes; static_assert(array_length(global_symbol_attributes) 
== GLOBAL_SYMBOL_ATTRIBUTE_COUNT, ""); struct Analyzer { Function* function; }; fn SemaType* analyze_type(Parser* parser, Unit* unit, String src) { u64 start_index = parser->i; u8 start_ch = src.pointer[start_index]; u32 array_start = start_ch == array_expression_start; u32 u_start = start_ch == 'u'; u32 s_start = start_ch == 's'; u32 float_start = start_ch == 'f'; u32 void_start = start_ch == 'v'; u32 pointer_start = start_ch == pointer_sign; u32 integer_start = u_start | s_start; u32 number_start = integer_start | float_start; if (void_start) { trap(); } else if (array_start) { trap(); } else if (pointer_start) { trap(); } else if (number_start) { u64 expected_digit_start = start_index + 1; u64 i = expected_digit_start; u32 decimal_digit_count = 0; u64 top = i + 5; while (i < top) { u8 ch = src.pointer[i]; u32 is_digit = is_decimal_digit(ch); decimal_digit_count += is_digit; if (!is_digit) { u32 is_alpha = is_alphabetic(ch); if (is_alpha) { decimal_digit_count = 0; } break; } i += 1; } if (decimal_digit_count) { parser->i += 1; if (integer_start) { u64 signedness = s_start; u64 bit_size; u64 current_i = parser->i; assert(src.pointer[current_i] >= '0' & src.pointer[current_i] <= '9'); switch (decimal_digit_count) { case 0: fail(); case 1: bit_size = src.pointer[current_i] - '0'; break; case 2: bit_size = (src.pointer[current_i] - '0') * 10 + (src.pointer[current_i + 1] - '0'); break; default: fail(); } parser->i += decimal_digit_count; assert(!is_decimal_digit(src.pointer[parser->i])); if (bit_size) { auto* result = unit->get_integer_type(bit_size, signedness); return result; } else { fail(); } } else if (float_start) { trap(); } else { trap(); } } else { fail(); } } trap(); } fn u64 parse_hex(String string) { u64 value = 0; for (u8 ch : string) { u8 byte; auto is_decimal = (ch >= '0') & (ch <= '9'); auto is_lower_hex = (ch >= 'a') & (ch <= 'f'); auto is_upper_hex = (ch >= 'A') & (ch <= 'F'); if (is_decimal) { byte = ch - '0'; } else if (is_lower_hex) { byte = ch 
- 'a' + 10; } else if (is_upper_hex) { byte = ch - 'A' + 10; } else { fail(); } value = (value << 4) | (byte & 0x0f); } return value; } [[nodiscard]] fn Node* parse_constant_integer(Parser* parser, Unit* unit, String src, SemaType* type) { auto starting_ch = src[parser->i]; if (starting_ch == '0') { auto follow_up_character = src[parser->i + 1]; auto is_hex_start = follow_up_character == 'x'; auto is_octal_start = follow_up_character == 'o'; auto is_bin_start = follow_up_character == 'b'; auto is_prefixed_start = is_hex_start | is_octal_start | is_bin_start; auto follow_up_alpha = is_alphabetic(follow_up_character); auto follow_up_digit = is_decimal_digit(follow_up_character); auto is_valid_after_zero = is_space(follow_up_character, get_next_ch_safe(src, follow_up_character)) | (!follow_up_digit and !follow_up_alpha); if (is_prefixed_start) { enum class IntegerPrefix { hexadecimal, octal, binary, }; IntegerPrefix prefix; switch (follow_up_character) { case 'x': prefix = IntegerPrefix::hexadecimal; break; case 'o': prefix = IntegerPrefix::octal; break; case 'b': prefix = IntegerPrefix::binary; break; default: fail(); }; parser->i += 2; auto start = parser->i; switch (prefix) { case IntegerPrefix::hexadecimal: { while (is_hex_digit(src[parser->i])) { parser->i += 1; } auto slice = src.slice(start, parser->i); auto number = parse_hex(slice); auto* constant_int = Node::add(unit); constant_int->id = Node::Id::CONSTANT_INT; constant_int->data_type = { .id = NodeDataType::Id::INTEGER, .bit_count = type->get_bit_count() }; constant_int->constant_int = number; // TODO: is this fine? 
constant_int->reallocate_edges(unit, 1); return constant_int; } case IntegerPrefix::octal: trap(); case IntegerPrefix::binary: trap(); } } else if (is_valid_after_zero) { parser->i += 1; auto* constant_int = Node::add(unit); constant_int->id = Node::Id::CONSTANT_INT; constant_int->data_type = { .id = NodeDataType::Id::INTEGER, .bit_count = type->get_bit_count() }; constant_int->reallocate_edges(unit, 1); constant_int->constant_int = 0; return constant_int; } else { fail(); } trap(); } else { trap(); } } [[nodiscard]] fn Node* analyze_single_expression(Analyzer* analyzer, Parser* parser, Unit* unit, String src, SemaType* type, Side side) { unused(side); enum class Unary { NONE, ONE_COMPLEMENT, NEGATION, }; auto unary_operation = Unary::NONE; auto original_starting_ch_index = parser->i; u8 original_starting_ch = src[original_starting_ch_index]; switch (src[parser->i]) { case '\'': trap(); case '"': trap(); case '-': trap(); case '~': trap(); case '#': trap(); case composite_initialization_start: trap(); case array_expression_start: trap(); default: assert(is_decimal_digit(original_starting_ch) | is_identifier_start(original_starting_ch)); break; } auto starting_ch_index = parser->i; u8 starting_ch = src[starting_ch_index]; auto is_digit = is_decimal_digit(starting_ch); auto is_identifier = is_identifier_start(starting_ch); // auto line = get_line(parser); // auto column = get_column(parser); if (is_digit) { SemaType* integer_type; if (type) { integer_type = type; } else { switch (unary_operation) { case Unary::NONE: integer_type = unit->get_integer_type(64, 0); break; case Unary::ONE_COMPLEMENT: fail(); case Unary::NEGATION: fail(); } } if (integer_type->id != SemaTypeId::INTEGER) { fail(); } Node* constant_int = parse_constant_integer(parser, unit, src, integer_type); constant_int->gvn = analyzer->function->node_count; analyzer->function->node_count += 1; return constant_int; } else if (is_identifier) { trap(); } else { fail(); } } [[nodiscard]] fn Node* 
analyze_expression(Analyzer* analyzer, Parser* parser, Unit* unit, String src, SemaType* type, Side side) { enum class CurrentOperation { NONE, }; u64 iterations = 0; SemaType* iteration_type = type; auto current_operation = CurrentOperation::NONE; Node* previous_node = 0; while (1) { if ((iterations == 0) & !iteration_type) { trap(); } // u32 line = get_line(parser); // u32 column = get_column(parser); Node* current_node; if (src[parser->i] == '(') { trap(); } else { current_node = analyze_single_expression(analyzer, parser, unit, src, iteration_type, side); } skip_space(parser, src); switch (current_operation) { case CurrentOperation::NONE: previous_node = current_node; break; } auto original_index = parser->i; u8 original = src[original_index]; switch (original) { case end_of_statement: case end_of_argument: case parenthesis_close: case bracket_close: return previous_node; default: trap(); } iterations += 1; } } fn void analyze_local_block(Analyzer* analyzer, Parser* parser, Unit* unit, String src) { expect_character(parser, src, block_start); while (1) { skip_space(parser, src); if (src[parser->i] == block_end) { break; } auto statement_start_index = parser->i; u8 statement_start_ch = src[statement_start_index]; if (is_identifier_start(statement_start_ch)) { String identifier = parse_identifier(parser, src); if (identifier.equal(strlit("return"))) { skip_space(parser, src); auto* return_value = analyze_expression(analyzer, parser, unit, src, analyzer->function->prototype.original_return_type, Side::right); expect_character(parser, src, ';'); Function* function = analyzer->function; Node* ret_node = Node::add_from_function(unit, function); ret_node->id = Node::Id::RETURN; ret_node->data_type = { .id = NodeDataType::Id::CONTROL }; ret_node->reallocate_edges(unit, 4); ret_node->input_count = 2; ret_node->set_input(unit, function, function->root_node, 0); ret_node->set_input(unit, function, return_value, 1); } else { trap(); } } else { trap(); } } 
expect_character(parser, src, block_end); } typedef enum SystemVClass { SYSTEMV_CLASS_NONE, SYSTEMV_CLASS_MEMORY, SYSTEMV_CLASS_INTEGER, SYSTEMV_CLASS_SSE, SYSTEMV_CLASS_SSEUP, } SystemVClass; struct SystemVClassification { SystemVClass v[2]; }; typedef struct SystemVClassification SystemVClassification; struct SystemVRegisterCount { u32 gp_registers; u32 sse_registers; }; typedef struct SystemVRegisterCount SystemVRegisterCount; fn SystemVClassification systemv_classify(SemaType* type, u64 base_offset) { SystemVClassification result; u32 is_memory = base_offset >= 8; u32 current_index = is_memory; result.v[current_index] = SYSTEMV_CLASS_MEMORY; result.v[!current_index] = SYSTEMV_CLASS_NONE; switch (type->id) { case SemaTypeId::VOID: trap(); case SemaTypeId::NORETURN: trap(); case SemaTypeId::POINTER: trap(); case SemaTypeId::INTEGER: { u8 bit_count = type->get_bit_count(); switch (bit_count) { case 8: case 16: case 32: case 64: result.v[current_index] = SYSTEMV_CLASS_INTEGER; break; default: trap(); } } break; case SemaTypeId::COUNT: trap(); default: trap(); } return result; } fn u8 contains_no_user_data(SemaType* type, u64 start, u64 end) { unused(end); if (type->size <= start) { return 1; } switch (type->id) { case SemaTypeId::ARRAY: trap(); case SemaTypeId::STRUCT: trap(); case SemaTypeId::UNION: trap(); default: return 0; case SemaTypeId::COUNT: trap(); } } fn SemaType* systemv_get_int_type_at_offset(SemaType* type, u64 offset, SemaType* source_type, u64 source_offset) { unused(source_type); switch (type->id) { case SemaTypeId::VOID: trap(); case SemaTypeId::NORETURN: trap(); case SemaTypeId::POINTER: trap(); case SemaTypeId::INTEGER: { u8 bit_count = type->get_bit_count(); switch (bit_count) { case 8: case 16: case 32: case 64: if (offset == 0) { u64 start = source_offset + type->size; u64 end = source_offset + 8; if (contains_no_user_data(type, start, end)) { return type; } trap(); } else { trap(); } default: trap(); } trap(); } break; case 
SemaTypeId::COUNT: trap(); case SemaTypeId::ARRAY: trap(); case SemaTypeId::STRUCT: trap(); case SemaTypeId::UNION: trap(); } } fn void analyze_function(Parser* parser, Unit* unit, String src) { expect_character(parser, src, 'f'); expect_character(parser, src, 'n'); skip_space(parser, src); u64 has_function_attributes = src.pointer[parser->i] == function_attribute_start; parser->i += has_function_attributes; CallingConvention calling_convention = CALLING_CONVENTION_CUSTOM; if (has_function_attributes) { u64 mask = 0; while (1) { skip_space(parser, src); if (src.pointer[parser->i] == function_attribute_end) { break; } String attribute_candidate = parse_identifier(parser, src); u64 attribute_i; for (attribute_i = 0; attribute_i < array_length(function_attributes); attribute_i += 1) { String function_attribute_string = function_attributes[attribute_i]; if (attribute_candidate.equal(function_attribute_string)) { if (mask & (1 << attribute_i)) { fail(); } auto function_attribute = static_cast<FunctionAttribute>(attribute_i); mask |= (1 << attribute_i); switch (function_attribute) { case FUNCTION_ATTRIBUTE_CC: { skip_space(parser, src); expect_character(parser, src, '('); skip_space(parser, src); expect_character(parser, src, '.'); String candidate_cc = parse_identifier(parser, src); skip_space(parser, src); expect_character(parser, src, ')'); u64 cc_i; for (cc_i = 0; cc_i < array_length(calling_conventions); cc_i += 1) { String calling_convention_string = calling_conventions[cc_i]; if (calling_convention_string.equal(candidate_cc)) { calling_convention = static_cast<CallingConvention>(cc_i); break; } } if (cc_i == array_length(calling_conventions)) { fail(); } } break; default: trap(); } break; } } if (attribute_i == array_length(function_attributes)) { fail(); } skip_space(parser, src); u8 after_ch = src.pointer[parser->i]; switch (after_ch) { case function_attribute_end: break; default: fail(); } } expect_character(parser, src, function_attribute_end); 
skip_space(parser, src); } Hash name_hash = parse_and_intern_identifier(parser, unit, src); skip_space(parser, src); u64 has_global_attributes = src.pointer[parser->i] == symbol_attribute_start; parser->i += has_global_attributes; GlobalSymbolAttributes symbol_attributes = {}; if (has_global_attributes) { u64 mask = 0; while (1) { skip_space(parser, src); if (src.pointer[parser->i] == symbol_attribute_end) { break; } String candidate_attribute = parse_identifier(parser, src); skip_space(parser, src); switch (src.pointer[parser->i]) { case symbol_attribute_end: break; case ',': parser->i += 1; break; default: fail(); } u64 attribute_i; for (attribute_i = 0; attribute_i < array_length(global_symbol_attributes); attribute_i += 1) { String attribute_string = global_symbol_attributes[attribute_i]; if (attribute_string.equal(candidate_attribute)) { if (mask & (1 << attribute_i)) { fail(); } mask |= 1 << attribute_i; auto attribute = static_cast<GlobalSymbolAttribute>(attribute_i); switch (attribute) { case GLOBAL_SYMBOL_ATTRIBUTE_EXPORT: symbol_attributes.exported = 1; break; case GLOBAL_SYMBOL_ATTRIBUTE_EXTERN: symbol_attributes.external = 1; break; default: trap(); } break; } } if (attribute_i == array_length(global_symbol_attributes)) { fail(); } } expect_character(parser, src, symbol_attribute_end); skip_space(parser, src); } if (symbol_attributes.exported & symbol_attributes.external) { fail(); } expect_character(parser, src, function_argument_start); while (1) { skip_space(parser, src); if (src.pointer[parser->i] == function_argument_end) { break; } // TODO: function arguments in function definition trap(); } expect_character(parser, src, function_argument_end); skip_space(parser, src); PinnedArray<SemaType*> original_argument_types = {}; SemaType* original_return_type = analyze_type(parser, unit, src); skip_space(parser, src); switch (calling_convention) { case CALLING_CONVENTION_C: { // First process the return type ABI AbiInfo return_type_abi = {}; { 
SystemVClassification return_type_classes = systemv_classify(original_return_type, 0); assert(return_type_classes.v[1] != SYSTEMV_CLASS_MEMORY | return_type_classes.v[0] == SYSTEMV_CLASS_MEMORY); assert(return_type_classes.v[1] != SYSTEMV_CLASS_SSEUP | return_type_classes.v[0] == SYSTEMV_CLASS_SSE); SemaType* low_part = 0; switch (return_type_classes.v[0]) { case SYSTEMV_CLASS_INTEGER: { SemaType* result_type = systemv_get_int_type_at_offset(original_return_type, 0, original_return_type, 0); if (return_type_classes.v[1] == SYSTEMV_CLASS_NONE & original_return_type->get_bit_count() < 32) { trap(); } low_part = result_type; } break; default: trap(); } assert(low_part); SemaType* high_part = 0; switch (return_type_classes.v[1]) { case SYSTEMV_CLASS_NONE: break; case SYSTEMV_CLASS_MEMORY: trap(); case SYSTEMV_CLASS_INTEGER: trap(); case SYSTEMV_CLASS_SSE: trap(); case SYSTEMV_CLASS_SSEUP: trap(); } if (high_part) { trap(); } else { // TODO: u8 is_type = 1; if (is_type) { if (low_part == original_return_type) { return_type_abi = { .kind = ABI_INFO_DIRECT, }; } else { trap(); } } else { trap(); } } } // Now process the ABI for argument types PinnedArray<AbiInfo> argument_type_abis = {}; // u32 abi_argument_type_count = 0; { SystemVRegisterCount available_registers = { .gp_registers = 6, .sse_registers = 8, }; available_registers.gp_registers -= return_type_abi.kind == ABI_INFO_INDIRECT; // TODO: return by reference u8 return_by_reference = 0; if (return_by_reference) { trap(); } for (u32 original_argument_index = 0; original_argument_index < original_argument_types.length; original_argument_index += 1) { trap(); } } switch (return_type_abi.kind) { case ABI_INFO_IGNORE: case ABI_INFO_DIRECT: break; case ABI_INFO_DIRECT_PAIR: trap(); case ABI_INFO_DIRECT_COERCE: trap(); case ABI_INFO_DIRECT_COERCE_INT: trap(); case ABI_INFO_DIRECT_SPLIT_STRUCT_I32: trap(); case ABI_INFO_EXPAND_COERCE: trap(); case ABI_INFO_INDIRECT: trap(); case ABI_INFO_EXPAND: trap(); } // 
assert(abi_argument_type_count == 0); // TODO: reserve memory for them // Slice<Node::DataType> abi_argument_types = {}; for (u32 i = 0; i < argument_type_abis.length; i += 1) { trap(); } auto* function = unit->functions.add_one(); *function = { .symbol = { .name = name_hash, .id = Symbol::Id::function, .linkage = symbol_attributes.external ? Symbol::Linkage::external : Symbol::Linkage::internal, }, .root_node = 0, .parameters = unit->arena->allocate_many<Node*>(argument_type_abis.length), .prototype = { .argument_type_abis = argument_type_abis.pointer, .original_argument_types = original_argument_types.pointer, .original_return_type = original_return_type, .return_type_abi = return_type_abi, .original_argument_count = original_argument_types.length, .varags = 0, }, .node_count = 0, .parameter_count = (u16)argument_type_abis.length, }; Node* root_node = Node::add_from_function(unit, function); root_node->id = Node::Id::ROOT; root_node->data_type = { .id = NodeDataType::Id::TUPLE, }; root_node->reallocate_edges(unit, 4); // TODO: revisit // auto* control_node = root_node->project(unit, function, { // .type = { .id = NodeDataType::Id::CONTROL }, // }); // auto* memory_node = root_node->project(unit, function, {}); // auto* pointer_node = root_node->project(unit, function, {}); // function->parameters[0] = control_node; // function->parameters[1] = memory_node; // function->parameters[2] = pointer_node; for (u32 argument_i = 0; argument_i < argument_type_abis.length; argument_i += 1) { trap(); } // TODO: callgraph // TODO: revisit // Node* ret_node = Node::add_from_function(unit, function); // ret_node->id = Node::Id::RETURN; // ret_node->data_type = { .id = NodeDataType::Id::CONTROL }; // ret_node->reallocate_edges(unit, 4); // ret_node->input_count = 2; // ret_node->set_input(unit, function, root_node, 0); switch (symbol_attributes.external) { case 0: { Analyzer analyzer = {}; analyzer.function = function; analyze_local_block(&analyzer, parser, unit, src); } break; 
case 1: trap(); } } break; case CALLING_CONVENTION_CUSTOM: trap(); break; case CALLING_CONVENTION_COUNT: trap(); break; } } fn void unit_file_analyze(Unit* unit, File* file) { unit_file_read(unit, file); Parser parser = {}; String src = file->source_code; while (1) { skip_space(&parser, src); if (parser.i >= src.length) { break; } // u32 line = get_line(&parser); // u32 column = get_column(&parser); u64 declaration_start_index = parser.i; u8 declaration_start_ch = src.pointer[declaration_start_index]; switch (declaration_start_ch) { case '>': trap(); break; case 'f': if (get_next_ch_safe(src, declaration_start_index) == 'n') { analyze_function(&parser, unit, src); } else { fail(); } break; default: fail(); } } } global Instance instance; extern "C" void entry_point() { instance.arena = Arena::init(Arena::default_size, Arena::minimum_granularity, KB(4)); Unit* unit = instance_add_unit(&instance); File* file = unit_add_file(unit, strlit("tests/first/main.nat")); unit_initialize(unit); unit_file_analyze(unit, file); }