diff --git a/.gitignore b/.gitignore index b67016f..3f99447 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ project imgui.ini /cache/ .DS_Store +/*.perf +/.gdb_history diff --git a/bootstrap/bloat-buster/bb.c b/bootstrap/bloat-buster/bb.c index 2b63695..f63e8f1 100644 --- a/bootstrap/bloat-buster/bb.c +++ b/bootstrap/bloat-buster/bb.c @@ -1,287 +1,4928 @@ #include #include +#include +#include +#include #include -#include - -#if 0 -#include -#include -#include -#include -#include -#include - #include #include -#include -#include -#include -#include +#include -#define default_font_height (24) -global_variable u32 proportional_font_height = default_font_height; -global_variable u32 monospace_font_height = default_font_height; +#include -fn TextureIndex white_texture_create(Arena* arena, Renderer* renderer) +global_variable char** environment_pointer; + +typedef enum GPR_x86_64 { - u32 white_texture_width = 1024; - u32 white_texture_height = white_texture_width; - let(white_texture_buffer, arena_allocate(arena, u32, white_texture_width * white_texture_height)); - memset(white_texture_buffer, 0xff, white_texture_width * white_texture_height * sizeof(u32)); + REGISTER_X86_64_AL = 0x0, + REGISTER_X86_64_AH = REGISTER_X86_64_AL | (1 << 2), + REGISTER_X86_64_AX = REGISTER_X86_64_AL, + REGISTER_X86_64_EAX = REGISTER_X86_64_AL, + REGISTER_X86_64_RAX = REGISTER_X86_64_AL, - let(white_texture, renderer_texture_create(renderer, (TextureMemory) { - .pointer = white_texture_buffer, - .width = white_texture_width, - .height = white_texture_height, - .depth = 1, - .format = TEXTURE_FORMAT_R8G8B8A8_SRGB, - })); + REGISTER_X86_64_CL = 0x1, + REGISTER_X86_64_CH = REGISTER_X86_64_CL | (1 << 2), + REGISTER_X86_64_CX = REGISTER_X86_64_CL, + REGISTER_X86_64_ECX = REGISTER_X86_64_CL, + REGISTER_X86_64_RCX = REGISTER_X86_64_CL, - return white_texture; + REGISTER_X86_64_DL = 0x2, + REGISTER_X86_64_DH = REGISTER_X86_64_DL | (1 << 2), + REGISTER_X86_64_DX = REGISTER_X86_64_DL, + 
REGISTER_X86_64_EDX = REGISTER_X86_64_DL, + REGISTER_X86_64_RDX = REGISTER_X86_64_DL, + + REGISTER_X86_64_BL = 0x3, + REGISTER_X86_64_BH = REGISTER_X86_64_BL | (1 << 2), + REGISTER_X86_64_BX = REGISTER_X86_64_BL, + REGISTER_X86_64_EBX = REGISTER_X86_64_BL, + REGISTER_X86_64_RBX = REGISTER_X86_64_BL, + + REGISTER_X86_64_SPL = 0x4, + REGISTER_X86_64_SP = REGISTER_X86_64_SPL, + REGISTER_X86_64_ESP = REGISTER_X86_64_SPL, + REGISTER_X86_64_RSP = REGISTER_X86_64_SPL, + + REGISTER_X86_64_BPL = 0x5, + REGISTER_X86_64_BP = REGISTER_X86_64_BPL, + REGISTER_X86_64_EBP = REGISTER_X86_64_BPL, + REGISTER_X86_64_RBP = REGISTER_X86_64_BPL, + + REGISTER_X86_64_SIL = 0x6, + REGISTER_X86_64_SI = REGISTER_X86_64_SIL, + REGISTER_X86_64_ESI = REGISTER_X86_64_SIL, + REGISTER_X86_64_RSI = REGISTER_X86_64_SIL, + + REGISTER_X86_64_DIL = 0x7, + REGISTER_X86_64_DI = REGISTER_X86_64_DIL, + REGISTER_X86_64_EDI = REGISTER_X86_64_DIL, + REGISTER_X86_64_RDI = REGISTER_X86_64_DIL, + + REGISTER_X86_64_R8L = 0x8, + REGISTER_X86_64_R8W = REGISTER_X86_64_R8L, + REGISTER_X86_64_R8D = REGISTER_X86_64_R8L, + REGISTER_X86_64_R8 = REGISTER_X86_64_R8L, + + REGISTER_X86_64_R9L = 0x9, + REGISTER_X86_64_R9W = REGISTER_X86_64_R9L, + REGISTER_X86_64_R9D = REGISTER_X86_64_R9L, + REGISTER_X86_64_R9 = REGISTER_X86_64_R9L, + + REGISTER_X86_64_R10L = 0xa, + REGISTER_X86_64_R10W = REGISTER_X86_64_R10L, + REGISTER_X86_64_R10D = REGISTER_X86_64_R10L, + REGISTER_X86_64_R10 = REGISTER_X86_64_R10L, + + REGISTER_X86_64_R11L = 0xb, + REGISTER_X86_64_R11W = REGISTER_X86_64_R11L, + REGISTER_X86_64_R11D = REGISTER_X86_64_R11L, + REGISTER_X86_64_R11 = REGISTER_X86_64_R11L, + + REGISTER_X86_64_R12L = 0xc, + REGISTER_X86_64_R12W = REGISTER_X86_64_R12L, + REGISTER_X86_64_R12D = REGISTER_X86_64_R12L, + REGISTER_X86_64_R12 = REGISTER_X86_64_R12L, + + REGISTER_X86_64_R13L = 0xd, + REGISTER_X86_64_R13W = REGISTER_X86_64_R13L, + REGISTER_X86_64_R13D = REGISTER_X86_64_R13L, + REGISTER_X86_64_R13 = REGISTER_X86_64_R13L, + + 
REGISTER_X86_64_R14L = 0xe, + REGISTER_X86_64_R14W = REGISTER_X86_64_R14L, + REGISTER_X86_64_R14D = REGISTER_X86_64_R14L, + REGISTER_X86_64_R14 = REGISTER_X86_64_R14L, + + REGISTER_X86_64_R15L = 0xf, + REGISTER_X86_64_R15W = REGISTER_X86_64_R15L, + REGISTER_X86_64_R15D = REGISTER_X86_64_R15L, + REGISTER_X86_64_R15 = REGISTER_X86_64_R15L, +} GPR_x86_64; + +fn u8 gpr_is_extended(GPR_x86_64 gpr) +{ + return (gpr & 0b1000) >> 3; } -STRUCT(BBPanel) +#define X86_64_GPR_COUNT (16) + +STRUCT(Opcode) { - BBPanel* first; - BBPanel* last; - BBPanel* next; - BBPanel* previous; - BBPanel* parent; - f32 parent_percentage; - Axis2 split_axis; + u8 length:2; + u8 plus_register:1; + u8 reserved:2; + u8 bytes[3]; + u8 extension; }; -STRUCT(BBWindow) +typedef enum LegacyPrefix { - WindowingInstance* handle; - RenderWindow* render; - BBWindow* previous; - BBWindow* next; - BBPanel* root_panel; - UI_State* ui; -}; + LEGACY_PREFIX_F0, + LEGACY_PREFIX_F2, + LEGACY_PREFIX_F3, + LEGACY_PREFIX_2E, + LEGACY_PREFIX_36, + LEGACY_PREFIX_3E, + LEGACY_PREFIX_26, + LEGACY_PREFIX_64, + LEGACY_PREFIX_65, + LEGACY_PREFIX_66, + LEGACY_PREFIX_67, + LEGACY_PREFIX_COUNT, +} LegacyPrefix; -STRUCT(BBGUIState) +typedef enum SegmentRegisterOverride { - Arena* arena; - Timestamp last_frame_timestamp; - BBWindow* first_window; - BBWindow* last_window; - Renderer* renderer; - // TODO: should this not be thread local? 
- WindowingEventQueue event_queue; -}; -global_variable BBGUIState state; + SEGMENT_REGISTER_OVERRIDE_CS, + SEGMENT_REGISTER_OVERRIDE_SS, + SEGMENT_REGISTER_OVERRIDE_DS, + SEGMENT_REGISTER_OVERRIDE_ES, + SEGMENT_REGISTER_OVERRIDE_FS, + SEGMENT_REGISTER_OVERRIDE_GS, + SEGMENT_REGISTER_OVERRIDE_COUNT, +} SegmentRegisterOverride; -fn void ui_top_bar() +fn String segment_register_override_to_register_string(SegmentRegisterOverride segment_register_override) { - ui_push(pref_height, ui_em(1, 1)); + switch (segment_register_override) { - ui_push(child_layout_axis, AXIS2_X); - let(top_bar, ui_widget_make((UI_WidgetFlags) { - }, strlit("top_bar"))); - ui_push(parent, top_bar); - { - ui_button(strlit("Button 1")); - ui_button(strlit("Button 2")); - ui_button(strlit("Button 3")); - } - ui_pop(parent); - ui_pop(child_layout_axis); + case SEGMENT_REGISTER_OVERRIDE_CS: return strlit("cs"); + case SEGMENT_REGISTER_OVERRIDE_SS: return strlit("ss"); + case SEGMENT_REGISTER_OVERRIDE_DS: return strlit("ds"); + case SEGMENT_REGISTER_OVERRIDE_ES: return strlit("es"); + case SEGMENT_REGISTER_OVERRIDE_FS: return strlit("fs"); + case SEGMENT_REGISTER_OVERRIDE_GS: return strlit("gs"); + case SEGMENT_REGISTER_OVERRIDE_COUNT: unreachable(); } - ui_pop(pref_height); } -STRUCT(UI_Node) -{ - String name; - String type; - String value; - String namespace; - String function; +global_variable const u8 segment_register_overrides[] = { + [SEGMENT_REGISTER_OVERRIDE_CS] = LEGACY_PREFIX_2E, + [SEGMENT_REGISTER_OVERRIDE_SS] = LEGACY_PREFIX_36, + [SEGMENT_REGISTER_OVERRIDE_DS] = LEGACY_PREFIX_3E, + [SEGMENT_REGISTER_OVERRIDE_ES] = LEGACY_PREFIX_26, + [SEGMENT_REGISTER_OVERRIDE_FS] = LEGACY_PREFIX_64, + [SEGMENT_REGISTER_OVERRIDE_GS] = LEGACY_PREFIX_65, +}; +static_assert(array_length(segment_register_overrides) == SEGMENT_REGISTER_OVERRIDE_COUNT); + +global_variable u8 legacy_prefixes[] = { + 0xf0, + 0xf2, + 0xf3, + 0x2e, + 0x36, + 0x3e, + 0x26, + 0x64, + 0x65, + 0x66, + 0x67, }; -fn void 
ui_node(UI_Node node) +static_assert(array_length(legacy_prefixes) == LEGACY_PREFIX_COUNT); + +STRUCT(EncodingScalar) { - let(node_widget, ui_widget_make_format((UI_WidgetFlags) { - .draw_background = 1, - .draw_text = 1, - }, "{s} : {s} = {s}##{s}{s}", node.name, node.type, node.value, node.function, node.namespace)); + u64 rm_register:4; + u64 reg_register:4; + u64 is_rm_register:1; + u64 is_reg_register:1; + u64 implicit_register:1; + u64 is_immediate:4; + u64 is_displacement8:1; + u64 is_displacement32:1; + u64 is_relative8:1; + u64 is_relative32:1; + u64 rex_w:1; + u64 legacy_prefixes:LEGACY_PREFIX_COUNT; + union + { + u8 bytes[8]; + u64 value; + } immediate; + union + { + s32 value; + s8 bytes[4]; + } displacement; + union + { + s32 value; + s8 bytes[4]; + } relative; + Opcode opcode; +}; + +#define batch_element_count (64) +#define max_instruction_byte_count (16) + +u32 encode_instruction_batch(u8* restrict output, const EncodingScalar* const restrict encodings, u64 encoding_count) +{ + assert(encoding_count); + u8 buffers[batch_element_count][max_instruction_byte_count]; + u8 instruction_lengths[batch_element_count]; + + for (u32 encoding_index = 0; encoding_index < MIN(encoding_count, batch_element_count); encoding_index += 1) /* same bound as the copy loop below: never index past the fixed-size buffers */ + { + let(encoding, encodings[encoding_index]); + + const u8* const start = (const u8* const) &buffers[encoding_index]; + u8* restrict local_buffer = (u8* restrict)&buffers[encoding_index]; + u8* restrict it = local_buffer; + + for (LegacyPrefix prefix = 0; prefix < LEGACY_PREFIX_COUNT; prefix += 1) + { + let(is_prefix, (encoding.legacy_prefixes & (1 << prefix)) >> prefix); + let(prefix_byte, legacy_prefixes[prefix]); + *it = prefix_byte; + it += is_prefix; + } + + u8 has_base_register = encoding.is_rm_register | encoding.is_reg_register | encoding.implicit_register; + + u8 rex_base = 0x40; + u8 rex_b = 0x01; + u8 rex_x = 0x02; + unused(rex_x); + u8 rex_r = 0x04; + u8 rex_w = 0x08; + u8 is_reg_direct_addressing_mode = !(encoding.is_displacement8 |
encoding.is_displacement32); + u8 reg_register = encoding.reg_register; + u8 rm_register = encoding.rm_register; + u8 byte_rex_b = rex_b * gpr_is_extended(rm_register); + u8 byte_rex_x = 0; // TODO: rex_x * encoding.scaled_index_register; + u8 byte_rex_r = rex_r * gpr_is_extended(reg_register); + u8 byte_rex_w = rex_w * encoding.rex_w; + u8 byte_rex = (byte_rex_b | byte_rex_x) | (byte_rex_r | byte_rex_w); + u8 rex = (rex_base | byte_rex); + u8 encode_rex = byte_rex != 0; + *it = rex; + it += encode_rex; + + *it = encoding.opcode.bytes[0] | ((encoding.rm_register & 0b111) * encoding.opcode.plus_register * (encoding.opcode.length == 1)); + it += 1; + + *it = encoding.opcode.bytes[1] | ((encoding.rm_register & 0b111) * encoding.opcode.plus_register * (encoding.opcode.length == 2)); + it += encoding.opcode.length > 1; + + *it = encoding.opcode.bytes[2] | ((encoding.rm_register & 0b111) * encoding.opcode.plus_register * (encoding.opcode.length == 3)); + it += encoding.opcode.length > 2; + + u8 encode_mod_rm = ((encoding.is_rm_register | encoding.is_reg_register) & (!encoding.opcode.plus_register)) | encoding.is_displacement8 | encoding.is_displacement32; + + // Mod: + // 00: No displacement (except when R/M = 101, where a 32-bit displacement follows). + // 01: 8-bit signed displacement follows. + // 10: 32-bit signed displacement follows. + // 11: Register addressing (no memory access). + + u8 mod_is_displacement32 = encoding.is_displacement32; + u8 mod_is_displacement8 = encoding.is_displacement8 & ((encoding.displacement.bytes[0] != 0) | (encoding.is_rm_register & ((encoding.rm_register & 0b111) == REGISTER_X86_64_RBP))); + // TODO: fix if necessary + u8 mod = (((mod_is_displacement32 * has_base_register) << 1) | (mod_is_displacement8 * has_base_register)) | ((is_reg_direct_addressing_mode << 1) | is_reg_direct_addressing_mode); + // A register operand. + // An opcode extension (in some instructions). 
+ u8 reg = (reg_register & 0b111) | encoding.opcode.extension; + // When mod is 00, 01, or 10: Specifies a memory address or a base register. + // When mod is 11: Specifies a register. + u8 rm = (rm_register & 0b111) | (!has_base_register * 0b100); + u8 mod_rm = (mod << 6) | (reg << 3) | rm; + *it = mod_rm; + it += encode_mod_rm; + + // When mod is 00, 01, or 10 and rm = 100, a SIB (Scale-Index-Base) byte follows the ModR/M byte to further specify the addressing mode. + u8 encode_sib = (mod != 0b11) & (rm == 0b100); + u8 sib_scale = 0; + u8 sib_index = 0b100; + u8 sib_base = ((rm_register & 0b111) * encoding.is_rm_register) | (!encoding.is_rm_register * 0b101); + u8 sib_byte = sib_scale << 6 | sib_index << 3 | sib_base; + *it = sib_byte; + it += encode_sib; + + *(s8*)it = encoding.displacement.bytes[0]; + it += mod_is_displacement8 * sizeof(s8); + + *(s32*)it = encoding.displacement.value; + it += mod_is_displacement32 * sizeof(s32); + + *(u8*) it = encoding.immediate.bytes[0]; + it += ((encoding.is_immediate & (1 << 0)) >> 0) * sizeof(u8); + + *(u16*) it = *(u16*)(&encoding.immediate.bytes[0]); + it += ((encoding.is_immediate & (1 << 1)) >> 1) * sizeof(u16); + + *(u32*) it = *(u32*)(&encoding.immediate.bytes[0]); + it += ((encoding.is_immediate & (1 << 2)) >> 2) * sizeof(u32); + + *(u64*) it = encoding.immediate.value; + it += ((encoding.is_immediate & (1 << 3)) >> 3) * sizeof(u64); + + *(s8*)it = encoding.relative.bytes[0]; + it += encoding.is_relative8 * sizeof(s8); + + *(s32*)it = encoding.relative.value; + it += encoding.is_relative32 * sizeof(s32); + + let_cast(u8, instruction_length, it - start); + instruction_lengths[encoding_index] = instruction_length; + } + + u8* restrict it = output; + + for (u32 encoding_index = 0; encoding_index < MIN(encoding_count, batch_element_count); encoding_index += 1) + { + let(instruction_length, instruction_lengths[encoding_index]); +#if USE_MEMCPY + memcpy(it, &buffers[encoding_index], instruction_length); +#else + for (u8 
byte = 0; byte < instruction_length; byte += 1) + { + it[byte] = buffers[encoding_index][byte]; + } +#endif + it += instruction_length; + } + + let(length, (u32)(it - output)); + assert(it - output != 0); + assert(length); + return length; } -fn void app_update() +#define cc_count(x) ((MNEMONIC_x86_64_ ## x ## z - MNEMONIC_x86_64_ ## x ## a) + 1) +// #define cmov_count ((MNEMONIC_x86_64_cmovz - MNEMONIC_x86_64_cmova) + 1) +// #define jcc_count ((MNEMONIC_x86_64_jz - MNEMONIC_x86_64_ja) + 1) +#define cmov_count cc_count(cmov) +#define jcc_count cc_count(j) +#define setcc_count cc_count(set) + +#define cc_index(x) \ +fn u8 x ## _index(Mnemonic_x86_64 mnemonic) \ +{\ + assert(mnemonic >= MNEMONIC_x86_64_ ## x ## a && mnemonic <= MNEMONIC_x86_64_ ## x ## z);\ + return (u8)(mnemonic - MNEMONIC_x86_64_ ## x ## a);\ +} + +cc_index(cmov) +cc_index(j) +cc_index(set) + +global_variable const u8 cc_opcodes_low[] = { + 0x07, + 0x03, + 0x02, + 0x06, + 0x02, + 0x04, + 0x0F, + 0x0D, + 0x0C, + 0x0E, + 0x06, + 0x02, + 0x03, + 0x07, + 0x03, + 0x05, + 0x0E, + 0x0C, + 0x0D, + 0x0F, + 0x01, + 0x0B, + 0x09, + 0x05, + 0x00, + 0x0A, + 0x0A, + 0x0B, + 0x08, + 0x04, +}; +static_assert(array_length(cc_opcodes_low) == cmov_count); +static_assert(array_length(cc_opcodes_low) == jcc_count); +static_assert(array_length(cc_opcodes_low) == setcc_count); + +ENUM(OperandId, u8, + op_none, + op_al, + op_ax, + op_eax, + op_rax, + op_cl, + op_cx, + op_ecx, + op_rcx, + op_dl, + op_dx, + op_edx, + op_rdx, + op_r8, + op_r16, + op_r32, + op_r64, + op_rm8, + op_rm16, + op_rm32, + op_rm64, + op_imm8, + op_imm16, + op_imm32, + op_imm64, + op_rel8, + op_rel32, + op_m8, + op_m16, + op_m32, + op_m64, + op_m128, + + op_ds_rsi_m8, + op_ds_rsi_m16, + op_ds_rsi_m32, + op_ds_rsi_m64, + + op_es_rdi_m8, + op_es_rdi_m16, + op_es_rdi_m32, + op_es_rdi_m64, + + op_one_literal, +); + +#define operand_kind_array_element_count (4) + +fn String operand_to_string(OperandId operand_id) { - let(frame_end, os_timestamp()); - 
windowing_poll_events(/* &state.event_queue */); - let(frame_ms, os_resolve_timestamps(state.last_frame_timestamp, frame_end, TIME_UNIT_MILLISECONDS)); - state.last_frame_timestamp = frame_end; - - Renderer* renderer = state.renderer; - - BBWindow* window = state.first_window; - while (likely(window)) + switch (operand_id) { - let(previous, window->previous); - let(next, window->next); + case_to_name(op_, none); + case op_al: return strlit("al"); + case op_ax: return strlit("ax"); + case op_eax: return strlit("eax"); + case op_rax: return strlit("rax"); + case op_cl: return strlit("cl"); + case op_cx: return strlit("cx"); + case op_ecx: return strlit("ecx"); + case op_rcx: return strlit("rcx"); + case op_dl: return strlit("dl"); + case op_dx: return strlit("dx"); + case op_edx: return strlit("edx"); + case op_rdx: return strlit("rdx"); + case_to_name(op_, r8); + case_to_name(op_, r16); + case_to_name(op_, r32); + case_to_name(op_, r64); + case_to_name(op_, rm8); + case_to_name(op_, rm16); + case_to_name(op_, rm32); + case_to_name(op_, rm64); + case_to_name(op_, imm8); + case_to_name(op_, imm16); + case_to_name(op_, imm32); + case_to_name(op_, imm64); + case_to_name(op_, rel8); + case_to_name(op_, rel32); + case_to_name(op_, m8); + case_to_name(op_, m16); + case_to_name(op_, m32); + case_to_name(op_, m64); + case_to_name(op_, m128); + case_to_name(op_, ds_rsi_m8); + case_to_name(op_, ds_rsi_m16); + case_to_name(op_, ds_rsi_m32); + case_to_name(op_, ds_rsi_m64); - let(render_window, window->render); - renderer_window_frame_begin(renderer, render_window); + case_to_name(op_, es_rdi_m8); + case_to_name(op_, es_rdi_m16); + case_to_name(op_, es_rdi_m32); + case_to_name(op_, es_rdi_m64); + case op_one_literal: return strlit("1"); + } +} - ui_state_select(window->ui); +STRUCT(Operands) +{ + OperandId values[operand_kind_array_element_count]; + u8 count:7; + u8 implicit_operands:1; +}; - if (likely(ui_build_begin(window->handle, frame_ms, &state.event_queue))) 
+STRUCT(Encoding) +{ + Operands operands; + Opcode opcode; + u8 rex_w:1; + u8 operand_size_override:1; +}; +decl_vb(Encoding); + +STRUCT(Encoding2) +{ + // Values + union + { + union { - ui_push(font_size, default_font_height); + s8 cb; + s16 cw; + s32 cd; + s32 cp_offset; + s64 co; + s64 ct_offset; + } code_offset; + union + { + u8 ib; + u16 iw; + u32 id; + u64 io; + } immediate; + }; + u16 segment_selector; + u32 digit:3; - ui_top_bar(); - ui_push(child_layout_axis, AXIS2_X); - let(workspace_widget, ui_widget_make_format((UI_WidgetFlags) {}, "workspace{u64}", window->handle)); - ui_push(parent, workspace_widget); + // Encoding description + u32 np:1; + u32 nfx:1; + u32 rex_w:1; + u32 is_digit:1; + u32 reg:1; + u32 is_code_offset:6; + u32 is_immediate:4; + u32 is_plus_reg:4; +}; + +STRUCT(Batch) +{ + Mnemonic_x86_64 mnemonic; + u64 legacy_prefixes:LEGACY_PREFIX_COUNT; + u32 encoding_offset; + u32 encoding_count; +}; +decl_vb(Batch); + +fn u8 op_is_gpra(OperandId operand_kind) +{ + return operand_kind >= op_al && operand_kind <= op_rax; +} + +fn u8 op_gpra_get_index(OperandId operand) +{ + assert(op_is_gpra(operand)); + return operand - op_al; +} + +fn String op_gpra_to_string(OperandId operand) +{ + let(index, op_gpra_get_index(operand)); + String register_a_names[] = { + strlit("al"), + strlit("ax"), + strlit("eax"), + strlit("rax"), + }; + + return register_a_names[index]; +} + +fn u8 op_is_gprd(OperandId operand_kind) +{ + return operand_kind >= op_dl && operand_kind <= op_rdx; +} + +fn String op_gprd_to_string(OperandId operand) +{ + assert(op_is_gprd(operand)); + switch (operand) + { + case op_dl: return strlit("dl"); + case op_dx: return strlit("dx"); + case op_edx: return strlit("edx"); + case op_rdx: return strlit("rdx"); + default: unreachable(); + } +} + +fn u8 op_is_imm(OperandId operand_kind) +{ + return operand_kind >= op_imm8 && operand_kind <= op_imm64; +} + +fn u8 op_is_gpr_no_gpra_exclusive(OperandId operand_kind) +{ + return operand_kind >= op_r8 
&& operand_kind <= op_r64; +} + +fn u8 op_is_rm(OperandId operand_kind) +{ + return operand_kind >= op_rm8 && operand_kind <= op_rm64; +} + +fn u8 op_is_gpr_no_gpra(OperandId operand_kind) +{ + return op_is_gpr_no_gpra_exclusive(operand_kind) | op_is_rm(operand_kind); +} + +fn u8 op_is_relative(OperandId operand_kind) +{ + return operand_kind >= op_rel8 && operand_kind <= op_rel32; +} + +fn u8 op_is_memory(OperandId operand) +{ + return operand >= op_m8 && operand <= op_m128; +} + +fn u8 op_is_es_rdi_memory(OperandId operand) +{ + return operand >= op_es_rdi_m8 && operand <= op_es_rdi_m64; +} + +fn u8 op_is_ds_rsi_memory(OperandId operand) +{ + return operand >= op_ds_rsi_m8 && operand <= op_ds_rsi_m64; +} + +fn u8 op_rm_get_index(OperandId operand_kind) +{ + assert(op_is_rm(operand_kind)); + return operand_kind - op_rm8; +} + +fn u8 op_gprd_get_index(OperandId operand_kind) +{ + assert(op_is_gprd(operand_kind)); + return operand_kind - op_dl; /* size index 0..3 for dl/dx/edx/rdx, like the other get_index helpers (was returning the range-check boolean) */ +} + +fn u8 op_gpr_exclusive_get_index(OperandId operand_kind) +{ + assert(op_is_gpr_no_gpra_exclusive(operand_kind)); + return operand_kind - op_r8; +} + +fn u8 op_gpr_get_index(OperandId operand_kind) +{ + assert(op_is_gpr_no_gpra(operand_kind)); + return op_is_rm(operand_kind) ?
op_rm_get_index(operand_kind) : op_gpr_exclusive_get_index(operand_kind); +} + +fn u8 op_imm_get_index(OperandId operand_kind) +{ + assert(op_is_imm(operand_kind)); + return operand_kind - op_imm8; +} + +fn u8 op_get_size_out_of_index(u8 index) +{ + return 1 << index; +} + +STRUCT(TestDataset) +{ + const Batch* const restrict batches; + u64 batch_count; + const Encoding* const restrict encodings; + u64 encoding_count; +}; + +fn String sample_immediate_strings(u8 index) +{ + global_variable const String strings[] = { + strlit("10"), + strlit("1000"), + strlit("10000000"), + strlit("1000000000000000"), + }; + + return strings[index]; +} + +fn u64 sample_immediate_values(u8 index) +{ + global_variable const u64 immediates[] = { + 10, + 1000, + 10000000, + 1000000000000000, + }; + return immediates[index]; +} + +fn String gpr_to_string(GPR_x86_64 gpr, u8 index, u8 switcher) +{ + assert(switcher == 0 || switcher == 1); + global_variable const String gpr_names[X86_64_GPR_COUNT][4] = { + [REGISTER_X86_64_AX] = { + strlit("al"), + strlit("ax"), + strlit("eax"), + strlit("rax"), + }, + [REGISTER_X86_64_CX] = { + strlit("cl"), + strlit("cx"), + strlit("ecx"), + strlit("rcx"), + }, + [REGISTER_X86_64_DX] = { + strlit("dl"), + strlit("dx"), + strlit("edx"), + strlit("rdx"), + }, + [REGISTER_X86_64_BX] = { + strlit("bl"), + strlit("bx"), + strlit("ebx"), + strlit("rbx"), + }, + [REGISTER_X86_64_SP] = { + strlit("ah"), // Check alt names + strlit("sp"), + strlit("esp"), + strlit("rsp"), + }, + [REGISTER_X86_64_BP] = { + strlit("ch"), + strlit("bp"), + strlit("ebp"), + strlit("rbp"), + }, + [REGISTER_X86_64_SI] = { + strlit("dh"), + strlit("si"), + strlit("esi"), + strlit("rsi"), + }, + [REGISTER_X86_64_DI] = { + strlit("bh"), + strlit("di"), + strlit("edi"), + strlit("rdi"), + }, + [REGISTER_X86_64_R8] = { + strlit("r8b"), + strlit("r8w"), + strlit("r8d"), + strlit("r8"), + }, + [REGISTER_X86_64_R9] = { + strlit("r9b"), + strlit("r9w"), + strlit("r9d"), + strlit("r9"), + }, + 
[REGISTER_X86_64_R10] = { + strlit("r10b"), + strlit("r10w"), + strlit("r10d"), + strlit("r10"), + }, + [REGISTER_X86_64_R11] = { + strlit("r11b"), + strlit("r11w"), + strlit("r11d"), + strlit("r11"), + }, + [REGISTER_X86_64_R12] = { + strlit("r12b"), + strlit("r12w"), + strlit("r12d"), + strlit("r12"), + }, + [REGISTER_X86_64_R13] = { + strlit("r13b"), + strlit("r13w"), + strlit("r13d"), + strlit("r13"), + }, + [REGISTER_X86_64_R14] = { + strlit("r14b"), + strlit("r14w"), + strlit("r14d"), + strlit("r14"), + }, + [REGISTER_X86_64_R15] = { + strlit("r15b"), + strlit("r15w"), + strlit("r15d"), + strlit("r15"), + }, + }; + + global_variable const String alt_register_names[] = { + strlit("spl"), + strlit("bpl"), + strlit("sil"), + strlit("dil"), + }; + + return (unlikely(((gpr & 0b1100) == 0b0100) & ((switcher != 0) & (index == 0)))) ? alt_register_names[gpr & 0b11] : gpr_names[gpr][index]; /* only registers 4..7 have spl/bpl/sil/dil alternates; r12..r15 also have bit 2 set and must keep r12b..r15b */ +} + +fn String format_instruction1(String buffer, String mnemonic, String op) +{ + u64 i = 0; + + memcpy(buffer.pointer + i, mnemonic.pointer, mnemonic.length); + i += mnemonic.length; + + buffer.pointer[i] = ' '; + i += 1; + + memcpy(buffer.pointer + i, op.pointer, op.length); + i += op.length; + + assert(i < buffer.length); + buffer.pointer[i] = 0; + + return (String) { + .pointer = buffer.pointer, + .length = i, + }; +} + +fn String format_instruction2(String buffer, String mnemonic, String op1, String op2) +{ + u64 i = 0; + + memcpy(buffer.pointer + i, mnemonic.pointer, mnemonic.length); + i += mnemonic.length; + + buffer.pointer[i] = ' '; + i += 1; + + memcpy(buffer.pointer + i, op1.pointer, op1.length); + i += op1.length; + + buffer.pointer[i] = ','; + buffer.pointer[i + 1] = ' '; + i += 2; + + memcpy(buffer.pointer + i, op2.pointer, op2.length); + i += op2.length; + + assert(i < buffer.length); + buffer.pointer[i] = 0; + + return (String) { + .pointer = buffer.pointer, + .length = i, + }; +} + +fn String format_instruction3(String buffer, String mnemonic, String op1, String op2,
String op3) +{ + u64 i = 0; + + memcpy(buffer.pointer + i, mnemonic.pointer, mnemonic.length); + i += mnemonic.length; + + buffer.pointer[i] = ' '; + i += 1; + + memcpy(buffer.pointer + i, op1.pointer, op1.length); + i += op1.length; + + buffer.pointer[i] = ','; + buffer.pointer[i + 1] = ' '; + i += 2; + + memcpy(buffer.pointer + i, op2.pointer, op2.length); + i += op2.length; + + buffer.pointer[i] = ','; + buffer.pointer[i + 1] = ' '; + i += 2; + + memcpy(buffer.pointer + i, op3.pointer, op3.length); + i += op3.length; + + assert(i < buffer.length); + buffer.pointer[i] = 0; + + return (String) { + .pointer = buffer.pointer, + .length = i, + }; +} + +fn String format_displacement(String buffer, String register_string, String displacement_string, u8 register_index) +{ + u64 length = 0; + String result = { + .pointer = buffer.pointer, + }; + + const String indirect_types[] = { + strlit("byte ptr "), + strlit("word ptr "), + strlit("dword ptr "), + strlit("qword ptr "), + strlit("xmmword ptr "), + }; + + String indirect_type = indirect_types[register_index]; + + memcpy(&buffer.pointer[length], indirect_type.pointer, indirect_type.length); + length += indirect_type.length; + + buffer.pointer[length] = '['; + length += 1; + + memcpy(&buffer.pointer[length], register_string.pointer, register_string.length); + length += register_string.length; + + u8 omit_displacement = displacement_string.length == 1 && displacement_string.pointer[0] == '0'; /* check the length before reading the first byte so an empty string is never dereferenced */ + buffer.pointer[length] = ' '; + length += !omit_displacement; + + buffer.pointer[length] = '+'; + length += !omit_displacement; + + buffer.pointer[length] = ' '; + length += !omit_displacement; + + memcpy(&buffer.pointer[length], displacement_string.pointer, displacement_string.length); + length += displacement_string.length * !omit_displacement; + + buffer.pointer[length] = ']'; + length += 1; + + result.length = length; + + return result; +} + +STRUCT(ClangCompileAssembly) +{ + String instruction; + String clang_path; +
VirtualBuffer(u8)* clang_pipe_buffer; +}; + +fn String clang_compile_assembly(Arena* arena, ClangCompileAssembly args) +{ + String my_assembly_path = strlit(BUILD_DIR "/my_assembly_source.S"); + FileWriteOptions options = { + .path = my_assembly_path, + .content = args.instruction, + }; + file_write(options); + + String out_path = strlit(BUILD_DIR "/my_assembly_output"); + + char* arguments[] = { + string_to_c(args.clang_path), + string_to_c(my_assembly_path), + "-o", + string_to_c(out_path), + "-masm=intel", + "-nostdlib", + "-Wl,--oformat=binary", + 0, + }; + RunCommandOptions run_options = { + .stdout_stream = { + .buffer = args.clang_pipe_buffer->pointer, + .length = &args.clang_pipe_buffer->length, + .capacity = args.clang_pipe_buffer->capacity, + .policy = CHILD_PROCESS_STREAM_PIPE, + }, + // .stderr_stream = { + // .policy = CHILD_PROCESS_STREAM_IGNORE, + // }, + }; + RunCommandResult result = run_command(arena, (CStringSlice)array_to_slice(arguments), environment_pointer, run_options); + let(success, result.termination_kind == PROCESS_TERMINATION_EXIT && result.termination_code == 0); + if (!success) + { + os_exit(1); + } + + String bytes = file_read(arena, out_path); + return bytes; +} + +STRUCT(DisassemblyResult) +{ + String whole; + String instruction; +}; + +STRUCT(DisassemblyArguments) +{ + String binary; + LLVMDisasmContextRef context; + String disassembly_buffer; + u64 gross:1; +}; + +#define llvm_initialize_macro(target, fn_prefix) \ + fn_prefix LLVMInitialize ## target ## Target();\ + fn_prefix LLVMInitialize ## target ## TargetInfo();\ + fn_prefix LLVMInitialize ## target ## TargetMC();\ + fn_prefix LLVMInitialize ## target ## AsmParser();\ + fn_prefix LLVMInitialize ## target ## AsmPrinter();\ + fn_prefix LLVMInitialize ## target ## Disassembler() + +#define _null_prefix_() + +llvm_initialize_macro(X86, extern void); + +fn String disassemble_binary(Arena* arena, DisassemblyArguments arguments) +{ + unused(arena); + unused(arguments); + String 
result = {}; + let(instruction_bytes, LLVMDisasmInstruction(arguments.context, arguments.binary.pointer, arguments.binary.length, 0, (char*)arguments.disassembly_buffer.pointer, arguments.disassembly_buffer.length)); + + if (instruction_bytes) + { + result = cstr(arguments.disassembly_buffer.pointer); + + assert(result.pointer[0] == '\t'); + result.pointer += 1; + result.length -= 1; + for (u64 i = 0; i < result.length; i += 1) + { + if (result.pointer[i] == '\t') { - // Node visualizer - ui_push(child_layout_axis, AXIS2_Y); - let(node_visualizer_widget, ui_widget_make_format((UI_WidgetFlags) { - .draw_background = 1, - }, "node_visualizer{u64}", window->handle)); - - ui_push(parent, node_visualizer_widget); - { - ui_node((UI_Node) { - .name = strlit("a"), - .type = strlit("s32"), - .value = strlit("1"), - .namespace = strlit("foo"), - .function = strlit("main"), - }); - ui_node((UI_Node) { - .name = strlit("b"), - .type = strlit("s32"), - .value = strlit("2"), - .namespace = strlit("foo"), - .function = strlit("main"), - }); - } - ui_pop(parent); - ui_pop(child_layout_axis); - - // Side-panel stub - ui_button(strlit("Options")); + result.pointer[i] = ' '; } - ui_pop(parent); - ui_pop(child_layout_axis); + } + } + + return result; +} - ui_build_end(); +STRUCT(CheckInstructionArguments) +{ + String clang_path; + String text; + String binary; + String error_buffer; + u64* error_buffer_length; + VirtualBuffer(u8)* clang_pipe_buffer; + LLVMDisasmContextRef disassembler; + u64 reserved:63; +}; - ui_draw(); +fn Mnemonic_x86_64 parse_cmov(String instruction) +{ + let(space_index, string_first_ch(instruction, ' ')); + assert(space_index != STRING_NO_MATCH); + String mnemonic_string = s_get_slice(u8, instruction, 0, space_index); + String cmov_prefix = strlit("cmov"); + assert(string_starts_with(mnemonic_string, cmov_prefix)); + String cmov_suffix = s_get_slice(u8, mnemonic_string, cmov_prefix.length, mnemonic_string.length); + String suffixes[] = { + strlit("a"), + 
strlit("ae"), + strlit("b"), + strlit("be"), + strlit("c"), + strlit("e"), + strlit("g"), + strlit("ge"), + strlit("l"), + strlit("le"), + strlit("na"), + strlit("nae"), + strlit("nb"), + strlit("nbe"), + strlit("nc"), + strlit("ne"), + strlit("ng"), + strlit("nge"), + strlit("nl"), + strlit("nle"), + strlit("no"), + strlit("np"), + strlit("ns"), + strlit("nz"), + strlit("o"), + strlit("p"), + strlit("pe"), + strlit("po"), + strlit("s"), + strlit("z"), + }; + u64 suffix; + for (suffix = 0; suffix < array_length(suffixes); suffix += 1) + { + if (s_equal(cmov_suffix, suffixes[suffix])) + { + break; + } + } - ui_pop(font_size); + assert(suffix != array_length(suffixes)); + Mnemonic_x86_64 result = suffix + MNEMONIC_x86_64_cmova; + return result; +} - renderer_window_frame_end(renderer, render_window); +fn Mnemonic_x86_64 parse_cc_ext(String instruction, String prefix, Mnemonic_x86_64 base_mnemonic) +{ + let(space_index, string_first_ch(instruction, ' ')); + assert(space_index != STRING_NO_MATCH); + String mnemonic_string = s_get_slice(u8, instruction, 0, space_index); + assert(string_starts_with(mnemonic_string, prefix)); + String suffix = s_get_slice(u8, mnemonic_string, prefix.length, mnemonic_string.length); + String suffixes[] = { + strlit("a"), + strlit("ae"), + strlit("b"), + strlit("be"), + strlit("c"), + strlit("e"), + strlit("g"), + strlit("ge"), + strlit("l"), + strlit("le"), + strlit("na"), + strlit("nae"), + strlit("nb"), + strlit("nbe"), + strlit("nc"), + strlit("ne"), + strlit("ng"), + strlit("nge"), + strlit("nl"), + strlit("nle"), + strlit("no"), + strlit("np"), + strlit("ns"), + strlit("nz"), + strlit("o"), + strlit("p"), + strlit("pe"), + strlit("po"), + strlit("s"), + strlit("z"), + }; + u64 suffix_index; + for (suffix_index = 0; suffix_index < array_length(suffixes); suffix_index += 1) + { + if (s_equal(suffix, suffixes[suffix_index])) + { + break; + } + } + + assert(suffix_index != array_length(suffixes)); + Mnemonic_x86_64 result = base_mnemonic 
+ suffix_index; + return result; +} + +fn String parse_operand(String instruction, u8 operand_index) +{ + String result = {}; + String it = instruction; + u8 index = 0; + + it = s_get_slice(u8, it, string_first_ch(it, ' ') + 1, it.length); + + while (1) + { + if (it.length == 0) + { + break; + } + + if (operand_index == index) + { + let(length, MIN(string_first_ch(it, ','), it.length)); + result = s_get_slice(u8, it, 0, length); + break; + } + + let(next, MIN(string_first_ch(it, ','), it.length)); + it = s_get_slice(u8, it, next + 2, it.length); + index += 1; + } + + return result; +} + +#define parse_cc(i, cc_i_kind) parse_cc_ext(i, strlit(TOSTRING(cc_i_kind)), (MNEMONIC_x86_64_ ## cc_i_kind ## a)) + +fn u64 check_instruction(Arena* arena, CheckInstructionArguments arguments) +{ + StringFormatter error_buffer = { + .buffer = arguments.error_buffer, + }; + u8 disassembly_buffer[256]; + assert(arguments.binary.length); + + u8 result = 1; + + DisassemblyArguments disassemble_arguments = { + .binary = arguments.binary, + .disassembly_buffer = (String)array_to_slice(disassembly_buffer), + .context = arguments.disassembler, + }; + String disassembly_text = disassemble_binary(arena, disassemble_arguments); + + result = disassembly_text.length == arguments.text.length; + if (result) + { + for (u64 i = 0; i < arguments.text.length; i += 1) + { + if (disassembly_text.pointer[i] != arguments.text.pointer[i]) + { + result = 0; + + break; + } + } + } + + if (!result) + { + if (string_starts_with(arguments.text, strlit("ud0"))) + { + // TODO: figure out + // Somehow clang doesn't disassemble this instruction properly + assert(disassembly_text.pointer == 0); + assert(disassembly_text.length == 0); + result = 1; + } + else if (string_starts_with(arguments.text, strlit("xchg "))) + { + if (s_equal(disassembly_text, strlit("nop"))) + { + result = 1; + } + else + { + String my_op0 = parse_operand(arguments.text, 0); + String my_op1 = parse_operand(arguments.text, 1); + + String 
their_op0 = parse_operand(disassembly_text, 0); + String their_op1 = parse_operand(disassembly_text, 1); + + result = s_equal(my_op0, their_op1) && s_equal(my_op1, their_op0); + } + } + if (string_starts_with(arguments.text, strlit("cmov")) && string_starts_with(disassembly_text, strlit("cmov"))) + { + Mnemonic_x86_64 mine = parse_cc(arguments.text, cmov); + Mnemonic_x86_64 theirs = parse_cc(disassembly_text, cmov); + u8 my_opcode = cc_opcodes_low[cmov_index(mine)]; + u8 their_opcode = cc_opcodes_low[cmov_index(theirs)]; + result = my_opcode == their_opcode; + } + else if (string_starts_with(arguments.text, strlit("j")) && string_starts_with(disassembly_text, strlit("j"))) + { + Mnemonic_x86_64 mine = parse_cc(arguments.text, j); + Mnemonic_x86_64 theirs = parse_cc(disassembly_text, j); + u8 my_opcode = cc_opcodes_low[j_index(mine)]; + u8 their_opcode = cc_opcodes_low[j_index(theirs)]; + result = my_opcode == their_opcode; + } + else if (string_starts_with(arguments.text, strlit("set")) && string_starts_with(disassembly_text, strlit("set"))) + { + Mnemonic_x86_64 mine = parse_cc(arguments.text, set); + Mnemonic_x86_64 theirs = parse_cc(disassembly_text, set); + u8 my_opcode = cc_opcodes_low[set_index(mine)]; + u8 their_opcode = cc_opcodes_low[set_index(theirs)]; + result = my_opcode == their_opcode; + } + else if (string_starts_with(arguments.text, strlit("mov r")) && string_starts_with(disassembly_text, strlit("movabs r"))) + { + result = 1; + } + else if (string_starts_with(arguments.text, strlit("sal ")) && string_starts_with(disassembly_text, strlit("shl "))) + { + result = 1; + } + } + + if (!result) + { + if (disassembly_text.length) + { + formatter_append(&error_buffer, "Disassembly mismatch. 
Intended to assemble:\n\t{s}\nbut got from LLVM:\n\t{s}\n", arguments.text, disassembly_text); + } + assert(arguments.binary.length); + ClangCompileAssembly args = { + .instruction = arguments.text, + .clang_path = arguments.clang_path, + .clang_pipe_buffer = arguments.clang_pipe_buffer, + }; + String clang_binary = clang_compile_assembly(arena, args); + + if (clang_binary.pointer && s_equal(clang_binary, arguments.binary)) + { + formatter_append_string(&error_buffer, strlit("Clang and this binary generated the same output (earlier string comparison failed):\n\t")); + for (u64 bin_i = 0; bin_i < arguments.binary.length; bin_i += 1) + { + formatter_append(&error_buffer, "0x{u32:x,w=2} ", (u32)arguments.binary.pointer[bin_i]); + } } else { - if (previous) + formatter_append_string(&error_buffer, strlit("Failed to match correct output. Got:\n\t")); + + for (u64 bin_i = 0; bin_i < arguments.binary.length; bin_i += 1) { - previous->next = next; + formatter_append(&error_buffer, "0x{u32:x,w=2} ", (u32)arguments.binary.pointer[bin_i]); } - if (next) + formatter_append_character(&error_buffer, '\n'); + + formatter_append_string(&error_buffer, strlit("While clang generated the following:\n\t")); + + for (u64 bin_i = 0; bin_i < clang_binary.length; bin_i += 1) { - next->previous = previous; + formatter_append(&error_buffer, "0x{u32:x,w=2} ", (u32)clang_binary.pointer[bin_i]); } - if (state.first_window == window) - { - state.first_window = next; - } + formatter_append_character(&error_buffer, '\n'); + } + } - if (state.last_window == window) + assert(!!error_buffer.index == !result); + + return error_buffer.index; +} + +STRUCT(EncodingTestOptions) +{ + u64 scalar:1; + u64 wide:1; +}; + +#if defined(__x86_64__) +#include +#endif +typedef u64 Bitset; + +STRUCT(GPR) +{ + Bitset mask[4]; +}; + +STRUCT(OpcodeLen) +{ + u8 length0:2; + u8 length1:2; + u8 length2:2; + u8 length3:2; +}; + +STRUCT(VectorOpcode) +{ + u8 values[3][64]; + OpcodeLen lengths[64/4]; + u8 extension[64]; + 
Bitset plus_register; +}; + +STRUCT(EncodingBatch) +{ + Bitset legacy_prefixes[LEGACY_PREFIX_COUNT]; + Bitset is_rm_register; + Bitset is_reg_register; + GPR rm_register; + GPR reg_register; + Bitset is_relative8; + Bitset is_relative32; + Bitset implicit_register; + VectorOpcode opcode; + Bitset is_displacement8; + Bitset is_displacement32; + Bitset rex_w; + u8 segment_register_override[64]; + Bitset is_immediate[4]; + u8 immediate[8][64]; + u8 displacement[4][64]; + u8 relative[4][64]; +}; + +fn Bitset bitset_from_bit(u8 bit) +{ + return -(u64)(bit != 0); +} + +fn GPR register_mask_batch_from_scalar(u8 scalar_register) +{ + u64 reg = scalar_register & 0b1111; + assert(reg == scalar_register); + u64 value64 = (reg << 60) | (reg << 56) | (reg << 52) | (reg << 48) | (reg << 44) | (reg << 40) | (reg << 36) | (reg << 32) | (reg << 28) | (reg << 24) | (reg << 20) | (reg << 16) | (reg << 12) | (reg << 8) | (reg << 4) | reg; + GPR result = { value64, value64, value64, value64 }; + return result; +} + +fn EncodingBatch encoding_batch_from_scalar(EncodingScalar scalar) +{ + EncodingBatch batch = { + .rm_register = register_mask_batch_from_scalar(scalar.rm_register), + .reg_register = register_mask_batch_from_scalar(scalar.reg_register), + .is_rm_register = bitset_from_bit(scalar.is_rm_register), + .is_reg_register = bitset_from_bit(scalar.is_reg_register), + .is_displacement8 = bitset_from_bit(scalar.is_displacement8), + .is_displacement32 = bitset_from_bit(scalar.is_displacement32), + .is_relative8 = bitset_from_bit(scalar.is_relative8), + .is_relative32 = bitset_from_bit(scalar.is_relative32), + .rex_w = bitset_from_bit(scalar.rex_w), + .implicit_register = bitset_from_bit(scalar.implicit_register), + .is_immediate = { + bitset_from_bit(scalar.is_immediate & (1 << 0)), + bitset_from_bit(scalar.is_immediate & (1 << 1)), + bitset_from_bit(scalar.is_immediate & (1 << 2)), + bitset_from_bit(scalar.is_immediate & (1 << 3)), + }, + }; + + for (LegacyPrefix legacy_prefix = 0; 
legacy_prefix < LEGACY_PREFIX_COUNT; legacy_prefix += 1) + { + batch.legacy_prefixes[legacy_prefix] = bitset_from_bit((scalar.legacy_prefixes & (1 << legacy_prefix)) >> legacy_prefix); + } + + for (u64 i = 0; i < batch_element_count; i += 1) + { + batch.opcode.values[0][i] = scalar.opcode.bytes[0]; + batch.opcode.values[1][i] = scalar.opcode.bytes[1]; + batch.opcode.values[2][i] = scalar.opcode.bytes[2]; + batch.opcode.extension[i] = scalar.opcode.extension; + batch.opcode.plus_register |= scalar.opcode.plus_register << i; + } + + for (u64 i = 0; i < array_length(batch.opcode.lengths); i += 1) + { + batch.opcode.lengths[i] = (OpcodeLen){ + .length0 = scalar.opcode.length, + .length1 = scalar.opcode.length, + .length2 = scalar.opcode.length, + .length3 = scalar.opcode.length, + }; + } + + for (u32 immediate_index = 0; immediate_index < array_length(scalar.immediate.bytes); immediate_index += 1) + { + for (u32 batch_index = 0; batch_index < batch_element_count; batch_index += 1) + { + batch.immediate[immediate_index][batch_index] = scalar.immediate.bytes[immediate_index]; + } + } + + for (u32 displacement_index = 0; displacement_index < array_length(scalar.displacement.bytes); displacement_index += 1) + { + for (u32 batch_index = 0; batch_index < batch_element_count; batch_index += 1) + { + batch.displacement[displacement_index][batch_index] = scalar.displacement.bytes[displacement_index]; + } + } + + for (u32 relative_index = 0; relative_index < array_length(scalar.relative.bytes); relative_index += 1) + { + for (u32 batch_index = 0; batch_index < batch_element_count; batch_index += 1) + { + batch.relative[relative_index][batch_index] = scalar.relative.bytes[relative_index]; + } + } + + return batch; +} + +u32 encode(u8* restrict buffer, const EncodingBatch* const restrict batch) +{ + __m512i prefixes[LEGACY_PREFIX_COUNT]; + __mmask64 prefix_masks[LEGACY_PREFIX_COUNT]; + for (LegacyPrefix prefix = 0; prefix < LEGACY_PREFIX_COUNT; prefix += 1) + { + 
prefix_masks[prefix] = _cvtu64_mask64(batch->legacy_prefixes[prefix]); + prefixes[prefix] = _mm512_maskz_set1_epi8(prefix_masks[prefix], legacy_prefixes[prefix]); + } + + __m512i instruction_length; + + u8 prefix_group1_bytes[64]; + u8 prefix_group1_positions[64]; + { + __mmask64 prefix_group1_mask = _kor_mask64(_kor_mask64(prefix_masks[LEGACY_PREFIX_F0], prefix_masks[LEGACY_PREFIX_F2]), prefix_masks[LEGACY_PREFIX_F3]); + __m512i prefix_group1 = _mm512_or_epi32(_mm512_or_epi32(prefixes[LEGACY_PREFIX_F0], prefixes[LEGACY_PREFIX_F2]), prefixes[LEGACY_PREFIX_F3]); + __m512i prefix_group1_position = _mm512_maskz_set1_epi8(_knot_mask64(prefix_group1_mask), 0x0f); + instruction_length = _mm512_maskz_set1_epi8(prefix_group1_mask, 0x01); + + _mm512_storeu_epi8(prefix_group1_bytes, prefix_group1); + _mm512_storeu_epi8(prefix_group1_positions, prefix_group1_position); + } + + u8 prefix_group2_bytes[64]; + u8 prefix_group2_positions[64]; + { + __mmask64 prefix_group2_mask = _kor_mask64(_kor_mask64(_kor_mask64(prefix_masks[LEGACY_PREFIX_2E], prefix_masks[LEGACY_PREFIX_36]), _kor_mask64(prefix_masks[LEGACY_PREFIX_3E], prefix_masks[LEGACY_PREFIX_26])), _kor_mask64(prefix_masks[LEGACY_PREFIX_64], prefix_masks[LEGACY_PREFIX_65])); + __m512i prefix_group2 = _mm512_or_epi32(_mm512_or_epi32(_mm512_or_epi32(prefixes[LEGACY_PREFIX_2E], prefixes[LEGACY_PREFIX_36]), _mm512_or_epi32(prefixes[LEGACY_PREFIX_3E], prefixes[LEGACY_PREFIX_26])), _mm512_or_epi32(prefixes[LEGACY_PREFIX_64], prefixes[LEGACY_PREFIX_65])); + __m512i prefix_group2_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), prefix_group2_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(prefix_group2_mask, 0x01)); + + _mm512_storeu_epi8(prefix_group2_bytes, prefix_group2); + _mm512_storeu_epi8(prefix_group2_positions, prefix_group2_position); + } + + u8 prefix_group3_bytes[64]; + u8 prefix_group3_positions[64]; + { + __mmask64 prefix_group3_mask = 
prefix_masks[LEGACY_PREFIX_66]; + __m512i prefix_group3 = prefixes[LEGACY_PREFIX_66]; + __m512i prefix_group3_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), prefix_group3_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(prefix_group3_mask, 0x01)); + + _mm512_storeu_epi8(prefix_group3_bytes, prefix_group3); + _mm512_storeu_epi8(prefix_group3_positions, prefix_group3_position); + } + + u8 prefix_group4_bytes[64]; + u8 prefix_group4_positions[64]; + { + __mmask64 prefix_group4_mask = prefix_masks[LEGACY_PREFIX_67]; + __m512i prefix_group4 = prefixes[LEGACY_PREFIX_67]; + __m512i prefix_group4_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), prefix_group4_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(prefix_group4_mask, 0x01)); + + _mm512_storeu_epi8(prefix_group4_bytes, prefix_group4); + _mm512_storeu_epi8(prefix_group4_positions, prefix_group4_position); + } + + __mmask64 is_plus_register = _cvtu64_mask64(batch->opcode.plus_register); + __mmask64 is_implicit_register = _cvtu64_mask64(batch->implicit_register); + + __mmask64 is_displacement8 = _cvtu64_mask64(batch->is_displacement8); + __mmask64 is_displacement32 = _cvtu64_mask64(batch->is_displacement32); + + __mmask64 is_rm_register; + __m512i rm_register; + { + __m256i register_mask_256 = _mm256_loadu_epi8(&batch->rm_register); + __m256i selecting_mask = _mm256_set1_epi8(0x0f); + __m256i low_bits = _mm256_and_si256(register_mask_256, selecting_mask); + __m256i high_bits = _mm256_and_si256(_mm256_srli_epi64(register_mask_256, 4), selecting_mask); + __m256i low_bytes = _mm256_unpacklo_epi8(low_bits, high_bits); + __m256i high_bytes = _mm256_unpackhi_epi8(low_bits, high_bits); + rm_register = _mm512_inserti64x4(_mm512_castsi256_si512(low_bytes), high_bytes, 1); + is_rm_register = _cvtu64_mask64(batch->is_rm_register); + } + + __mmask64 is_reg_register; + __m512i reg_register; + { + 
__m256i register_mask_256 = _mm256_loadu_epi8(&batch->reg_register); + __m256i selecting_mask = _mm256_set1_epi8(0x0f); + __m256i low_bits = _mm256_and_si256(register_mask_256, selecting_mask); + __m256i high_bits = _mm256_and_si256(_mm256_srli_epi64(register_mask_256, 4), selecting_mask); + __m256i low_bytes = _mm256_unpacklo_epi8(low_bits, high_bits); + __m256i high_bytes = _mm256_unpackhi_epi8(low_bits, high_bits); + reg_register = _mm512_inserti64x4(_mm512_castsi256_si512(low_bytes), high_bytes, 1); + is_reg_register = _cvtu64_mask64(batch->is_reg_register); + } + + __mmask64 is_reg_direct_addressing_mode = _knot_mask64(_kor_mask64(is_displacement8, is_displacement32)); + __mmask64 has_base_register = _kor_mask64(_kor_mask64(is_rm_register, is_reg_register), is_implicit_register); + + __mmask64 is_relative8 = _cvtu64_mask64(batch->is_relative8); + __mmask64 is_relative32 = _cvtu64_mask64(batch->is_relative32); + + __m512i rex_b = _mm512_maskz_set1_epi8(_mm512_test_epi8_mask(rm_register, _mm512_set1_epi8(0b1000)), 1 << 0); + __m512i rex_x = _mm512_set1_epi8(0); // TODO + __m512i rex_r = _mm512_maskz_set1_epi8(_mm512_test_epi8_mask(reg_register, _mm512_set1_epi8(0b1000)), 1 << 2); + __m512i rex_w = _mm512_maskz_set1_epi8(_cvtu64_mask64(batch->rex_w), 1 << 3); + __m512i rex_byte = _mm512_or_epi32(_mm512_set1_epi32(0x40), _mm512_or_epi32(_mm512_or_epi32(rex_b, rex_x), _mm512_or_epi32(rex_r, rex_w))); + __mmask64 rex_mask = _mm512_test_epi8_mask(rex_byte, _mm512_set1_epi8(0x0f)); + __m512i rex_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), rex_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(rex_mask, 0x01)); + + u8 rex_bytes[64]; + u8 rex_positions[64]; + _mm512_storeu_epi8(rex_bytes, rex_byte); + _mm512_storeu_epi8(rex_positions, rex_position); + + __m128i opcode_lengths_128 = _mm_loadu_epi8(&batch->opcode.lengths); + __m128i selecting_mask = _mm_set1_epi8(0x03); + __m128i opcode_length_nibbles_0 
= _mm_and_si128(opcode_lengths_128, selecting_mask); + __m128i opcode_length_nibbles_1 = _mm_and_si128(_mm_srli_epi64(opcode_lengths_128, 2 * 1), selecting_mask); + __m128i opcode_length_nibbles_2 = _mm_and_si128(_mm_srli_epi64(opcode_lengths_128, 2 * 2), selecting_mask); + __m128i opcode_length_nibbles_3 = _mm_and_si128(_mm_srli_epi64(opcode_lengths_128, 2 * 3), selecting_mask); + + __m512i opcode_lengths_512 = _mm512_inserti64x4(_mm512_castsi256_si512(_mm256_inserti32x4(_mm256_castsi128_si256(_mm_unpacklo_epi8(opcode_length_nibbles_0, opcode_length_nibbles_1)), _mm_unpackhi_epi8(opcode_length_nibbles_0, opcode_length_nibbles_1), 1)), _mm256_inserti32x4(_mm256_castsi128_si256(_mm_unpacklo_epi8(opcode_length_nibbles_2, opcode_length_nibbles_3)), _mm_unpackhi_epi8(opcode_length_nibbles_2, opcode_length_nibbles_3), 1), 1); + + __mmask64 opcode_length1 = _mm512_cmpeq_epi8_mask(opcode_lengths_512, _mm512_set1_epi8(1)); + __mmask64 opcode_length2 = _mm512_cmpeq_epi8_mask(opcode_lengths_512, _mm512_set1_epi8(2)); + __mmask64 opcode_length3 = _mm512_cmpeq_epi8_mask(opcode_lengths_512, _mm512_set1_epi8(3)); + + __m512i plus_register = _mm512_and_si512(rm_register, _mm512_set1_epi8(0b111)); + + __m512i opcode_extension = _mm512_loadu_epi8(&batch->opcode.extension[0]); + __m512i opcode1 = _mm512_or_epi32(_mm512_loadu_epi8(&batch->opcode.values[0]), _mm512_maskz_mov_epi8(_kand_mask64(is_plus_register, opcode_length1), plus_register)); + __m512i opcode1_position = instruction_length; + instruction_length = _mm512_add_epi8(instruction_length, _mm512_set1_epi8(0x01)); + + u8 opcode1_bytes[64]; + u8 opcode1_positions[64]; + _mm512_storeu_epi8(opcode1_bytes, opcode1); + _mm512_storeu_epi8(opcode1_positions, opcode1_position); + + __m512i opcode2 = _mm512_or_epi32(_mm512_loadu_epi8(&batch->opcode.values[1]), _mm512_maskz_mov_epi8(_kand_mask64(is_plus_register, opcode_length2), plus_register)); + __mmask64 opcode2_mask = _mm512_test_epi8_mask(opcode_lengths_512, 
_mm512_set1_epi8(0b10)); + __m512i opcode2_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), opcode2_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(opcode2_mask, 0x01)); + + u8 opcode2_bytes[64]; + u8 opcode2_positions[64]; + _mm512_storeu_epi8(opcode2_bytes, opcode2); + _mm512_storeu_epi8(opcode2_positions, opcode2_position); + + __m512i opcode3 = _mm512_or_epi32(_mm512_loadu_epi8(&batch->opcode.values[2]), _mm512_maskz_mov_epi8(_kand_mask64(is_plus_register, opcode_length3), plus_register)); + __mmask64 opcode3_mask = _mm512_cmpeq_epi8_mask(opcode_lengths_512, _mm512_set1_epi8(0b11)); + __m512i opcode3_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), opcode3_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(opcode3_mask, 0x01)); + + u8 opcode3_bytes[64]; + u8 opcode3_positions[64]; + _mm512_storeu_epi8(opcode3_bytes, opcode3); + _mm512_storeu_epi8(opcode3_positions, opcode3_position); + + __m512i displacement8 = _mm512_loadu_epi8(batch->displacement[0]); + __mmask64 mod_is_displacement32 = is_displacement32; + __mmask64 mod_is_displacement8 = _kand_mask64(is_displacement8, _kor_mask64(_mm512_test_epi8_mask(displacement8, displacement8), _kand_mask64(is_rm_register, _mm512_cmpeq_epi8_mask(_mm512_and_si512(rm_register, _mm512_set1_epi8(0b111)), _mm512_set1_epi8(REGISTER_X86_64_BP))))); + + __mmask64 mod_rm_mask = _kor_mask64(_kand_mask64(_kor_mask64(is_rm_register, is_reg_register), _knot_mask64(is_plus_register)), _kor_mask64(is_displacement8, is_displacement32)); + __m512i register_direct_address_mode = _mm512_maskz_set1_epi8(is_reg_direct_addressing_mode, 1); + __m512i mod = _mm512_or_epi32(_mm512_or_epi32(_mm512_slli_epi32(_mm512_maskz_set1_epi8(_kand_mask64(mod_is_displacement32, has_base_register), 1), 1), _mm512_maskz_set1_epi8(mod_is_displacement8, 1)), _mm512_or_epi32(_mm512_slli_epi32(register_direct_address_mode, 1), 
register_direct_address_mode)); + __m512i rm = _mm512_or_epi32(_mm512_and_si512(rm_register, _mm512_set1_epi8(0b111)), _mm512_maskz_set1_epi8(_knot_mask64(has_base_register), 0b100)); + __m512i reg = _mm512_or_epi32(_mm512_and_si512(reg_register, _mm512_set1_epi8(0b111)), opcode_extension); + __m512i mod_rm = _mm512_or_epi32(_mm512_or_epi32(rm, _mm512_slli_epi32(reg, 3)), _mm512_slli_epi32(mod, 6)); + __m512i mod_rm_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), mod_rm_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(mod_rm_mask, 0x01)); + + u8 mod_rm_bytes[64]; + u8 mod_rm_positions[64]; + _mm512_storeu_epi8(mod_rm_bytes, mod_rm); + _mm512_storeu_epi8(mod_rm_positions, mod_rm_position); + + __mmask64 sib_mask = _kand_mask64(_mm512_cmpneq_epi8_mask(mod, _mm512_set1_epi8(0b11)), _mm512_cmpeq_epi8_mask(rm, _mm512_set1_epi8(0b100))); + __m512i sib_scale = _mm512_set1_epi8(0); + __m512i sib_index = _mm512_maskz_set1_epi8(sib_mask, 0b100 << 3); + __m512i sib_base = _mm512_or_epi32(_mm512_and_si512(rm_register, _mm512_maskz_set1_epi8(is_rm_register, 0b111)), _mm512_maskz_set1_epi8(_knot_mask64(is_rm_register), 0b101)); + __m512i sib = _mm512_or_epi32(_mm512_or_epi32(sib_index, sib_base), sib_scale); + __m512i sib_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), sib_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(sib_mask, 0x01)); + + u8 sib_bytes[64]; + u8 sib_positions[64]; + _mm512_storeu_epi8(sib_bytes, sib); + _mm512_storeu_epi8(sib_positions, sib_position); + + __m512i displacement8_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), mod_is_displacement8, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(mod_is_displacement8, sizeof(s8))); + u8 displacement8_positions[64]; + _mm512_storeu_epi8(displacement8_positions, displacement8_position); + + __m512i 
displacement32_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), mod_is_displacement32, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(mod_is_displacement32, sizeof(s32))); + u8 displacement32_positions[64]; + _mm512_storeu_epi8(displacement32_positions, displacement32_position); + + __m512i relative8_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), is_relative8, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(is_relative8, sizeof(s8))); + u8 relative8_positions[64]; + _mm512_storeu_epi8(relative8_positions, relative8_position); + + __m512i relative32_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), is_relative32, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(is_relative32, sizeof(s32))); + u8 relative32_positions[64]; + _mm512_storeu_epi8(relative32_positions, relative32_position); + + u8 immediate_positions[array_length(batch->is_immediate)][64]; + for (u32 i = 0; i < array_length(immediate_positions); i += 1) + { + __mmask64 immediate_mask = _cvtu64_mask64(batch->is_immediate[i]); + __m512i immediate_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), immediate_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(immediate_mask, 1 << i)); + _mm512_storeu_epi8(immediate_positions[i], immediate_position); + } + + u8 separate_buffers[64][max_instruction_byte_count]; + u8 separate_lengths[64]; + _mm512_storeu_epi8(separate_lengths, instruction_length); + + for (u32 i = 0; i < array_length(separate_lengths); i += 1) + { + separate_buffers[i][prefix_group1_positions[i]] = prefix_group1_bytes[i]; + separate_buffers[i][prefix_group2_positions[i]] = prefix_group2_bytes[i]; + separate_buffers[i][prefix_group3_positions[i]] = prefix_group3_bytes[i]; + separate_buffers[i][prefix_group4_positions[i]] = prefix_group4_bytes[i]; + + 
separate_buffers[i][rex_positions[i]] = rex_bytes[i]; + + separate_buffers[i][opcode1_positions[i]] = opcode1_bytes[i]; + separate_buffers[i][opcode2_positions[i]] = opcode2_bytes[i]; + separate_buffers[i][opcode3_positions[i]] = opcode3_bytes[i]; + + separate_buffers[i][mod_rm_positions[i]] = mod_rm_bytes[i]; + + separate_buffers[i][sib_positions[i]] = sib_bytes[i]; + + for (u32 immediate_position_index = 0; immediate_position_index < array_length(immediate_positions); immediate_position_index += 1) + { + u8 start_position = immediate_positions[immediate_position_index][i]; + for (u32 byte = 0; byte < 1 << immediate_position_index; byte += 1) { - state.last_window = previous; + u8 destination_index = start_position + byte * (start_position != 0xf); + separate_buffers[i][destination_index] = batch->immediate[byte][i]; } } - window = next; + separate_buffers[i][displacement8_positions[i]] = batch->displacement[0][i]; + + u8 displacement32_start = displacement32_positions[i]; + for (u32 byte = 0; byte < 4; byte += 1) + { + u8 destination_index = displacement32_start + byte * (displacement32_start != 0xf); + separate_buffers[i][destination_index] = batch->displacement[byte][i]; + } + + separate_buffers[i][relative8_positions[i]] = batch->relative[0][i]; + + u8 relative32_start = relative32_positions[i]; + for (u32 byte = 0; byte < 4; byte += 1) + { + u8 destination_index = relative32_start + byte * (relative32_start != 0xf); + separate_buffers[i][destination_index] = batch->relative[byte][i]; + } } + + u32 buffer_i = 0; + + for (u32 i = 0; i < array_length(separate_lengths); i += 1) + { + let(separate_length, separate_lengths[i]); + if (separate_length >= 1 && separate_length <= 15) + { + memcpy(&buffer[buffer_i], &separate_buffers[i], separate_length); + buffer_i += separate_length; + } + else + { + unreachable(); + } + } + + return buffer_i; } -fn void window_refresh_callback(WindowingInstance* window, void* context) +STRUCT(TestCounter) { - unused(window); - 
unused(context); - app_update(); -} + u64 total; + u64 failure; +}; -int main() +typedef enum TestMode { - state.arena = arena_initialize_default(MB(2)); - if (!windowing_initialize()) + TEST_MODE_SCALAR, + TEST_MODE_WIDE, + TEST_MODE_COUNT, +} TestMode; + +fn String test_mode_to_string(TestMode test_mode) +{ + switch (test_mode) { - return 1; + case_to_name(TEST_MODE_, SCALAR); + case_to_name(TEST_MODE_, WIDE); + case TEST_MODE_COUNT: unreachable(); } - - state.renderer = rendering_initialize(state.arena); - if (!state.renderer) - { - return 1; - } - - WindowingInstantiate window_create_options = { - .name = strlit("Bloat Buster"), - .size = { .width = 1600, .height = 900 }, - }; - state.first_window = state.last_window = arena_allocate(state.arena, BBWindow, 1); - state.first_window->handle = windowing_instantiate(window_create_options); - - state.first_window->render = rendering_initialize_window(state.renderer, state.first_window->handle); - - state.first_window->ui = ui_state_allocate(state.renderer, state.first_window->render); - state.first_window->root_panel = arena_allocate(state.arena, BBPanel, 1); - state.first_window->root_panel->parent_percentage = 1.0f; - state.first_window->root_panel->split_axis = AXIS2_X; - -#ifndef __APPLE__ - window_rect_texture_update_begin(state.first_window->render); - - let(white_texture, white_texture_create(state.arena, state.renderer)); - TextureAtlasCreate monospace_font_create = { -#ifdef _WIN32 - .font_path = strlit("C:/Users/David/Downloads/Fira_Sans/FiraSans-Regular.ttf"), -#elif defined(__linux__) - .font_path = strlit("/usr/share/fonts/TTF/FiraSans-Regular.ttf"), -#elif defined(__APPLE__) - .font_path = strlit("/Users/david/Library/Fonts/FiraSans-Regular.ttf"), -#else - .font_path = strlit("WRONG_PATH"), -#endif - .text_height = monospace_font_height, - }; - let(monospace_font, font_texture_atlas_create(state.arena, state.renderer, monospace_font_create)); - let(proportional_font, monospace_font); - - 
window_queue_rect_texture_update(state.first_window->render, RECT_TEXTURE_SLOT_WHITE, white_texture); - renderer_queue_font_update(state.renderer, state.first_window->render, RENDER_FONT_TYPE_MONOSPACE, monospace_font); - renderer_queue_font_update(state.renderer, state.first_window->render, RENDER_FONT_TYPE_PROPORTIONAL, proportional_font); - - window_rect_texture_update_end(state.renderer, state.first_window->render); -#endif - - state.last_frame_timestamp = os_timestamp(); - - while (state.first_window) - { - app_update(); - } - - return 0; } -#else + +STRUCT(TestSetup) +{ + String instruction_binary_buffer; + String clang_path; + String error_buffer; + VirtualBuffer(u8)* clang_pipe_buffer; + LLVMDisasmContextRef disassembler; + Arena* arena; + TestCounter counters[TEST_MODE_COUNT]; + EncodingTestOptions options; +}; + +STRUCT(TestInstruction) +{ + EncodingScalar encoding; + String text; +}; + +fn void test_instruction(TestSetup* setup, TestInstruction* instruction) +{ + if (setup->options.scalar) + { + let(length, encode_instruction_batch(setup->instruction_binary_buffer.pointer, &instruction->encoding, 1)); + assert(length <= setup->instruction_binary_buffer.length); + String instruction_bytes = { + .pointer = setup->instruction_binary_buffer.pointer, + .length = length, + }; + CheckInstructionArguments check_args = { + .clang_path = setup->clang_path, + .text = instruction->text, + .binary = instruction_bytes, + .error_buffer = setup->error_buffer, + .clang_pipe_buffer = setup->clang_pipe_buffer, + .disassembler = setup->disassembler, + }; + u64 error_buffer_length = check_instruction(setup->arena, check_args); + setup->counters[TEST_MODE_SCALAR].total += 1; + let(first_failure, setup->counters[TEST_MODE_SCALAR].total == 0); + setup->counters[TEST_MODE_SCALAR].failure += error_buffer_length != 0; + String error_string = { .pointer = setup->error_buffer.pointer, .length = error_buffer_length }; + if (error_buffer_length != 0) + { + print("{cstr}{u64}) {s}... 
[FAILED]\n{s}\n", first_failure ? "\n" : "", setup->counters[TEST_MODE_SCALAR].total, instruction->text, error_string); + os_exit(1); + } + } + + if (setup->options.wide) + { + EncodingBatch batch = encoding_batch_from_scalar(instruction->encoding); + let(wide_length, encode(setup->instruction_binary_buffer.pointer, &batch)); + assert(wide_length % batch_element_count == 0); + let(length, wide_length / batch_element_count); + + String instruction_bytes = { + .pointer = setup->instruction_binary_buffer.pointer, + .length = length, + }; + CheckInstructionArguments check_args = { + .clang_path = setup->clang_path, + .text = instruction->text, + .binary = instruction_bytes, + .error_buffer = setup->error_buffer, + .clang_pipe_buffer = setup->clang_pipe_buffer, + .disassembler = setup->disassembler, + }; + u64 error_buffer_length = check_instruction(setup->arena, check_args); + setup->counters[TEST_MODE_WIDE].total += 1; + let(first_failure, setup->counters[TEST_MODE_WIDE].total == 0); + setup->counters[TEST_MODE_WIDE].failure += error_buffer_length != 0; + String error_string = { .pointer = setup->error_buffer.pointer, .length = error_buffer_length }; + if (error_buffer_length != 0) + { + print("{cstr}{u64}) {s}... [FAILED]\n{s}\n", first_failure ? 
"\n" : "", setup->counters[TEST_MODE_WIDE].total, instruction->text, error_string); + } + } +} + +fn u8 encoding_test_instruction_batches(Arena* arena, TestDataset dataset, EncodingTestOptions options) +{ + u8 result = 0; + u8 instruction_binary_buffer[256 * batch_element_count]; + u8 instruction_text_buffer[256]; + u8 error_buffer[4096]; + String instruction_text_buffer_slice = array_to_slice(instruction_text_buffer); + VirtualBuffer(u8) clang_pipe_buffer = {}; + vb_ensure_capacity(&clang_pipe_buffer, 1024*1024); + llvm_initialize_macro(X86, _null_prefix_()); + let(disassembler, LLVMCreateDisasmCPU("x86_64-freestanding", "znver5", 0, 0, 0, 0)); + u64 disassembly_options = LLVMDisassembler_Option_AsmPrinterVariant | LLVMDisassembler_Option_PrintImmHex; + if (!LLVMSetDisasmOptions(disassembler, disassembly_options)) + { + failed_execution(); + } + + String clang_path = executable_find_in_path(arena, strlit("clang"), cstr(getenv("PATH"))); + assert(clang_path.pointer); + + global_variable const s32 displacements[] = { + 0, + 10, + 10000000, + }; + + global_variable const String displacement_strings[] = { + strlit("0"), + strlit("10"), + strlit("10000000"), + }; + + TestSetup setup = { + .instruction_binary_buffer = array_to_slice(instruction_binary_buffer), + .clang_path = clang_path, + .error_buffer = array_to_slice(error_buffer), + .clang_pipe_buffer = &clang_pipe_buffer, + .options = options, + .disassembler = disassembler, + .arena = arena, + }; + + for (u64 batch_index = 0; batch_index < dataset.batch_count; batch_index += 1) + { + let(batch, &dataset.batches[batch_index]); + + String mnemonic_string = mnemonic_x86_64_to_string(batch->mnemonic); + print("============================\n~~~~~~~ MNEMONIC {s} ~~~~~~~\n============================\n", mnemonic_string); + + u64 encoding_top = batch->encoding_offset + batch->encoding_count; + + for (u64 encoding_index = batch->encoding_offset; encoding_index < encoding_top; encoding_index += 1) + { + 
memset(setup.counters, 0, sizeof(setup.counters)); + let(encoding, &dataset.encodings[encoding_index]); + OperandId first_operand = encoding->operands.values[0]; + OperandId second_operand = encoding->operands.values[1]; + u8 operand_count = encoding->operands.count; + + u8 encoding_buffer[256]; + u8 encoding_separator[256]; + u64 encoding_buffer_i = 0; + String encoding_string; + String encoding_separator_string; + { + memcpy(encoding_buffer + encoding_buffer_i, mnemonic_string.pointer, mnemonic_string.length); + encoding_buffer_i += mnemonic_string.length; + + encoding_buffer[encoding_buffer_i] = ' '; + encoding_buffer_i += operand_count != 0; + + for (u8 operand_i = 0; operand_i < operand_count; operand_i += 1) + { + String operand_string = operand_to_string(encoding->operands.values[operand_i]); + memcpy(encoding_buffer + encoding_buffer_i, operand_string.pointer, operand_string.length); + encoding_buffer_i += operand_string.length; + + u8 not_last_operand = operand_i != operand_count - 1; + + encoding_buffer[encoding_buffer_i] = ','; + encoding_buffer_i += not_last_operand; + + encoding_buffer[encoding_buffer_i] = ' '; + encoding_buffer_i += not_last_operand; + } + memcpy(&encoding_buffer[encoding_buffer_i], "... 
", 4); + encoding_buffer_i += 4; + + encoding_buffer[encoding_buffer_i] = 0; + + encoding_string = (String) { .pointer = encoding_buffer, .length = encoding_buffer_i }; + + let(failed_string, strlit("FAILED")); + encoding_separator_string = (String) { .pointer = encoding_separator, .length = encoding_buffer_i + 3 + 1 + failed_string.length }; + memset(encoding_separator, '-', encoding_separator_string.length); + print_string(encoding_separator_string); + print_string(strlit("\n")); + print_string(encoding_string); + } + + if (operand_count == 0) + { + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w, + .legacy_prefixes = batch->legacy_prefixes | (encoding->operand_size_override << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = mnemonic_string, + }; + + test_instruction(&setup, &instruction); + } + else if (op_is_gpra(first_operand)) + { + let(first_operand_index, op_gpra_get_index(first_operand)); + String register_a_names[] = { + strlit("al"), + strlit("ax"), + strlit("eax"), + strlit("rax"), + }; + String first_operand_register_name = register_a_names[first_operand_index]; + String first_operand_string = first_operand_register_name; + + switch (operand_count) + { + case 1: + { + if (encoding->operands.implicit_operands) + { + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || (first_operand_index == 3), + .implicit_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = mnemonic_string, + }; + + test_instruction(&setup, &instruction); + } + else + { + todo(); + } + } break; + case 2: + { + if (op_is_gpr_no_gpra(second_operand)) + { + u8 second_operand_index = op_gpr_get_index(second_operand); + GPR_x86_64 second_operand_register_count = (unlikely(second_operand_index == 0)) ? 
(X86_64_GPR_COUNT / 2) : X86_64_GPR_COUNT; + u8 second_rm_buffer[X86_64_GPR_COUNT][array_length(displacements)][32]; + String second_rm_strings[X86_64_GPR_COUNT][array_length(displacements)]; + u8 second_is_rm = op_is_rm(second_operand); + + if (second_is_rm) + { + for (GPR_x86_64 gpr = 0; gpr < X86_64_GPR_COUNT; gpr += 1) + { + String second_operand_rm_name = gpr_to_string(gpr, 3, 0); + + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + second_rm_strings[gpr][displacement_index] = format_displacement((String)array_to_slice(second_rm_buffer[gpr][displacement_index]), second_operand_rm_name, displacement_strings[displacement_index], second_operand_index); + } + } + } + + for (GPR_x86_64 second_gpr = 0; second_gpr < second_operand_register_count; second_gpr += 1) + { + String second_operand_string = gpr_to_string(second_gpr, second_operand_index, 0); + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || second_operand_index == 3, + .rm_register = second_gpr, + .is_rm_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((second_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + + if (second_is_rm) + { + for (GPR_x86_64 second_gpr = 0; second_gpr < X86_64_GPR_COUNT; second_gpr += 1) + { + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + String second_operand_string = second_rm_strings[second_gpr][displacement_index]; + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || second_operand_index == 3, + .rm_register = second_gpr, + .is_rm_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((second_operand_index == 1 || 
encoding->operand_size_override) << LEGACY_PREFIX_66), + .displacement = { .value = displacements[displacement_index] }, + .is_displacement8 = displacement_index != 2, + .is_displacement32 = displacement_index == 2, + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + } + } + else if (op_is_imm(second_operand)) + { + let(second_operand_index, op_imm_get_index(second_operand)); + // We output the string directly to avoid formatting cost + String second_operand_string = sample_immediate_strings(second_operand_index); + u64 immediate = sample_immediate_values(second_operand_index); + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .implicit_register = 1, + .is_immediate = 1 << second_operand_index, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .immediate = { .value = immediate, }, + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + test_instruction(&setup, &instruction); + } + else if (op_is_gprd(second_operand)) + { + assert(encoding->operands.implicit_operands); + String second_operand_string = op_gprd_to_string(second_operand); + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .implicit_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + test_instruction(&setup, &instruction); + } + else if (op_is_ds_rsi_memory(second_operand)) + { + // 
u8 second_operand_index = second_operand - op_ds_rsi_m8; + String second_operand_string; + switch (second_operand) + { + case op_ds_rsi_m8: second_operand_string = strlit("byte ptr [rsi]"); break; + case op_ds_rsi_m16: second_operand_string = strlit("word ptr [rsi]"); break; + case op_ds_rsi_m32: second_operand_string = strlit("dword ptr [rsi]"); break; + case op_ds_rsi_m64: second_operand_string = strlit("qword ptr [rsi]"); break; + default: unreachable(); + } + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else if (op_is_es_rdi_memory(second_operand)) + { + // u8 second_operand_index = second_operand - op_ds_rsi_m8; + String second_operand_string; + switch (second_operand) + { + case op_es_rdi_m8: second_operand_string = strlit("byte ptr es:[rdi]"); break; + case op_es_rdi_m16: second_operand_string = strlit("word ptr es:[rdi]"); break; + case op_es_rdi_m32: second_operand_string = strlit("dword ptr es:[rdi]"); break; + case op_es_rdi_m64: second_operand_string = strlit("qword ptr es:[rdi]"); break; + default: unreachable(); + } + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else + { + todo(); + } + } break; + case 3: + { + todo(); + } break; + case 4: + { + todo(); + 
} break; + default: unreachable(); + } + } + else + { + switch (operand_count) + { + case 1: + { + if (op_is_gpr_no_gpra(first_operand)) + { + u8 first_operand_index = op_gpr_get_index(first_operand); + GPR_x86_64 first_operand_register_count = (unlikely(first_operand_index == 0)) ? (X86_64_GPR_COUNT / 2) : X86_64_GPR_COUNT; + u8 first_rm_buffer[X86_64_GPR_COUNT][array_length(displacements)][32]; + String first_rm_strings[X86_64_GPR_COUNT][array_length(displacements)]; + u8 first_is_rm = op_is_rm(first_operand); + + if (first_is_rm) + { + for (GPR_x86_64 gpr = 0; gpr < X86_64_GPR_COUNT; gpr += 1) + { + String first_operand_rm_name = gpr_to_string(gpr, 3, 0); + + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + first_rm_strings[gpr][displacement_index] = format_displacement((String)array_to_slice(first_rm_buffer[gpr][displacement_index]), first_operand_rm_name, displacement_strings[displacement_index], first_operand_index); + } + } + } + + for (GPR_x86_64 first_gpr = 0; first_gpr < first_operand_register_count; first_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, 0); + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_gpr, + .is_rm_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction1(instruction_text_buffer_slice, mnemonic_string, first_operand_string), + }; + + test_instruction(&setup, &instruction); + } + + if (first_is_rm) + { + for (GPR_x86_64 first_gpr = 0; first_gpr < X86_64_GPR_COUNT; first_gpr += 1) + { + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + String first_operand_string = first_rm_strings[first_gpr][displacement_index]; + + TestInstruction 
instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_gpr, + .is_rm_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .displacement = { .value = displacements[displacement_index] }, + .is_displacement8 = displacement_index != 2, + .is_displacement32 = displacement_index == 2, + .opcode = encoding->opcode, + }, + .text = format_instruction1(instruction_text_buffer_slice, mnemonic_string, first_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + } + } + else if (op_is_relative(first_operand)) + { + String first_operand_string = strlit("-1"); + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w, + .is_relative8 = first_operand == op_rel8, + .is_relative32 = first_operand == op_rel32, + .legacy_prefixes = batch->legacy_prefixes | (encoding->operand_size_override << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + .relative = { .value = 0xffffffff }, + }, + .text = format_instruction1(instruction_text_buffer_slice, mnemonic_string, first_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else if (op_is_memory(first_operand)) + { + u8 first_operand_index = first_operand - op_m8; + String first_operand_indirect_string; + switch (first_operand_index) + { + case 0: first_operand_indirect_string = strlit("byte ptr "); break; + case 1: first_operand_indirect_string = strlit("word ptr "); break; + case 2: first_operand_indirect_string = strlit("dword ptr "); break; + case 3: first_operand_indirect_string = strlit("qword ptr "); break; + case 4: first_operand_indirect_string = strlit("xmmword ptr "); break; + default: unreachable(); + } + + // Segment overrides + { + let_cast(u32, memory_value, sample_immediate_values(2)); + String memory_string = sample_immediate_strings(2); + + for (SegmentRegisterOverride segment_register_override = 0; 
segment_register_override < SEGMENT_REGISTER_OVERRIDE_COUNT; segment_register_override += 1) + { + String segment_register_string = segment_register_override_to_register_string(segment_register_override); + + String parts[] = { + first_operand_indirect_string, + segment_register_string, + strlit(":["), + memory_string, + strlit("]"), + }; + String first_operand_string = arena_join_string(setup.arena, (Slice(String)) array_to_slice(parts)); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 4, + .is_displacement32 = 1, + .legacy_prefixes = batch->legacy_prefixes | (1 << segment_register_overrides[segment_register_override]) | (encoding->operand_size_override << LEGACY_PREFIX_66), + .displacement = { .value = memory_value, }, + .opcode = encoding->opcode, + }, + .text = format_instruction1(instruction_text_buffer_slice, mnemonic_string, first_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + + // No segment override + { + let_cast(u32, memory_value, sample_immediate_values(2)); + String memory_string = sample_immediate_strings(2); + + String parts[] = { + first_operand_indirect_string, + strlit("["), + memory_string, + strlit("]"), + }; + String first_operand_string = arena_join_string(setup.arena, (Slice(String)) array_to_slice(parts)); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 4, + .is_displacement32 = 1, + .legacy_prefixes = batch->legacy_prefixes | (encoding->operand_size_override << LEGACY_PREFIX_66), + .displacement = { .value = memory_value, }, + .opcode = encoding->opcode, + }, + .text = format_instruction1(instruction_text_buffer_slice, mnemonic_string, first_operand_string), + }; + + test_instruction(&setup, &instruction); + } + + for (GPR_x86_64 first_gpr = 0; first_gpr < X86_64_GPR_COUNT; first_gpr += 1) + { + String first_operand_rm_name = gpr_to_string(first_gpr, 3, 0); + + for (u32 displacement_index = 0; 
displacement_index < array_length(displacements); displacement_index += 1) + { + u8 first_operand_buffer[256]; + String first_operand_string = format_displacement((String)array_to_slice(first_operand_buffer), first_operand_rm_name, displacement_strings[displacement_index], first_operand_index); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 4, + .rm_register = first_gpr, + .is_rm_register = 1, + .legacy_prefixes = batch->legacy_prefixes | (encoding->operand_size_override << LEGACY_PREFIX_66), + .displacement = { .value = displacements[displacement_index] }, + .is_displacement8 = displacement_index != 2, + .is_displacement32 = displacement_index == 2, + .opcode = encoding->opcode, + }, + .text = format_instruction1(instruction_text_buffer_slice, mnemonic_string, first_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + } + else if (op_is_imm(first_operand)) + { + u8 first_operand_index = op_imm_get_index(first_operand); + String first_operand_string = sample_immediate_strings(first_operand_index); + u64 immediate = sample_immediate_values(first_operand_index); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .is_immediate = 1 << first_operand_index, + .immediate = { .value = immediate }, + .opcode = encoding->opcode, + }, + .text = format_instruction1(instruction_text_buffer_slice, mnemonic_string, first_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else + { + todo(); + } + } break; + case 2: + { + if (op_is_gpr_no_gpra(first_operand)) + { + u8 first_operand_index = op_gpr_get_index(first_operand); + GPR_x86_64 first_operand_register_count = (unlikely(first_operand_index == 0)) ? 
(X86_64_GPR_COUNT / 2) : X86_64_GPR_COUNT; + + u8 first_rm_buffer[X86_64_GPR_COUNT][array_length(displacements)][32]; + String first_rm_strings[X86_64_GPR_COUNT][array_length(displacements)]; + u8 first_is_rm = op_is_rm(first_operand); + + if (first_is_rm) + { + for (GPR_x86_64 gpr = 0; gpr < X86_64_GPR_COUNT; gpr += 1) + { + String first_operand_rm_name = gpr_to_string(gpr, 3, 0); + + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + first_rm_strings[gpr][displacement_index] = format_displacement((String)array_to_slice(first_rm_buffer[gpr][displacement_index]), first_operand_rm_name, displacement_strings[displacement_index], first_operand_index); + } + } + } + + if (op_is_gpr_no_gpra(second_operand)) + { + u8 second_operand_index = op_gpr_get_index(second_operand); + GPR_x86_64 second_operand_register_count = (unlikely(second_operand_index == 0)) ? (X86_64_GPR_COUNT / 2) : X86_64_GPR_COUNT; + u8 second_is_rm = op_is_rm(second_operand); + u8 second_rm_buffer[X86_64_GPR_COUNT][array_length(displacements)][32]; + String second_rm_strings[X86_64_GPR_COUNT][array_length(displacements)]; + + if (second_is_rm) + { + for (GPR_x86_64 gpr = 0; gpr < X86_64_GPR_COUNT; gpr += 1) + { + String second_operand_rm_name = gpr_to_string(gpr, 3, 0); + + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + second_rm_strings[gpr][displacement_index] = format_displacement((String)array_to_slice(second_rm_buffer[gpr][displacement_index]), second_operand_rm_name, displacement_strings[displacement_index], second_operand_index); + } + } + } + + // Only test with rm_r and not r_rm with register direct addressing mode because it makes no sense otherwise + if (first_is_rm) + { + for (GPR_x86_64 first_gpr = 0; first_gpr < first_operand_register_count; first_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, 0); + + for (GPR_x86_64 
second_gpr = 0; second_gpr < second_operand_register_count; second_gpr += 1) + { + String second_operand_string = gpr_to_string(second_gpr, second_operand_index, 0); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_gpr, + .is_rm_register = 1, + .reg_register = second_gpr, + .is_reg_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || second_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + } + + if (first_is_rm) + { + for (GPR_x86_64 first_gpr = 0; first_gpr < X86_64_GPR_COUNT; first_gpr += 1) + { + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + String first_operand_string = first_rm_strings[first_gpr][displacement_index]; + + for (GPR_x86_64 second_gpr = 0; second_gpr < second_operand_register_count; second_gpr += 1) + { + String second_operand_string = gpr_to_string(second_gpr, second_operand_index, gpr_is_extended(first_gpr)); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_gpr, + .reg_register = second_gpr, + .is_rm_register = 1, + .is_reg_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || second_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .displacement = { .value = displacements[displacement_index] }, + .is_displacement8 = displacement_index != 2, + .is_displacement32 = displacement_index == 2, + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, 
&instruction); + } + } + } + } + + if (second_is_rm) + { + for (GPR_x86_64 first_gpr = 0; first_gpr < first_operand_register_count; first_gpr += 1) + { + for (GPR_x86_64 second_gpr = 0; second_gpr < X86_64_GPR_COUNT; second_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, gpr_is_extended(second_gpr)); + + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + String second_operand_string = second_rm_strings[second_gpr][displacement_index]; + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = second_gpr, + .reg_register = first_gpr, + .is_rm_register = 1, + .is_reg_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .displacement = { .value = displacements[displacement_index] }, + .is_displacement8 = displacement_index != 2, + .is_displacement32 = displacement_index == 2, + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + } + } + } + else if (op_is_gpra(second_operand)) + { + String second_operand_string = op_gpra_to_string(second_operand); + for (GPR_x86_64 first_gpr = 0; first_gpr < first_operand_register_count; first_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, 0); + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_gpr, + .is_rm_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, 
second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + else if (op_is_imm(second_operand)) + { + u8 second_operand_index = op_imm_get_index(second_operand); + String second_operand_string = sample_immediate_strings(second_operand_index); + u64 immediate = sample_immediate_values(second_operand_index); + + for (GPR_x86_64 first_gpr = 0; first_gpr < first_operand_register_count; first_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, 0); + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_gpr, + .is_rm_register = 1, + .is_immediate = 1 << second_operand_index, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .immediate = { .value = immediate }, + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + + if (first_is_rm) + { + for (GPR_x86_64 first_gpr = 0; first_gpr < X86_64_GPR_COUNT; first_gpr += 1) + { + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + String first_operand_string = first_rm_strings[first_gpr][displacement_index]; + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_gpr, + .is_rm_register = 1, + .is_immediate = 1 << second_operand_index, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .immediate = { .value = immediate }, + .displacement = { .value = displacements[displacement_index] }, + .is_displacement8 = displacement_index != 2, + .is_displacement32 = displacement_index == 2, + .opcode = encoding->opcode, + }, + .text = 
format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + } + } + else if (op_is_memory(second_operand)) + { + u8 second_operand_index = second_operand - op_m8; + String second_operand_indirect_string; + String memory_string = sample_immediate_strings(2); + switch (second_operand_index) + { + case 0: second_operand_indirect_string = strlit(""); break; + case 1: second_operand_indirect_string = strlit(""); break; + case 2: second_operand_indirect_string = strlit(""); break; + case 3: second_operand_indirect_string = strlit(""); break; + case 4: second_operand_indirect_string = strlit(""); break; + default: unreachable(); + } + String parts[] = { + second_operand_indirect_string, + strlit("["), + memory_string, + strlit("]"), + }; + String second_operand_string = arena_join_string(setup.arena, (Slice(String)) array_to_slice(parts)); + + for (GPR_x86_64 first_gpr = 0; first_gpr < X86_64_GPR_COUNT; first_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, 0); + + let_cast(u32, memory_value, sample_immediate_values(2)); + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3 || second_operand_index == 3, + .reg_register = first_gpr, + .is_reg_register = 0, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || second_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .displacement = { .value = memory_value, }, + .is_displacement8 = 0, + .is_displacement32 = 1, + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + else if (second_operand == op_one_literal) + { + GPR_x86_64 first_operand_register_count = (unlikely(first_operand_index == 0)) ? 
(X86_64_GPR_COUNT / 2) : X86_64_GPR_COUNT; + for (GPR_x86_64 first_gpr = 0; first_gpr < first_operand_register_count; first_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, 0); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_gpr, + .is_rm_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction1(instruction_text_buffer_slice, mnemonic_string, first_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + else if (second_operand == op_cl) + { + String second_operand_string = strlit("cl"); + GPR_x86_64 first_operand_register_count = (unlikely(first_operand_index == 0)) ? (X86_64_GPR_COUNT / 2) : X86_64_GPR_COUNT; + for (GPR_x86_64 first_gpr = 0; first_gpr < first_operand_register_count; first_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, 0); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_gpr, + .is_rm_register = 1, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + else + { + todo(); + } + } + else if (op_is_ds_rsi_memory(first_operand)) + { + u8 first_operand_index = first_operand - op_ds_rsi_m8; + String first_operand_string; + switch (first_operand) + { + case op_ds_rsi_m8: first_operand_string = strlit("byte ptr [rsi]"); break; + case op_ds_rsi_m16: first_operand_string = strlit("word ptr [rsi]"); break; + case op_ds_rsi_m32: first_operand_string = 
strlit("dword ptr [rsi]"); break; + case op_ds_rsi_m64: first_operand_string = strlit("qword ptr [rsi]"); break; + default: unreachable(); + } + + if (op_is_es_rdi_memory(second_operand)) + { + String second_operand_string; + switch (second_operand) + { + case op_es_rdi_m8: second_operand_string = strlit("byte ptr es:[rdi]"); break; + case op_es_rdi_m16: second_operand_string = strlit("word ptr es:[rdi]"); break; + case op_es_rdi_m32: second_operand_string = strlit("dword ptr es:[rdi]"); break; + case op_es_rdi_m64: second_operand_string = strlit("qword ptr es:[rdi]"); break; + default: unreachable(); + } + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else + { + todo(); + } + } + else if (op_is_es_rdi_memory(first_operand)) + { + u8 first_operand_index = first_operand - op_es_rdi_m8; + String first_operand_string; + switch (first_operand) + { + case op_es_rdi_m8: first_operand_string = strlit("byte ptr es:[rdi]"); break; + case op_es_rdi_m16: first_operand_string = strlit("word ptr es:[rdi]"); break; + case op_es_rdi_m32: first_operand_string = strlit("dword ptr es:[rdi]"); break; + case op_es_rdi_m64: first_operand_string = strlit("qword ptr es:[rdi]"); break; + default: unreachable(); + } + + if (op_is_ds_rsi_memory(second_operand)) + { + u8 second_operand_index = second_operand - op_ds_rsi_m8; + String second_operand_string; + switch (second_operand) + { + case op_ds_rsi_m8: second_operand_string = strlit("byte ptr [rsi]"); break; + case op_ds_rsi_m16: second_operand_string = strlit("word ptr [rsi]"); break; + case op_ds_rsi_m32: second_operand_string = 
strlit("dword ptr [rsi]"); break; + case op_ds_rsi_m64: second_operand_string = strlit("qword ptr [rsi]"); break; + default: unreachable(); + } + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || second_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else if (op_is_gpra(second_operand)) + { + u8 second_operand_index = op_gpra_get_index(second_operand); + String second_operand_string = op_gpra_to_string(second_operand); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || second_operand_index == 3, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else if (second_operand == op_dx) + { + String second_operand_string = strlit("dx"); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else + { + todo(); + } + } + else if (op_is_imm(first_operand)) + { + u8 first_operand_index = op_imm_get_index(first_operand); + u64 first_operand_value = sample_immediate_values(first_operand_index); + String first_operand_string = 
sample_immediate_strings(first_operand_index); + + if (op_is_gpra(second_operand)) + { + let(second_operand_index, op_gpra_get_index(second_operand)); + String second_operand_string = op_gpra_to_string(second_operand); + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || second_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + .immediate = { .value = first_operand_value }, + .is_immediate = 1 << first_operand_index, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else + { + todo(); + } + } + else if (first_operand == op_dx) + { + String first_operand_string = strlit("dx"); + + if (op_is_gpra(second_operand)) + { + let(second_operand_index, op_gpra_get_index(second_operand)); + let(second_operand_string, op_gpra_to_string(second_operand)); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w, + .legacy_prefixes = batch->legacy_prefixes | ((encoding->operand_size_override || second_operand_index == 1) << LEGACY_PREFIX_66), + .opcode = encoding->opcode, + }, + .text = format_instruction2(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string), + }; + + test_instruction(&setup, &instruction); + } + else + { + todo(); + } + } + else + { + todo(); + } + } break; + case 3: + { + OperandId third_operand = encoding->operands.values[2]; + if (op_is_gpr_no_gpra(first_operand)) + { + u8 first_operand_index = op_gpr_get_index(first_operand); + GPR_x86_64 first_operand_register_count = (unlikely(first_operand_index == 0)) ? 
(X86_64_GPR_COUNT / 2) : X86_64_GPR_COUNT; + + u8 first_rm_buffer[X86_64_GPR_COUNT][array_length(displacements)][32]; + String first_rm_strings[X86_64_GPR_COUNT][array_length(displacements)]; + u8 first_is_rm = op_is_rm(first_operand); + + if (first_is_rm) + { + for (GPR_x86_64 gpr = 0; gpr < X86_64_GPR_COUNT; gpr += 1) + { + String first_operand_rm_name = gpr_to_string(gpr, 3, 0); + + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + first_rm_strings[gpr][displacement_index] = format_displacement((String)array_to_slice(first_rm_buffer[gpr][displacement_index]), first_operand_rm_name, displacement_strings[displacement_index], first_operand_index); + } + } + } + + if (op_is_gpr_no_gpra(second_operand)) + { + u8 second_operand_index = op_gpr_get_index(second_operand); + GPR_x86_64 second_operand_register_count = (unlikely(second_operand_index == 0)) ? (X86_64_GPR_COUNT / 2) : X86_64_GPR_COUNT; + u8 second_is_rm = op_is_rm(second_operand); + u8 second_rm_buffer[X86_64_GPR_COUNT][array_length(displacements)][32]; + String second_rm_strings[X86_64_GPR_COUNT][array_length(displacements)]; + + if (second_is_rm) + { + for (GPR_x86_64 gpr = 0; gpr < X86_64_GPR_COUNT; gpr += 1) + { + String second_operand_rm_name = gpr_to_string(gpr, 3, 0); + + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + second_rm_strings[gpr][displacement_index] = format_displacement((String)array_to_slice(second_rm_buffer[gpr][displacement_index]), second_operand_rm_name, displacement_strings[displacement_index], second_operand_index); + } + } + } + + if (op_is_imm(third_operand)) + { + u8 third_operand_index = op_imm_get_index(third_operand); + String third_operand_string = sample_immediate_strings(third_operand_index); + u64 third_operand_value = sample_immediate_values(third_operand_index); + + for (GPR_x86_64 first_gpr = 0; first_gpr < first_operand_register_count; 
first_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, 0); + + for (GPR_x86_64 second_gpr = 0; second_gpr < second_operand_register_count; second_gpr += 1) + { + String second_operand_string = gpr_to_string(second_gpr, second_operand_index, 0); + + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_is_rm ? first_gpr : second_gpr, + .reg_register = first_is_rm ? second_gpr : first_gpr, + .is_rm_register = 1, + .is_reg_register = 1, + .is_immediate = 1 << third_operand_index, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || second_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .immediate = { .value = third_operand_value, }, + .opcode = encoding->opcode, + }, + .text = format_instruction3(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string, third_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + + if (first_is_rm) + { + for (GPR_x86_64 first_gpr = 0; first_gpr < X86_64_GPR_COUNT; first_gpr += 1) + { + for (u32 displacement_index = 0; displacement_index < array_length(displacements); displacement_index += 1) + { + // String first_operand_string = first_rm_strings[first_gpr][displacement_index]; + + for (GPR_x86_64 second_gpr = 0; second_gpr < second_operand_register_count; second_gpr += 1) + { + // String second_operand_string = gpr_to_string(second_gpr, second_operand_index, gpr_is_extended(first_gpr)); + + todo(); + } + } + } + } + + if (second_is_rm) + { + for (GPR_x86_64 first_gpr = 0; first_gpr < first_operand_register_count; first_gpr += 1) + { + for (GPR_x86_64 second_gpr = 0; second_gpr < X86_64_GPR_COUNT; second_gpr += 1) + { + String first_operand_string = gpr_to_string(first_gpr, first_operand_index, gpr_is_extended(second_gpr)); + + for (u32 displacement_index = 0; displacement_index < array_length(displacements); 
displacement_index += 1) + { + String second_operand_string = second_rm_strings[second_gpr][displacement_index]; + TestInstruction instruction = { + .encoding = { + .rex_w = encoding->rex_w || first_operand_index == 3, + .rm_register = first_is_rm ? first_gpr : second_gpr, + .reg_register = first_is_rm ? second_gpr : first_gpr, + .is_rm_register = 1, + .is_reg_register = 1, + .is_immediate = 1 << third_operand_index, + .legacy_prefixes = batch->legacy_prefixes | ((first_operand_index == 1 || second_operand_index == 1 || encoding->operand_size_override) << LEGACY_PREFIX_66), + .immediate = { .value = third_operand_value, }, + .displacement = { .value = displacements[displacement_index] }, + .is_displacement8 = displacement_index != 2, + .is_displacement32 = displacement_index == 2, + .opcode = encoding->opcode, + }, + .text = format_instruction3(instruction_text_buffer_slice, mnemonic_string, first_operand_string, second_operand_string, third_operand_string), + }; + + test_instruction(&setup, &instruction); + } + } + } + } + } + else + { + todo(); + } + } + } + else + { + todo(); + } + } break; + case 4: + { + todo(); + } break; + } + } + + u64 failure_count = 0; + static_assert(array_length(setup.counters) == TEST_MODE_COUNT); + print_string(strlit("\n")); + for (TestMode test_mode = 0; test_mode < TEST_MODE_COUNT; test_mode += 1) + { + String test_mode_string = test_mode_to_string(test_mode); + TestCounter test_counter = setup.counters[test_mode]; + failure_count += test_counter.failure; + if (test_counter.failure) + { + print("[{s}] {s}... 
[FAILED] {u64}/{u64} failures\n", test_mode_string, encoding_string, test_counter.failure, test_counter.total); + } + else + { + print("[{s}] [OK] ({u64}/{u64})\n", test_mode_string, test_counter.total, test_counter.total); + } + } + + print_string(strlit("\n")); + print_string(encoding_separator_string); + print_string(strlit("\n")); + + if (failure_count) + { + failed_execution(); + } + } + } + + return result; +} + +#define encode_instruction(_opcode, _operands)\ + do{\ + Encoding encoding = {\ + .opcode = _opcode,\ + .operands = _operands,\ + };\ + *vb_add(&builder->encodings, 1) = encoding;\ + } while (0) + +#define ops(...) ((Operands){ .values = { __VA_ARGS__ }, .count = array_length(((OperandId[]){ __VA_ARGS__ })), }) +#define ops_implicit_operands(...) ((Operands){ .values = { __VA_ARGS__ }, .count = array_length(((OperandId[]){ __VA_ARGS__ })), .implicit_operands = 1 }) +#define extension_and_opcode(_opcode_extension, ...) ((Opcode) { .length = array_length(((u8[]){__VA_ARGS__})), .bytes = { __VA_ARGS__ }, _opcode_extension }) +#define opcode(...) 
((Opcode) { .length = array_length(((u8[]){__VA_ARGS__})), .bytes = { __VA_ARGS__ } }) + +#define imm8_l 0x10 +#define imm16_l 0x1000 +#define imm32_l 0x10000000 +#define imm64_l 0x1000000000000000 + +#define imm8_s "0x10" +#define imm16_s "0x1000" +#define imm32_s "0x10000000" +#define imm64_s "0x1000000000000000" + +#define imm8_a 0x10, +#define imm16_a 0x00, 0x10, +#define imm32_a 0x00, 0x00, 0x00, 0x10, +#define imm64_a 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + +STRUCT(TestBuilder) +{ + VirtualBuffer(Batch) batches; + VirtualBuffer(Encoding) encodings; +}; + +STRUCT(ArithmeticOptions) +{ + Opcode ra_imm; + Opcode rm_imm; + Opcode rm_imm8; + Opcode rm_r; + Opcode r_rm; +}; + +fn Opcode decrement_opcode(Opcode opcode) +{ + Opcode result = opcode; + result.bytes[0] -= 1; + return result; +} + +fn Batch batch_start(TestBuilder* builder, Mnemonic_x86_64 mnemonic) +{ + Batch batch = { + .mnemonic = mnemonic, + .encoding_offset = builder->encodings.length, + }; + + return batch; +} + +fn Batch batch_start_legacy_prefixes(TestBuilder* builder, Mnemonic_x86_64 mnemonic, u64 legacy_prefixes) +{ + Batch batch = { + .mnemonic = mnemonic, + .legacy_prefixes = legacy_prefixes, + .encoding_offset = builder->encodings.length, + }; + + return batch; +} + +fn void batch_end(TestBuilder* builder, Batch batch) +{ + batch.encoding_count = builder->encodings.length - batch.encoding_offset; + *vb_add(&builder->batches, 1) = batch; +} + +fn void encode_arithmetic_ex(TestBuilder* builder, Mnemonic_x86_64 mnemonic, ArithmeticOptions options) +{ + Batch batch = batch_start(builder, mnemonic); + + Opcode ra_imm8 = decrement_opcode(options.ra_imm); + encode_instruction(ra_imm8, ops(op_al, op_imm8)); + encode_instruction(options.ra_imm, ops(op_ax, op_imm16)); + encode_instruction(options.ra_imm, ops(op_eax, op_imm32)); + encode_instruction(options.ra_imm, ops(op_rax, op_imm32)); + + Opcode rm_imm_8 = decrement_opcode(options.rm_imm); + encode_instruction(rm_imm_8, ops(op_rm8, 
op_imm8)); + encode_instruction(options.rm_imm, ops(op_rm16, op_imm16)); + encode_instruction(options.rm_imm, ops(op_rm32, op_imm32)); + encode_instruction(options.rm_imm, ops(op_rm64, op_imm32)); + + encode_instruction(options.rm_imm8, ops(op_rm16, op_imm8)); + encode_instruction(options.rm_imm8, ops(op_rm32, op_imm8)); + encode_instruction(options.rm_imm8, ops(op_rm64, op_imm8)); + + Opcode rm_r8 = decrement_opcode(options.rm_r); + encode_instruction(rm_r8, ops(op_rm8, op_r8)); + encode_instruction(options.rm_r, ops(op_rm16, op_r16)); + encode_instruction(options.rm_r, ops(op_rm32, op_r32)); + encode_instruction(options.rm_r, ops(op_rm64, op_r64)); + + Opcode r_rm8 = decrement_opcode(options.r_rm); + encode_instruction(r_rm8, ops(op_r8, op_rm8)); + encode_instruction(options.r_rm, ops(op_r16, op_rm16)); + encode_instruction(options.r_rm, ops(op_r32, op_rm32)); + encode_instruction(options.r_rm, ops(op_r64, op_rm64)); + + batch_end(builder, batch); +} +#define encode_arithmetic(_mnemonic, ...) encode_arithmetic_ex(&builder, MNEMONIC_x86_64_ ## _mnemonic, (ArithmeticOptions) { __VA_ARGS__ }) + +fn void encode_unsigned_add_flag(TestBuilder* builder, Mnemonic_x86_64 mnemonic) +{ + let(prefix_66, mnemonic == MNEMONIC_x86_64_adcx); + let(prefix_f3, mnemonic == MNEMONIC_x86_64_adox); + let(legacy_prefixes, (prefix_66 << LEGACY_PREFIX_66) | (prefix_f3 << LEGACY_PREFIX_F3)); + + Batch batch = batch_start_legacy_prefixes(builder, mnemonic, legacy_prefixes); + + encode_instruction(opcode(0x0f, 0x38, 0xf6), ops(op_r32, op_rm32)); + encode_instruction(opcode(0x0f, 0x38, 0xf6), ops(op_r64, op_rm64)); + + batch_end(builder, batch); +} + +// TODO: undo abstraction? 
+fn void encode_bit_instruction(TestBuilder* builder, Mnemonic_x86_64 mnemonic, u8 opcode_byte) +{ + Batch batch = batch_start(builder, mnemonic); + + let(opcode, opcode(0x0f, opcode_byte)); + encode_instruction(opcode, ops(op_r16, op_rm16)); + encode_instruction(opcode, ops(op_r32, op_rm32)); + encode_instruction(opcode, ops(op_r64, op_rm64)); + + batch_end(builder, batch); +} + +typedef enum BitScanKind +{ + BIT_SCAN_FORWARD = 0, + BIT_SCAN_REVERSE = 1, +} BitScanKind; + +fn void encode_bit_scan(TestBuilder* builder, BitScanKind bit_scan_kind) +{ + let(mnemonic, MNEMONIC_x86_64_bsf + bit_scan_kind); + let(opcode_byte, 0xbc | bit_scan_kind); + encode_bit_instruction(builder, mnemonic, opcode_byte); +} + +fn void encode_bswap(TestBuilder* builder) +{ + let(mnemonic, MNEMONIC_x86_64_bswap); + Batch batch = batch_start(builder, mnemonic); + + u8 opcode_bytes[] = { 0x0f, 0xc8 }; + Opcode opcode = { + .length = array_length(opcode_bytes), + .plus_register = 1, + .bytes = { opcode_bytes[0], opcode_bytes[1] }, + }; + + encode_instruction(opcode, ops(op_r32)); + encode_instruction(opcode, ops(op_r64)); + + batch_end(builder, batch); +} + +fn void encode_bit_test(TestBuilder* builder, Mnemonic_x86_64 mnemonic, u8 opcode_last, u8 opcode_extension) +{ + Batch batch = batch_start(builder, mnemonic); + + { + let(opcode, opcode(0x0f, opcode_last)); + + encode_instruction(opcode, ops(op_rm16, op_r16)); + encode_instruction(opcode, ops(op_rm32, op_r32)); + encode_instruction(opcode, ops(op_rm64, op_r64)); + } + + { + u8 opcode_bytes[] = { 0x0f, 0xba }; + Opcode opcode = { + .length = array_length(opcode_bytes), + .extension = opcode_extension, + .bytes = { opcode_bytes[0], opcode_bytes[1] }, + }; + + encode_instruction(opcode, ops(op_rm16, op_imm8)); + encode_instruction(opcode, ops(op_rm32, op_imm8)); + encode_instruction(opcode, ops(op_rm64, op_imm8)); + } + + batch_end(builder, batch); +} + +fn void encode_call(TestBuilder* builder) +{ + Batch batch = batch_start(builder, 
MNEMONIC_x86_64_call); + encode_instruction(opcode(0xe8), ops(op_rel32)); + + encode_instruction(extension_and_opcode(0x02, 0xff), ops(op_rm64)); + + // TODO: Figure out memory offset + + batch_end(builder, batch); +} + +fn void encode_convert(TestBuilder* builder) +{ + u8 base_opcode = 0x98; + + Mnemonic_x86_64 mnemonics[2][3] = { + { MNEMONIC_x86_64_cbw, MNEMONIC_x86_64_cwde, MNEMONIC_x86_64_cdqe }, + { MNEMONIC_x86_64_cwd, MNEMONIC_x86_64_cdq, MNEMONIC_x86_64_cqo }, + }; + + OperandId operands[] = { op_ax, op_eax, op_rax }; + + for (u32 category = 0; category < array_length(mnemonics); category += 1) + { + for (u32 i = 0; i < array_length(mnemonics[0]); i += 1) + { + Batch batch = batch_start(builder, mnemonics[category][i]); + let(implicit_operand, ops(operands[i])); + implicit_operand.implicit_operands = 1; + encode_instruction(opcode(base_opcode + category), implicit_operand); + batch_end(builder, batch); + } + } +} + +fn void encode_no_operand_instruction(TestBuilder* builder, Mnemonic_x86_64 mnemonic, Opcode opcode) +{ + Batch batch = batch_start(builder, mnemonic); + Operands operands = {}; + encode_instruction(opcode, operands); + batch_end(builder, batch); +} + +fn void encode_clflush(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_clflush); + encode_instruction(extension_and_opcode(0x07, 0x0f, 0xae), ops(op_m8)); + batch_end(builder, batch); +} + +fn void encode_clflushopt(TestBuilder* builder) +{ + Batch batch = batch_start_legacy_prefixes(builder, MNEMONIC_x86_64_clflushopt, 1 << LEGACY_PREFIX_66); + encode_instruction(extension_and_opcode(0x07, 0x0f, 0xae), ops(op_m8)); + batch_end(builder, batch); +} + +fn void encode_cmov_instructions(TestBuilder* builder) +{ + for (u8 cmov_index = 0; cmov_index < cmov_count; cmov_index += 1) + { + Mnemonic_x86_64 mnemonic = MNEMONIC_x86_64_cmova + cmov_index; + Batch batch = batch_start(builder, mnemonic); + u8 opcode = 0x40 | cc_opcodes_low[cmov_index]; + 
encode_instruction(opcode(0x0f, opcode), ops(op_r16, op_rm16)); + encode_instruction(opcode(0x0f, opcode), ops(op_r32, op_rm32)); + encode_instruction(opcode(0x0f, opcode), ops(op_r64, op_rm64)); + batch_end(builder, batch); + } +} + +fn void encode_cmps(TestBuilder* builder) +{ + for (u8 i = 0; i < 4; i += 1) + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_cmpsb + i); + Operands operands = { + .values = { op_ds_rsi_m8 + i, op_es_rdi_m8 + i }, + .count = 2, + .implicit_operands = 1, + }; + u8 cmps_opcode = 0xa7 - (i == 0); + encode_instruction(opcode(cmps_opcode), operands); + batch_end(builder, batch); + } +} + +fn void encode_cmpxchg(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_cmpxchg); + encode_instruction(opcode(0x0f, 0xb0), ops(op_rm8, op_r8)); + encode_instruction(opcode(0x0f, 0xb1), ops(op_rm16, op_r16)); + encode_instruction(opcode(0x0f, 0xb1), ops(op_rm32, op_r32)); + encode_instruction(opcode(0x0f, 0xb1), ops(op_rm64, op_r64)); + batch_end(builder, batch); +} + +fn void encode_cmpxchg_bytes(TestBuilder* builder) +{ + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_cmpxchg8b); + encode_instruction(extension_and_opcode(1, 0x0f, 0xc7), ops(op_m64)); + batch_end(builder, batch); + } + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_cmpxchg16b); + encode_instruction(extension_and_opcode(1, 0x0f, 0xc7), ops(op_m128)); + batch_end(builder, batch); + } +} + +fn void encode_crc32(TestBuilder* builder) +{ + Batch batch = batch_start_legacy_prefixes(builder, MNEMONIC_x86_64_crc32, 1 << LEGACY_PREFIX_F2); + encode_instruction(opcode(0x0f, 0x38, 0xf0), ops(op_r32, op_rm8)); + + Encoding encoding = { + .opcode = opcode(0x0f, 0x38, 0xf1), + .operands = ops(op_r32, op_rm16), + .operand_size_override = 1, + }; + *vb_add(&builder->encodings, 1) = encoding; + + encode_instruction(opcode(0x0f, 0x38, 0xf1), ops(op_r32, op_rm32)); + encode_instruction(opcode(0x0f, 0x38, 0xf0), ops(op_r64, op_rm8)); + 
encode_instruction(opcode(0x0f, 0x38, 0xf1), ops(op_r64, op_rm64)); + batch_end(builder, batch); +} + + +typedef enum IncDec +{ + OP_INC = 0, + OP_DEC = 1, +} IncDec; + +fn void encode_dec_inc(TestBuilder* builder, IncDec inc_dec) +{ + Batch batch = batch_start(builder, inc_dec == OP_DEC ? MNEMONIC_x86_64_dec : MNEMONIC_x86_64_inc); + encode_instruction(extension_and_opcode(inc_dec, 0xfe), ops(op_rm8)); + encode_instruction(extension_and_opcode(inc_dec, 0xff), ops(op_rm16)); + encode_instruction(extension_and_opcode(inc_dec, 0xff), ops(op_rm32)); + encode_instruction(extension_and_opcode(inc_dec, 0xff), ops(op_rm64)); + batch_end(builder, batch); +} + +typedef enum Signedness +{ + SIGNEDNESS_UNSIGNED = 0, + SIGNEDNESS_SIGNED = 1, +} Signedness; + +fn void encode_div(TestBuilder* builder, Signedness signedness) +{ + global_variable const Mnemonic_x86_64 div_mnemonics[] = { MNEMONIC_x86_64_div, MNEMONIC_x86_64_idiv }; + Batch batch = batch_start(builder, div_mnemonics[signedness]); + u8 opcode_extension = 6 | signedness; + encode_instruction(extension_and_opcode(opcode_extension, 0xf6), ops(op_rm8)); + encode_instruction(extension_and_opcode(opcode_extension, 0xf7), ops(op_rm16)); + encode_instruction(extension_and_opcode(opcode_extension, 0xf7), ops(op_rm32)); + encode_instruction(extension_and_opcode(opcode_extension, 0xf7), ops(op_rm64)); + batch_end(builder, batch); +} + +fn void encode_imul(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_imul); + + encode_instruction(extension_and_opcode(5, 0xf6), ops(op_rm8)); + encode_instruction(extension_and_opcode(5, 0xf7), ops(op_rm16)); + encode_instruction(extension_and_opcode(5, 0xf7), ops(op_rm32)); + encode_instruction(extension_and_opcode(5, 0xf7), ops(op_rm64)); + + encode_instruction(opcode(0x0f, 0xaf), ops(op_r16, op_rm16)); + encode_instruction(opcode(0x0f, 0xaf), ops(op_r32, op_rm32)); + encode_instruction(opcode(0x0f, 0xaf), ops(op_r64, op_rm64)); + + 
encode_instruction(opcode(0x6b), ops(op_r16, op_rm16, op_imm8)); + encode_instruction(opcode(0x6b), ops(op_r32, op_rm32, op_imm8)); + encode_instruction(opcode(0x6b), ops(op_r64, op_rm64, op_imm8)); + + encode_instruction(opcode(0x69), ops(op_r16, op_rm16, op_imm16)); + encode_instruction(opcode(0x69), ops(op_r32, op_rm32, op_imm32)); + encode_instruction(opcode(0x69), ops(op_r64, op_rm64, op_imm32)); + + batch_end(builder, batch); +} + +fn void encode_in(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_in); + + encode_instruction(opcode(0xe4), ops(op_al, op_imm8)); + encode_instruction(opcode(0xe5), ops(op_ax, op_imm8)); + encode_instruction(opcode(0xe5), ops(op_eax, op_imm8)); + + encode_instruction(opcode(0xec), ops_implicit_operands(op_al, op_dx)); + encode_instruction(opcode(0xed), ops_implicit_operands(op_ax, op_dx)); + encode_instruction(opcode(0xed), ops_implicit_operands(op_eax, op_dx)); + + batch_end(builder, batch); +} + +fn void encode_ins(TestBuilder* builder) +{ + for (u8 i = 0; i < 3; i += 1) + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_insb + i); + Operands operands = { + .values = { op_es_rdi_m8 + i, op_dx }, + .count = 2, + .implicit_operands = 1, + }; + u8 ins_opcode = 0x6d - (i == 0); + encode_instruction(opcode(ins_opcode), operands); + batch_end(builder, batch); + } +} + +fn void encode_int(TestBuilder* builder) +{ + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_int); + encode_instruction(opcode(0xcd), ops(op_imm8)); + batch_end(builder, batch); + } + Operands operands = {}; + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_int3); + encode_instruction(opcode(0xcc), operands); + batch_end(builder, batch); + } +} + +fn void encode_invlpg(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_invlpg); + encode_instruction(extension_and_opcode(7, 0x0f, 0x01), ops(op_m8)); + batch_end(builder, batch); +} + +fn void encode_iret(TestBuilder* builder) +{ + Operands 
operands = {}; + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_iret); + Encoding encoding = { + .opcode = opcode(0xcf), + .operands = operands, + .operand_size_override = 1, + }; + *vb_add(&builder->encodings, 1) = encoding; + batch_end(builder, batch); + } + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_iretd); + encode_instruction(opcode(0xcf), operands); + batch_end(builder, batch); + } + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_iretq); + Encoding encoding = { + .opcode = opcode(0xcf), + .operands = operands, + .rex_w = 1, + }; + *vb_add(&builder->encodings, 1) = encoding; + batch_end(builder, batch); + } +} + +fn void encode_jmp(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_jmp); + encode_instruction(opcode(0xeb), ops(op_rel8)); + encode_instruction(opcode(0xe9), ops(op_rel32)); + + encode_instruction(extension_and_opcode(4, 0xff), ops(op_rm64)); + + // TODO: (m16,m32,m64):(16,32,64) + + batch_end(builder, batch); +} + +fn void encode_jcc(TestBuilder* builder) +{ + for (u8 jcc_i = 0; jcc_i < jcc_count; jcc_i += 1) + { + Mnemonic_x86_64 mnemonic = MNEMONIC_x86_64_ja + jcc_i; + Batch batch = batch_start(builder, mnemonic); + encode_instruction(opcode(0x70 | cc_opcodes_low[jcc_i]), ops(op_rel8)); + encode_instruction(opcode(0x0f, 0x80 | cc_opcodes_low[jcc_i]), ops(op_rel32)); + batch_end(builder, batch); + } + + Mnemonic_x86_64 mnemonic = MNEMONIC_x86_64_jrcxz; + Batch batch = batch_start(builder, mnemonic); + encode_instruction(opcode(0xe3), ops(op_rel8)); + batch_end(builder, batch); +} + +fn void encode_lea(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_lea); + encode_instruction(opcode(0x8d), ops(op_r16, op_m16)); + encode_instruction(opcode(0x8d), ops(op_r32, op_m32)); + encode_instruction(opcode(0x8d), ops(op_r64, op_m64)); + batch_end(builder, batch); +} + +fn void encode_lods(TestBuilder* builder) +{ + for (u8 i = 0; i < 4; i += 1) + { + Batch batch = 
batch_start(builder, MNEMONIC_x86_64_lodsb + i); + Operands operands = { + .values = { op_al + i, op_ds_rsi_m8 + i }, + .count = 2, + .implicit_operands = 1, + }; + u8 lods_opcode = 0xad - (i == 0); + encode_instruction(opcode(lods_opcode), operands); + batch_end(builder, batch); + } +} + +fn void encode_loop(TestBuilder* builder) +{ + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_loop); + encode_instruction(opcode(0xe2), ops(op_rel8)); + batch_end(builder, batch); + } + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_loope); + encode_instruction(opcode(0xe1), ops(op_rel8)); + batch_end(builder, batch); + } + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_loopne); + encode_instruction(opcode(0xe0), ops(op_rel8)); + batch_end(builder, batch); + } +} + +fn void encode_mov(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_mov); + + encode_instruction(opcode(0x88), ops(op_rm8, op_r8)); + encode_instruction(opcode(0x89), ops(op_rm16, op_r16)); + encode_instruction(opcode(0x89), ops(op_rm32, op_r32)); + encode_instruction(opcode(0x89), ops(op_rm64, op_r64)); + + encode_instruction(opcode(0x8a), ops(op_r8, op_rm8)); + encode_instruction(opcode(0x8b), ops(op_r16, op_rm16)); + encode_instruction(opcode(0x8b), ops(op_r32, op_rm32)); + encode_instruction(opcode(0x8b), ops(op_r64, op_rm64)); + + // TODO: segments + + Opcode opcode = { + .bytes = { 0xb0 }, + .length = 1, + .plus_register = 1, + }; + encode_instruction(opcode, ops(op_r8, op_imm8)); + opcode.bytes[0] |= 8; + encode_instruction(opcode, ops(op_r16, op_imm16)); + encode_instruction(opcode, ops(op_r32, op_imm32)); + encode_instruction(opcode, ops(op_r64, op_imm64)); + + encode_instruction(extension_and_opcode(0, 0xc6), ops(op_rm8, op_imm8)); + encode_instruction(extension_and_opcode(0, 0xc7), ops(op_rm16, op_imm16)); + encode_instruction(extension_and_opcode(0, 0xc7), ops(op_rm32, op_imm32)); + encode_instruction(extension_and_opcode(0, 0xc7), ops(op_rm64, 
op_imm32)); + + batch_end(builder, batch); +} + +fn void encode_movs(TestBuilder* builder) +{ + for (u8 i = 0; i < 4; i += 1) + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_movsb + i); + Operands operands = { + .values = { op_es_rdi_m8 + i, op_ds_rsi_m8 + i }, + .count = 2, + .implicit_operands = 1, + }; + u8 movs_opcode = 0xa5 - (i == 0); + Encoding encoding = { + .operands = operands, + .opcode = opcode(movs_opcode), + .rex_w = i == 3, + }; + *vb_add(&builder->encodings, 1) = encoding; + batch_end(builder, batch); + } +} + +fn void encode_movsx(TestBuilder* builder) +{ + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_movsx); + encode_instruction(opcode(0x0f, 0xbe), ops(op_r16, op_rm8)); + encode_instruction(opcode(0x0f, 0xbe), ops(op_r32, op_rm8)); + encode_instruction(opcode(0x0f, 0xbe), ops(op_r64, op_rm8)); + encode_instruction(opcode(0x0f, 0xbf), ops(op_r32, op_rm16)); + encode_instruction(opcode(0x0f, 0xbf), ops(op_r64, op_rm16)); + batch_end(builder, batch); + } + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_movsxd); + encode_instruction(opcode(0x63), ops(op_r64, op_rm32)); + batch_end(builder, batch); + } +} + +fn void encode_movzx(TestBuilder* builder) +{ + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_movzx); + encode_instruction(opcode(0x0f, 0xb6), ops(op_r16, op_rm8)); + encode_instruction(opcode(0x0f, 0xb6), ops(op_r32, op_rm8)); + encode_instruction(opcode(0x0f, 0xb6), ops(op_r64, op_rm8)); + + encode_instruction(opcode(0x0f, 0xb7), ops(op_r32, op_rm16)); + encode_instruction(opcode(0x0f, 0xb7), ops(op_r64, op_rm16)); + batch_end(builder, batch); + } +} + +fn void encode_mul(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_mul); + encode_instruction(extension_and_opcode(4, 0xf6), ops(op_rm8)); + encode_instruction(extension_and_opcode(4, 0xf7), ops(op_rm16)); + encode_instruction(extension_and_opcode(4, 0xf7), ops(op_rm32)); + encode_instruction(extension_and_opcode(4, 0xf7), 
ops(op_rm64)); + batch_end(builder, batch); +} + +fn void encode_neg(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_neg); + encode_instruction(extension_and_opcode(3, 0xf6), ops(op_rm8)); + encode_instruction(extension_and_opcode(3, 0xf7), ops(op_rm16)); + encode_instruction(extension_and_opcode(3, 0xf7), ops(op_rm32)); + encode_instruction(extension_and_opcode(3, 0xf7), ops(op_rm64)); + batch_end(builder, batch); +} + +fn void encode_nop(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_nop); + encode_instruction(opcode(0x90), (Operands){}); + encode_instruction(extension_and_opcode(0, 0x0f, 0x1f), ops(op_rm16)); + encode_instruction(extension_and_opcode(0, 0x0f, 0x1f), ops(op_rm32)); + batch_end(builder, batch); +} + +fn void encode_not(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_not); + encode_instruction(extension_and_opcode(2, 0xf6), ops(op_rm8)); + encode_instruction(extension_and_opcode(2, 0xf7), ops(op_rm16)); + encode_instruction(extension_and_opcode(2, 0xf7), ops(op_rm32)); + encode_instruction(extension_and_opcode(2, 0xf7), ops(op_rm64)); + batch_end(builder, batch); +} + +fn void encode_out(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_out); + + encode_instruction(opcode(0xe6), ops(op_imm8, op_al)); + encode_instruction(opcode(0xe7), ops(op_imm8, op_ax)); + encode_instruction(opcode(0xe7), ops(op_imm8, op_eax)); + + encode_instruction(opcode(0xee), ops_implicit_operands(op_dx, op_al)); + encode_instruction(opcode(0xef), ops_implicit_operands(op_dx, op_ax)); + encode_instruction(opcode(0xef), ops_implicit_operands(op_dx, op_eax)); + + batch_end(builder, batch); +} + +fn void encode_outs(TestBuilder* builder) +{ + for (u8 i = 0; i < 3; i += 1) + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_outsb + i); + Operands operands = { + .values = { op_dx, op_ds_rsi_m8 + i }, + .count = 2, + .implicit_operands = 1, + }; + u8 
outs_opcode = 0x6f - (i == 0); + encode_instruction(opcode(outs_opcode), operands); + batch_end(builder, batch); + } +} + +fn void encode_pop(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_pop); + + encode_instruction(extension_and_opcode(0, 0x8f), ops(op_rm16)); + encode_instruction(extension_and_opcode(0, 0x8f), ops(op_rm64)); + + Opcode opcode = { + .bytes = { 0x58 }, + .length = 1, + .plus_register = 1, + }; + encode_instruction(opcode, ops(op_r16)); + encode_instruction(opcode, ops(op_r64)); + + batch_end(builder, batch); +} + +fn void encode_popcnt(TestBuilder* builder) +{ + Batch batch = batch_start_legacy_prefixes(builder, MNEMONIC_x86_64_popcnt, 1 << LEGACY_PREFIX_F3); + + encode_instruction(opcode(0x0f, 0xb8), ops(op_r16, op_rm16)); + encode_instruction(opcode(0x0f, 0xb8), ops(op_r32, op_rm32)); + encode_instruction(opcode(0x0f, 0xb8), ops(op_r64, op_rm64)); + + batch_end(builder, batch); +} + +fn void encode_popf(TestBuilder* builder) +{ + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_popf); + Encoding encoding = { + .opcode = { + .bytes = { 0x9d }, + .length = 1, + }, + .operand_size_override = 1, + }; + *vb_add(&builder->encodings, 1) = encoding; + batch_end(builder, batch); + } + + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_popfq); + Encoding encoding = { + .opcode = { + .bytes = { 0x9d }, + .length = 1, + }, + .operand_size_override = 0, + }; + *vb_add(&builder->encodings, 1) = encoding; + batch_end(builder, batch); + } +} + +fn void encode_prefetch(TestBuilder* builder) +{ + for (u8 i = 0; i < 3; i += 1) + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_prefetcht0 + i); + encode_instruction(extension_and_opcode(i + 1, 0x0f, 0x18), ops(op_m8)); + batch_end(builder, batch); + } + + Batch batch = batch_start(builder, MNEMONIC_x86_64_prefetchnta); + encode_instruction(extension_and_opcode(0, 0x0f, 0x18), ops(op_m8)); + batch_end(builder, batch); +} + +fn void encode_push(TestBuilder* builder) +{ 
+ Batch batch = batch_start(builder, MNEMONIC_x86_64_push); + + encode_instruction(extension_and_opcode(6, 0xff), ops(op_rm16)); + encode_instruction(extension_and_opcode(6, 0xff), ops(op_rm64)); + + Opcode opcode = { + .bytes = { 0x50 }, + .length = 1, + .plus_register = 1, + }; + encode_instruction(opcode, ops(op_r16)); + encode_instruction(opcode, ops(op_r64)); + + encode_instruction(opcode(0x6a), ops(op_imm8)); + encode_instruction(opcode(0x68), ops(op_imm16)); + encode_instruction(opcode(0x68), ops(op_imm32)); + + batch_end(builder, batch); +} + +fn void encode_pushf(TestBuilder* builder) +{ + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_pushf); + Encoding encoding = { + .opcode = { + .bytes = { 0x9c }, + .length = 1, + }, + .operand_size_override = 1, + }; + *vb_add(&builder->encodings, 1) = encoding; + batch_end(builder, batch); + } + + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_pushfq); + Encoding encoding = { + .opcode = { + .bytes = { 0x9c }, + .length = 1, + }, + .operand_size_override = 0, + }; + *vb_add(&builder->encodings, 1) = encoding; + batch_end(builder, batch); + } +} + +fn void encode_rotate(TestBuilder* builder) +{ + Mnemonic_x86_64 mnemonics[] = { MNEMONIC_x86_64_rol, MNEMONIC_x86_64_ror, MNEMONIC_x86_64_rcl, MNEMONIC_x86_64_rcr }; + for (u8 opcode_extension = 0; opcode_extension < 4; opcode_extension += 1) + { + Batch batch = batch_start(builder, mnemonics[opcode_extension]); + + encode_instruction(extension_and_opcode(opcode_extension, 0xd0), ops(op_rm8, op_one_literal)); + encode_instruction(extension_and_opcode(opcode_extension, 0xd2), ops(op_rm8, op_cl)); + encode_instruction(extension_and_opcode(opcode_extension, 0xc0), ops(op_rm8, op_imm8)); + + encode_instruction(extension_and_opcode(opcode_extension, 0xd1), ops(op_rm16, op_one_literal)); + encode_instruction(extension_and_opcode(opcode_extension, 0xd3), ops(op_rm16, op_cl)); + encode_instruction(extension_and_opcode(opcode_extension, 0xc1), ops(op_rm16, 
op_imm8)); + + encode_instruction(extension_and_opcode(opcode_extension, 0xd1), ops(op_rm32, op_one_literal)); + encode_instruction(extension_and_opcode(opcode_extension, 0xd3), ops(op_rm32, op_cl)); + encode_instruction(extension_and_opcode(opcode_extension, 0xc1), ops(op_rm32, op_imm8)); + + encode_instruction(extension_and_opcode(opcode_extension, 0xd1), ops(op_rm64, op_one_literal)); + encode_instruction(extension_and_opcode(opcode_extension, 0xd3), ops(op_rm64, op_cl)); + encode_instruction(extension_and_opcode(opcode_extension, 0xc1), ops(op_rm64, op_imm8)); + + batch_end(builder, batch); + } +} + +typedef enum ReturnType +{ + RETURN_TYPE_NEAR, + RETURN_TYPE_FAR, +} ReturnType; + +fn void encode_ret(TestBuilder* builder, ReturnType return_type) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_ret + (return_type == RETURN_TYPE_FAR)); + + let(opcode_flag, (u8)safe_flag(0b1000, return_type == RETURN_TYPE_FAR)); + + encode_instruction(opcode(0xc3 | opcode_flag), (Operands) {}); + encode_instruction(opcode(0xc2 | opcode_flag), ops(op_imm16)); + + batch_end(builder, batch); +} + +fn void encode_shift(TestBuilder* builder) +{ + Mnemonic_x86_64 mnemonics[] = { MNEMONIC_x86_64_sal, MNEMONIC_x86_64_sar, MNEMONIC_x86_64_shl, MNEMONIC_x86_64_shr }; + u8 opcode_extensions[] = { 4, 7, 4, 5 }; + + for (u8 i = 0; i < 4; i += 1) + { + Batch batch = batch_start(builder, mnemonics[i]); + u8 opcode_extension = opcode_extensions[i]; + + encode_instruction(extension_and_opcode(opcode_extension, 0xd0), ops(op_rm8, op_one_literal)); + encode_instruction(extension_and_opcode(opcode_extension, 0xd2), ops(op_rm8, op_cl)); + encode_instruction(extension_and_opcode(opcode_extension, 0xc0), ops(op_rm8, op_imm8)); + + encode_instruction(extension_and_opcode(opcode_extension, 0xd1), ops(op_rm16, op_one_literal)); + encode_instruction(extension_and_opcode(opcode_extension, 0xd3), ops(op_rm16, op_cl)); + encode_instruction(extension_and_opcode(opcode_extension, 0xc1), ops(op_rm16, 
op_imm8)); + + encode_instruction(extension_and_opcode(opcode_extension, 0xd1), ops(op_rm32, op_one_literal)); + encode_instruction(extension_and_opcode(opcode_extension, 0xd3), ops(op_rm32, op_cl)); + encode_instruction(extension_and_opcode(opcode_extension, 0xc1), ops(op_rm32, op_imm8)); + + encode_instruction(extension_and_opcode(opcode_extension, 0xd1), ops(op_rm64, op_one_literal)); + encode_instruction(extension_and_opcode(opcode_extension, 0xd3), ops(op_rm64, op_cl)); + encode_instruction(extension_and_opcode(opcode_extension, 0xc1), ops(op_rm64, op_imm8)); + + batch_end(builder, batch); + } +} + +fn void encode_scas(TestBuilder* builder) +{ + for (u8 i = 0; i < 4; i += 1) + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_scasb + i); + Operands operands = { + .values = { op_al + i, op_es_rdi_m8 + i }, + .count = 2, + .implicit_operands = 1, + }; + u8 scas_opcode = 0xaf - (i == 0); + encode_instruction(opcode(scas_opcode), operands); + batch_end(builder, batch); + } +} + +fn void encode_setcc(TestBuilder* builder) +{ + for (u8 i = 0; i < setcc_count; i += 1) + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_seta + i); + encode_instruction(opcode(0x0f, 0x90 | cc_opcodes_low[i]), ops(op_rm8)); + batch_end(builder, batch); + } +} + +fn void encode_stos(TestBuilder* builder) +{ + for (u8 i = 0; i < 4; i += 1) + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_stosb + i); + Operands operands = { + .values = { op_es_rdi_m8 + i, op_al + i }, + .count = 2, + .implicit_operands = 1, + }; + u8 stos_opcode = 0xab - (i == 0); + encode_instruction(opcode(stos_opcode), operands); + batch_end(builder, batch); + } +} + +fn void encode_test(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_test); + + encode_instruction(opcode(0xa8), ops(op_al, op_imm8)); + encode_instruction(opcode(0xa9), ops(op_ax, op_imm16)); + encode_instruction(opcode(0xa9), ops(op_eax, op_imm32)); + encode_instruction(opcode(0xa9), ops(op_rax, op_imm32)); + 
+ encode_instruction(extension_and_opcode(0, 0xf6), ops(op_rm8, op_imm8)); + encode_instruction(extension_and_opcode(0, 0xf7), ops(op_rm16, op_imm16)); + encode_instruction(extension_and_opcode(0, 0xf7), ops(op_rm32, op_imm32)); + encode_instruction(extension_and_opcode(0, 0xf7), ops(op_rm64, op_imm32)); + + encode_instruction(opcode(0x84), ops(op_rm8, op_r8)); + encode_instruction(opcode(0x85), ops(op_rm16, op_r16)); + encode_instruction(opcode(0x85), ops(op_rm32, op_r32)); + encode_instruction(opcode(0x85), ops(op_rm64, op_r64)); + + batch_end(builder, batch); +} + +fn void encode_ud(TestBuilder* builder) +{ + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_ud0); + encode_instruction(opcode(0x0f, 0xff), ops(op_r32, op_rm32)); + batch_end(builder, batch); + } + + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_ud1); + encode_instruction(opcode(0x0f, 0xb9), ops(op_r32, op_rm32)); + batch_end(builder, batch); + } + + { + Batch batch = batch_start(builder, MNEMONIC_x86_64_ud2); + Operands operands = {}; + encode_instruction(opcode(0x0f, 0x0b), operands); + batch_end(builder, batch); + } +} + +fn void encode_xadd(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_xadd); + encode_instruction(opcode(0x0f, 0xc0), ops(op_rm8, op_r8)); + encode_instruction(opcode(0x0f, 0xc1), ops(op_rm16, op_r16)); + encode_instruction(opcode(0x0f, 0xc1), ops(op_rm32, op_r32)); + encode_instruction(opcode(0x0f, 0xc1), ops(op_rm64, op_r64)); + batch_end(builder, batch); +} + +fn void encode_xchg(TestBuilder* builder) +{ + Batch batch = batch_start(builder, MNEMONIC_x86_64_xchg); + + Opcode opcode = { + .bytes = { 0x90 }, + .length = 1, + .plus_register = 1, + }; + + encode_instruction(opcode, ops(op_ax, op_r16)); + encode_instruction(opcode, ops(op_r16, op_ax)); + encode_instruction(opcode, ops(op_eax, op_r32)); + encode_instruction(opcode, ops(op_r32, op_eax)); + encode_instruction(opcode, ops(op_rax, op_r64)); + encode_instruction(opcode, 
ops(op_r64, op_rax)); + + encode_instruction(opcode(0x86), ops(op_r8, op_rm8)); + encode_instruction(opcode(0x87), ops(op_r16, op_rm16)); + encode_instruction(opcode(0x87), ops(op_r32, op_rm32)); + encode_instruction(opcode(0x87), ops(op_r64, op_rm64)); + + encode_instruction(opcode(0x86), ops(op_rm8, op_r8)); + encode_instruction(opcode(0x87), ops(op_rm16, op_r16)); + encode_instruction(opcode(0x87), ops(op_rm32, op_r32)); + encode_instruction(opcode(0x87), ops(op_rm64, op_r64)); + + batch_end(builder, batch); +} + +fn TestDataset construct_test_cases() +{ + TestBuilder builder = {}; + + encode_arithmetic(adc, .ra_imm = opcode(0x15), .rm_imm = extension_and_opcode(0x02, 0x81), .rm_imm8 = extension_and_opcode(0x02, 0x83), .rm_r = opcode(0x11), .r_rm = opcode(0x13)); + encode_unsigned_add_flag(&builder, MNEMONIC_x86_64_adcx); + encode_arithmetic(add, .ra_imm = opcode(0x05), .rm_imm = extension_and_opcode(0x00, 0x81), .rm_imm8 = extension_and_opcode(0x00, 0x83), .rm_r = opcode(0x01), .r_rm = opcode(0x03)); + encode_unsigned_add_flag(&builder, MNEMONIC_x86_64_adox); + encode_arithmetic(and, .ra_imm = opcode(0x25), .rm_imm = extension_and_opcode(0x04, 0x81), .rm_imm8 = extension_and_opcode(0x04, 0x83), .rm_r = opcode(0x21), .r_rm = opcode(0x23)); + encode_bit_scan(&builder, BIT_SCAN_FORWARD); + encode_bit_scan(&builder, BIT_SCAN_REVERSE); + encode_bswap(&builder); + encode_bit_test(&builder, MNEMONIC_x86_64_bt, 0xa3, 0x04); + encode_bit_test(&builder, MNEMONIC_x86_64_btc, 0xbb, 0x07); + encode_bit_test(&builder, MNEMONIC_x86_64_btr, 0xb3, 0x06); + encode_bit_test(&builder, MNEMONIC_x86_64_bts, 0xab, 0x05); + encode_call(&builder); + encode_convert(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_clc, opcode(0xf8)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_cld, opcode(0xfc)); + encode_clflush(&builder); + encode_clflushopt(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_cli, opcode(0xfa)); + 
encode_no_operand_instruction(&builder, MNEMONIC_x86_64_clts, opcode(0x0f, 0x06)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_cmc, opcode(0xf5)); + encode_cmov_instructions(&builder); + encode_arithmetic(cmp, .ra_imm = opcode(0x3d), .rm_imm = extension_and_opcode(0x07, 0x81), .rm_imm8 = extension_and_opcode(0x07, 0x83), .rm_r = opcode(0x39), .r_rm = opcode(0x3b)); + encode_cmps(&builder); + encode_cmpxchg(&builder); + encode_cmpxchg_bytes(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_cpuid, opcode(0x0f, 0xa2)); + encode_crc32(&builder); + encode_dec_inc(&builder, OP_DEC); + encode_div(&builder, SIGNEDNESS_UNSIGNED); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_hlt, opcode(0xf4)); + encode_div(&builder, SIGNEDNESS_SIGNED); + encode_imul(&builder); + encode_in(&builder); + encode_dec_inc(&builder, OP_INC); + encode_ins(&builder); + encode_int(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_invd, opcode(0x0f, 0x08)); + encode_invlpg(&builder); + encode_iret(&builder); + encode_jmp(&builder); + encode_jcc(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_lahf, opcode(0x9f)); + encode_lea(&builder); + encode_lods(&builder); + encode_loop(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_monitor, opcode(0x0f, 0x01, 0xc8)); + encode_mov(&builder); + encode_movs(&builder); + encode_movsx(&builder); + encode_movzx(&builder); + encode_mul(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_mwait, opcode(0x0f, 0x01, 0xc9)); + encode_neg(&builder); + encode_nop(&builder); + encode_not(&builder); + encode_arithmetic(or, .ra_imm = opcode(0x0d), .rm_imm = extension_and_opcode(1, 0x81), .rm_imm8 = extension_and_opcode(1, 0x83), .rm_r = opcode(0x09), .r_rm = opcode(0x0b)); + encode_out(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_pause, opcode(0xf3, 0x90)); + encode_pop(&builder); + encode_popcnt(&builder); + encode_popf(&builder); + 
encode_prefetch(&builder); + encode_push(&builder); + encode_pushf(&builder); + encode_rotate(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_rdmsr, opcode(0x0f, 0x32)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_rdpmc, opcode(0x0f, 0x33)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_rdtsc, opcode(0x0f, 0x31)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_rdtscp, opcode(0x0f, 0x01, 0xf9)); + encode_ret(&builder, RETURN_TYPE_NEAR); + encode_ret(&builder, RETURN_TYPE_FAR); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_rsm, opcode(0x0f, 0xaa)); + encode_shift(&builder); + encode_arithmetic(sbb, .ra_imm = opcode(0x1d), .rm_imm = extension_and_opcode(3, 0x81), .rm_imm8 = extension_and_opcode(3, 0x83), .rm_r = opcode(0x19), .r_rm = opcode(0x1b)); + encode_scas(&builder); + encode_setcc(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_stc, opcode(0xf9)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_std, opcode(0xfd)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_sti, opcode(0xfb)); + encode_stos(&builder); + encode_arithmetic(sub, .ra_imm = opcode(0x2d), .rm_imm = extension_and_opcode(5, 0x81), .rm_imm8 = extension_and_opcode(5, 0x83), .rm_r = opcode(0x29), .r_rm = opcode(0x2b)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_syscall, opcode(0x0f, 0x05)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_sysenter, opcode(0x0f, 0x34)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_sysexit, opcode(0x0f, 0x35)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_sysret, opcode(0x0f, 0x07)); + encode_test(&builder); + encode_ud(&builder); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_wbinvd, opcode(0x0f, 0x09)); + encode_no_operand_instruction(&builder, MNEMONIC_x86_64_wrmsr, opcode(0x0f, 0x30)); + encode_xadd(&builder); + encode_xchg(&builder); + encode_arithmetic(xor, .ra_imm = opcode(0x35), .rm_imm = 
extension_and_opcode(6, 0x81), .rm_imm8 = extension_and_opcode(6, 0x83), .rm_r = opcode(0x31), .r_rm = opcode(0x33)); + + TestDataset result = { + .batches = builder.batches.pointer, + .batch_count = builder.batches.length, + .encodings = builder.encodings.pointer, + .encoding_count = builder.encodings.length, + }; + + return result; +} + +fn u8 is_asm_space(u8 ch) +{ + return ch == '\t' || ch == ' ' || ch == '\n' || ch == '\r'; +} + +fn u64 find_next_space(String string) +{ + return MIN(MIN(string_first_ch(string, ' '), string_first_ch(string, '\t')), MIN(string_first_ch(string, '\n'), string_first_ch(string, '\r'))); +} + +fn void n_word_mask(CStringSlice words, u8* mask, u8 length) +{ + for (u8 i = 0; i < length; i += 1) + { + mask[i] = 0xff; + } + + for (u8 byte = 0; byte < length; byte += 1) + { + for (u8 bit = 0; bit < 8; bit += 1) + { + u8 old = mask[byte]; + mask[byte] &= ~(u8)(1 << bit); + + u8 map[16*16][16] = {}; + u32 map_item_count = 0; + u8 candidate[16] = {}; + + for (u64 word_index = 0; word_index < words.length; word_index += 1) + { + char* word = words.pointer[word_index]; + for (u8 mask_index = 0; mask_index < length; mask_index += 1) + { + candidate[mask_index] = word[mask_index] & mask[mask_index]; + } + + u8 map_index; + for (map_index = 0; map_index < map_item_count; map_index += 1) + { + if (memcmp(map[map_index], candidate, length) == 0) + { + break; + } + } + + if (map_index != map_item_count) + { + mask[byte] = old; + break; + } + + memcpy(map[map_item_count], candidate, length); + map_item_count += 1; + } + } + } +} + +typedef enum OperandKind : u8 +{ + OPERAND_KIND_REGISTER, + OPERAND_KIND_IMMEDIATE, + OPERAND_KIND_INDIRECT, +} OperandKind; +STRUCT(OperandWork) +{ + OperandKind kind:2; + u8 size:6; +}; +static_assert(sizeof(OperandWork) == 1); + +STRUCT(OperandIndirect) +{ + x86_64_Register base; + s8 displacement8; + s32 displacement32; +}; + +STRUCT(MnemonicEncoding) +{ + OperandId operands[4]; +}; + +String assemble(String text) +{ + 
u8* buffer = os_reserve(0, align_forward_u64(text.length + 0x4000, 0x1000), (OSReserveProtectionFlags) { .read = 1, .write = 1 }, (OSReserveMapFlags) { .priv = 1, .anon = 1, .populate = 1 }); + u8* source = text.pointer; + + u8* top = source + text.length; + u8* destination = buffer; + + while (source < top) + { + u64 instruction_count = 0; + u8* base = source; + u32 mnemonic_offsets[64]; + u32 mnemonic_lengths[64]; +#define operand_buffer_count (64*4) + u32 operand_offsets[operand_buffer_count]; + u8 operand_lengths[operand_buffer_count]; + u32 instruction_operand_offsets[64]; + u8 instruction_operand_counters[64]; + OperandWork operand_works[operand_buffer_count]; + u64 immediates[operand_buffer_count]; + x86_64_Register registers[operand_buffer_count]; + OperandIndirect indirects[operand_buffer_count]; + u32 operand_count = 0; + + u64 operand_length_error_mask = 0; + u64 operand_count_error_mask = 0; + + while (instruction_count < 64) + { + while (is_asm_space(*source)) + { + source += 1; + } + + if (source == top) + { + break; + } + + let(instruction_length, MIN(string_first_ch((String) { .pointer = source, .length = top - source }, '\n'), (u64)(top - source))); + + String instruction = { .pointer = source, .length = instruction_length }; + let(instruction_top, source + instruction_length + (*(source + instruction_length) == '\n')); + + u32 mnemonic_offset = source - base; + u32 mnemonic_length = find_next_space(instruction); + mnemonic_offsets[instruction_count] = mnemonic_offset; + mnemonic_lengths[instruction_count] = mnemonic_length; + + source += mnemonic_length; + + u32 instruction_operand_offset = operand_count; + instruction_operand_offsets[instruction_count] = instruction_operand_offset; + + while (1) + { + while (is_asm_space(*source)) + { + source += 1; + } + + if (source == instruction_top) + { + break; + } + + String whats_left = { .pointer = source, .length = instruction_top - source }; + u32 operand_offset = source - base; + u32 operand_length = 
MIN(whats_left.length, MIN(string_first_ch(whats_left, ','), string_first_ch(whats_left, '\n'))); + operand_offsets[operand_count] = operand_offset; + operand_lengths[operand_count] = operand_length; + operand_length_error_mask |= operand_length >= UINT8_MAX; + source += operand_length; + source += *source == ','; + + operand_count += 1; + } + + let_cast(u8, instruction_operand_count, operand_count - instruction_operand_offset); + operand_count_error_mask |= instruction_operand_count > 4; + instruction_operand_counters[instruction_count] = instruction_operand_count; + + instruction_count += 1; + } + + if (unlikely((operand_length_error_mask | operand_count_error_mask | (source == top)) != 0)) + { + if (source == top) + { + break; + } + + todo(); + } + + let(lookup_result, pext_lookup_mnemonic_batch(base, mnemonic_offsets, mnemonic_lengths)); + + __mmask32 lookup_error_m0 = _mm512_cmpeq_epi16_mask(lookup_result.v[0], _mm512_set1_epi16(0xffff)); + __mmask32 lookup_error_m1 = _mm512_cmpeq_epi16_mask(lookup_result.v[1], _mm512_set1_epi16(0xffff)); + __mmask32 lookup_error_mask = _kor_mask32(lookup_error_m0, lookup_error_m1); + u32 lookup_error_mask_int = _cvtmask32_u32(lookup_error_mask); + + // Operand parsing + // GPR + // xmm{0-31} + // ymm{0-31} + // zmm{0-31} + // indirect + // immediate: decimal, hexadecimal, binary + + if (likely(operand_count != 0)) + { + for (u32 operand_index = 0; operand_index < operand_count; operand_index += 1) + { + let(operand_string_pointer, base + operand_offsets[operand_index]); + let(operand_string_length, operand_lengths[operand_index]); + u8 first_ch = operand_string_pointer[0]; + if ((first_ch >= 'a') & (first_ch <= 'z')) + { + let(value, pext_lookup_register_single(operand_string_pointer, operand_string_length)); + if (value == 0xffff) + { + todo(); + } + + registers[operand_index] = value; + operand_works[operand_index].kind = OPERAND_KIND_REGISTER; + } + else if ((first_ch >= '0') & (first_ch <= '9')) + { + switch 
(operand_string_pointer[1]) + { + case 'x': + { + u8* it = &operand_string_pointer[2]; + + u8* hex_it = it; + while (is_hex_digit(*hex_it)) + { + hex_it += 1; + } + + String operand_string = { .pointer = it, .length = hex_it - it }; + u8 is_error; + u64 result = parse_hexadecimal(operand_string, &is_error); + if (is_error) + { + todo(); + } + immediates[operand_index] = result; + + it = hex_it; + } break; + case 'b': + { + todo(); + } break; + case 'o': + { + todo(); + } break; + default: todo(); + } + + operand_works[operand_index].kind = OPERAND_KIND_IMMEDIATE; + } + else if (first_ch == '[') + { + u8* end = &operand_string_pointer[operand_string_length - 1]; + + while (is_asm_space(*end)) + { + end -= 1; + } + + if (*end != ']') + { + todo(); + } + + u8* it = operand_string_pointer + 1; + s8 bias8 = 1; + s32 bias32 = 1; + s8 displacement8 = 0; + s32 displacement32 = 0; + x86_64_Register base = 0; + + while (1) + { + while (is_asm_space(*it)) + { + it += 1; + } + + if (it == end) + { + break; + } + + u8 first_ch = *it; + if ((first_ch >= 'a') & (first_ch <= 'z')) + { + let(suboperand_it, it); + while (is_alphanumeric(*suboperand_it)) + { + suboperand_it += 1; + } + + u8* operand_string_pointer = it; + u8 operand_string_length = suboperand_it - it; + + let(value, pext_lookup_register_single(operand_string_pointer, operand_string_length)); + if (value == 0xffff) + { + todo(); + } + + base = value; + + it = suboperand_it; + } + else if (first_ch == '0') + { + it += 1; + + switch (*it) + { + case 'x': + { + it += 1; + + u8* hex_it = it; + while (is_hex_digit(*hex_it)) + { + hex_it += 1; + } + + String operand_string = { .pointer = it, .length = hex_it - it }; + u8 is_error; + u64 result = parse_hexadecimal(operand_string, &is_error); + if (is_error) + { + todo(); + } + unused(result); + + it = hex_it; + } break; + case 'b': + { + it += 1; + todo(); + } break; + case 'o': + { + it += 1; + todo(); + } break; + default: todo(); + } + } + else + { + todo(); + } + + while 
(is_asm_space(*it)) + { + it += 1; + } + + switch (*it) + { + case '+': + { + it += 1; + } break; + case '-': + { + bias8 = -1; + bias32 = -1; + it += 1; + } break; + case ']': + { + } break; + default: todo(); + } + } + + assert(*it == ']'); + it += 1; + + indirects[operand_index].displacement8 = displacement8 * bias8; + indirects[operand_index].displacement32 = displacement32 * bias32; + indirects[operand_index].base = base; + operand_works[operand_index].kind = OPERAND_KIND_INDIRECT; + } + else + { + todo(); + } + } + } + + if (unlikely(lookup_error_mask_int != 0)) + { + todo(); + } + + // for (u64 instruction_index = 0; instruction_index < 64; instruction_index += 1) + // { + // u8 instruction_operand_count = instruction_operand_counters[instruction_index]; + // u32 instruction_operand_offset = instruction_operand_offsets[instruction_index]; + // for (u8 instruction_operand_index = 0; instruction_operand_index < instruction_operand_count; instruction_operand_index += 1) + // { + // // u32 operand_index = instruction_operand_offset + instruction_operand_index; + // // OperandWork work = operand_works[operand_index]; + // // switch (work.kind) + // // { + // // // default: todo(); + // // } + // } + // for (u8 instruction_operand_index = instruction_operand_count; instruction_operand_index < 4; instruction_operand_index += 1) + // { + // // Check encoding operand is zero + // } + // } + + __mmask64 prefix_masks[LEGACY_PREFIX_COUNT] = {}; // TODO + __mmask64 is_immediate[4] = {}; // TODO + __mmask64 is_plus_register = {}; // TODO + __mmask64 is_rm_register = {}; // TODO + __mmask64 is_reg_register = {}; // TODO + __mmask64 is_implicit_register = {}; // TODO + __mmask64 is_displacement8 = {}; // TODO + __mmask64 is_displacement32 = {}; // TODO + __mmask64 is_relative8 = {}; // TODO + __mmask64 is_relative32 = {}; // TODO + __mmask64 is_rex_w = {}; // TODO + + __m256i rm_register_mask_256 = {}; // TODO + __m256i reg_register_mask_256 = {}; // TODO + __m128i 
opcode_lengths_128 = {}; // TODO + __m512i opcode_extension = {}; // TODO + __m512i opcode0_pre = {}; // TODO + __m512i opcode1_pre = {}; // TODO + __m512i opcode2_pre = {}; // TODO + __m512i displacement8 = {}; // TODO + // + u8 immediate[8][64] = {}; // TODO + u8 displacement[4][64] = {}; // TODO + u8 relative[4][64] = {}; // TODO + + __m512i prefixes[LEGACY_PREFIX_COUNT]; + for (LegacyPrefix prefix = 0; prefix < LEGACY_PREFIX_COUNT; prefix += 1) + { + prefixes[prefix] = _mm512_maskz_set1_epi8(prefix_masks[prefix], legacy_prefixes[prefix]); + } + + __m512i instruction_length; + + u8 prefix_group1_bytes[64]; + u8 prefix_group1_positions[64]; + { + __mmask64 prefix_group1_mask = _kor_mask64(_kor_mask64(prefix_masks[LEGACY_PREFIX_F0], prefix_masks[LEGACY_PREFIX_F2]), prefix_masks[LEGACY_PREFIX_F3]); + __m512i prefix_group1 = _mm512_or_epi32(_mm512_or_epi32(prefixes[LEGACY_PREFIX_F0], prefixes[LEGACY_PREFIX_F2]), prefixes[LEGACY_PREFIX_F3]); + __m512i prefix_group1_position = _mm512_maskz_set1_epi8(_knot_mask64(prefix_group1_mask), 0x0f); + instruction_length = _mm512_maskz_set1_epi8(prefix_group1_mask, 0x01); + + _mm512_storeu_epi8(prefix_group1_bytes, prefix_group1); + _mm512_storeu_epi8(prefix_group1_positions, prefix_group1_position); + } + + u8 prefix_group2_bytes[64]; + u8 prefix_group2_positions[64]; + { + __mmask64 prefix_group2_mask = _kor_mask64(_kor_mask64(_kor_mask64(prefix_masks[LEGACY_PREFIX_2E], prefix_masks[LEGACY_PREFIX_36]), _kor_mask64(prefix_masks[LEGACY_PREFIX_3E], prefix_masks[LEGACY_PREFIX_26])), _kor_mask64(prefix_masks[LEGACY_PREFIX_64], prefix_masks[LEGACY_PREFIX_65])); + __m512i prefix_group2 = _mm512_or_epi32(_mm512_or_epi32(_mm512_or_epi32(prefixes[LEGACY_PREFIX_2E], prefixes[LEGACY_PREFIX_36]), _mm512_or_epi32(prefixes[LEGACY_PREFIX_3E], prefixes[LEGACY_PREFIX_26])), _mm512_or_epi32(prefixes[LEGACY_PREFIX_64], prefixes[LEGACY_PREFIX_65])); + __m512i prefix_group2_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), prefix_group2_mask, 
instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(prefix_group2_mask, 0x01)); + + _mm512_storeu_epi8(prefix_group2_bytes, prefix_group2); + _mm512_storeu_epi8(prefix_group2_positions, prefix_group2_position); + } + + u8 prefix_group3_bytes[64]; + u8 prefix_group3_positions[64]; + { + __mmask64 prefix_group3_mask = prefix_masks[LEGACY_PREFIX_66]; + __m512i prefix_group3 = prefixes[LEGACY_PREFIX_66]; + __m512i prefix_group3_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), prefix_group3_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(prefix_group3_mask, 0x01)); + + _mm512_storeu_epi8(prefix_group3_bytes, prefix_group3); + _mm512_storeu_epi8(prefix_group3_positions, prefix_group3_position); + } + + u8 prefix_group4_bytes[64]; + u8 prefix_group4_positions[64]; + { + __mmask64 prefix_group4_mask = prefix_masks[LEGACY_PREFIX_67]; + __m512i prefix_group4 = prefixes[LEGACY_PREFIX_67]; + __m512i prefix_group4_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), prefix_group4_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(prefix_group4_mask, 0x01)); + + _mm512_storeu_epi8(prefix_group4_bytes, prefix_group4); + _mm512_storeu_epi8(prefix_group4_positions, prefix_group4_position); + } + + __m512i rm_register; + { + __m256i selecting_mask = _mm256_set1_epi8(0x0f); + __m256i low_bits = _mm256_and_si256(rm_register_mask_256, selecting_mask); + __m256i high_bits = _mm256_and_si256(_mm256_srli_epi64(rm_register_mask_256, 4), selecting_mask); + __m256i low_bytes = _mm256_unpacklo_epi8(low_bits, high_bits); + __m256i high_bytes = _mm256_unpackhi_epi8(low_bits, high_bits); + rm_register = _mm512_inserti64x4(_mm512_castsi256_si512(low_bytes), high_bytes, 1); + } + + __m512i reg_register; + { + __m256i selecting_mask = _mm256_set1_epi8(0x0f); + __m256i low_bits = _mm256_and_si256(reg_register_mask_256, 
selecting_mask); + __m256i high_bits = _mm256_and_si256(_mm256_srli_epi64(rm_register_mask_256, 4), selecting_mask); + __m256i low_bytes = _mm256_unpacklo_epi8(low_bits, high_bits); + __m256i high_bytes = _mm256_unpackhi_epi8(low_bits, high_bits); + reg_register = _mm512_inserti64x4(_mm512_castsi256_si512(low_bytes), high_bytes, 1); + } + + __mmask64 is_reg_direct_addressing_mode = _knot_mask64(_kor_mask64(is_displacement8, is_displacement32)); + __mmask64 has_base_register = _kor_mask64(_kor_mask64(is_rm_register, is_reg_register), is_implicit_register); + + __m512i rex_b = _mm512_maskz_set1_epi8(_mm512_test_epi8_mask(rm_register, _mm512_set1_epi8(0b1000)), 1 << 0); + __m512i rex_x = _mm512_set1_epi8(0); // TODO + __m512i rex_r = _mm512_maskz_set1_epi8(_mm512_test_epi8_mask(reg_register, _mm512_set1_epi8(0b1000)), 1 << 2); + __m512i rex_w = _mm512_maskz_set1_epi8(is_rex_w, 1 << 3); + __m512i rex_byte = _mm512_or_epi32(_mm512_set1_epi32(0x40), _mm512_or_epi32(_mm512_or_epi32(rex_b, rex_x), _mm512_or_epi32(rex_r, rex_w))); + __mmask64 rex_mask = _mm512_test_epi8_mask(rex_byte, _mm512_set1_epi8(0x0f)); + __m512i rex_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), rex_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(rex_mask, 0x01)); + + u8 rex_bytes[64]; + u8 rex_positions[64]; + _mm512_storeu_epi8(rex_bytes, rex_byte); + _mm512_storeu_epi8(rex_positions, rex_position); + + __m128i selecting_mask = _mm_set1_epi8(0x03); + __m128i opcode_length_nibbles_0 = _mm_and_si128(opcode_lengths_128, selecting_mask); + __m128i opcode_length_nibbles_1 = _mm_and_si128(_mm_srli_epi64(opcode_lengths_128, 2 * 1), selecting_mask); + __m128i opcode_length_nibbles_2 = _mm_and_si128(_mm_srli_epi64(opcode_lengths_128, 2 * 2), selecting_mask); + __m128i opcode_length_nibbles_3 = _mm_and_si128(_mm_srli_epi64(opcode_lengths_128, 2 * 3), selecting_mask); + + __m512i opcode_lengths_512 = 
_mm512_inserti64x4(_mm512_castsi256_si512(_mm256_inserti32x4(_mm256_castsi128_si256(_mm_unpacklo_epi8(opcode_length_nibbles_0, opcode_length_nibbles_1)), _mm_unpackhi_epi8(opcode_length_nibbles_0, opcode_length_nibbles_1), 1)), _mm256_inserti32x4(_mm256_castsi128_si256(_mm_unpacklo_epi8(opcode_length_nibbles_2, opcode_length_nibbles_3)), _mm_unpackhi_epi8(opcode_length_nibbles_2, opcode_length_nibbles_3), 1), 1); + + __mmask64 opcode_is_length_1 = _mm512_cmpeq_epi8_mask(opcode_lengths_512, _mm512_set1_epi8(1)); + __mmask64 opcode_is_length_2 = _mm512_cmpeq_epi8_mask(opcode_lengths_512, _mm512_set1_epi8(2)); + __mmask64 opcode_is_length_3 = _mm512_cmpeq_epi8_mask(opcode_lengths_512, _mm512_set1_epi8(3)); + + __m512i plus_register = _mm512_and_si512(rm_register, _mm512_set1_epi8(0b111)); + + __m512i opcode0 = _mm512_or_epi32(opcode0_pre, _mm512_maskz_mov_epi8(_kand_mask64(is_plus_register, opcode_is_length_1), plus_register)); + __m512i opcode0_position = instruction_length; + instruction_length = _mm512_add_epi8(instruction_length, _mm512_set1_epi8(0x01)); + + u8 opcode0_bytes[64]; + u8 opcode0_positions[64]; + _mm512_storeu_epi8(opcode0_bytes, opcode0); + _mm512_storeu_epi8(opcode0_positions, opcode0_position); + + __m512i opcode1 = _mm512_or_epi32(opcode1_pre, _mm512_maskz_mov_epi8(_kand_mask64(is_plus_register, opcode_is_length_2), plus_register)); + __mmask64 opcode1_mask = _mm512_test_epi8_mask(opcode_lengths_512, _mm512_set1_epi8(0b10)); + __m512i opcode1_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), opcode1_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(opcode1_mask, 0x01)); + + u8 opcode1_bytes[64]; + u8 opcode1_positions[64]; + _mm512_storeu_epi8(opcode1_bytes, opcode1); + _mm512_storeu_epi8(opcode1_positions, opcode1_position); + + __m512i opcode2 = _mm512_or_epi32(opcode2_pre, _mm512_maskz_mov_epi8(_kand_mask64(is_plus_register, opcode_is_length_3), plus_register)); + __mmask64 
opcode2_mask = _mm512_cmpeq_epi8_mask(opcode_lengths_512, _mm512_set1_epi8(0b11)); + __m512i opcode2_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), opcode2_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(opcode2_mask, 0x01)); + + u8 opcode2_bytes[64]; + u8 opcode2_positions[64]; + _mm512_storeu_epi8(opcode2_bytes, opcode2); + _mm512_storeu_epi8(opcode2_positions, opcode2_position); + + __mmask64 mod_is_displacement32 = is_displacement32; + __mmask64 mod_is_displacement8 = _kand_mask64(is_displacement8, _kor_mask64(_mm512_test_epi8_mask(displacement8, displacement8), _kand_mask64(is_rm_register, _mm512_cmpeq_epi8_mask(_mm512_and_si512(rm_register, _mm512_set1_epi8(0b111)), _mm512_set1_epi8(REGISTER_X86_64_BP))))); + + __mmask64 mod_rm_mask = _kor_mask64(_kand_mask64(_kor_mask64(is_rm_register, is_reg_register), _knot_mask64(is_plus_register)), _kor_mask64(is_displacement8, is_displacement32)); + __m512i register_direct_address_mode = _mm512_maskz_set1_epi8(is_reg_direct_addressing_mode, 1); + __m512i mod = _mm512_or_epi32(_mm512_or_epi32(_mm512_slli_epi32(_mm512_maskz_set1_epi8(_kand_mask64(mod_is_displacement32, has_base_register), 1), 1), _mm512_maskz_set1_epi8(mod_is_displacement8, 1)), _mm512_or_epi32(_mm512_slli_epi32(register_direct_address_mode, 1), register_direct_address_mode)); + __m512i rm = _mm512_or_epi32(_mm512_and_si512(rm_register, _mm512_set1_epi8(0b111)), _mm512_maskz_set1_epi8(_knot_mask64(has_base_register), 0b100)); + __m512i reg = _mm512_or_epi32(_mm512_and_si512(reg_register, _mm512_set1_epi8(0b111)), opcode_extension); + __m512i mod_rm = _mm512_or_epi32(_mm512_or_epi32(rm, _mm512_slli_epi32(reg, 3)), _mm512_slli_epi32(mod, 6)); + __m512i mod_rm_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), mod_rm_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(mod_rm_mask, 0x01)); + + u8 mod_rm_bytes[64]; + u8 
mod_rm_positions[64]; + _mm512_storeu_epi8(mod_rm_bytes, mod_rm); + _mm512_storeu_epi8(mod_rm_positions, mod_rm_position); + + __mmask64 sib_mask = _kand_mask64(_mm512_cmpneq_epi8_mask(mod, _mm512_set1_epi8(0b11)), _mm512_cmpeq_epi8_mask(rm, _mm512_set1_epi8(0b100))); + __m512i sib_scale = _mm512_set1_epi8(0); + __m512i sib_index = _mm512_maskz_set1_epi8(sib_mask, 0b100 << 3); + __m512i sib_base = _mm512_or_epi32(_mm512_and_si512(rm_register, _mm512_maskz_set1_epi8(is_rm_register, 0b111)), _mm512_maskz_set1_epi8(_knot_mask64(is_rm_register), 0b101)); + __m512i sib = _mm512_or_epi32(_mm512_or_epi32(sib_index, sib_base), sib_scale); + __m512i sib_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), sib_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(sib_mask, 0x01)); + + u8 sib_bytes[64]; + u8 sib_positions[64]; + _mm512_storeu_epi8(sib_bytes, sib); + _mm512_storeu_epi8(sib_positions, sib_position); + + __m512i displacement8_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), mod_is_displacement8, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(mod_is_displacement8, sizeof(s8))); + u8 displacement8_positions[64]; + _mm512_storeu_epi8(displacement8_positions, displacement8_position); + + __m512i displacement32_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), mod_is_displacement32, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(mod_is_displacement32, sizeof(s32))); + u8 displacement32_positions[64]; + _mm512_storeu_epi8(displacement32_positions, displacement32_position); + + __m512i relative8_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), is_relative8, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(is_relative8, sizeof(s8))); + u8 relative8_positions[64]; + _mm512_storeu_epi8(relative8_positions, relative8_position); + + 
__m512i relative32_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), is_relative32, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(is_relative32, sizeof(s32))); + u8 relative32_positions[64]; + _mm512_storeu_epi8(relative32_positions, relative32_position); + + u8 immediate_positions[array_length(is_immediate)][64]; + for (u8 i = 0; i < array_length(immediate_positions); i += 1) + { + __mmask64 immediate_mask = is_immediate[i]; + __m512i immediate_position = _mm512_mask_mov_epi8(_mm512_set1_epi8(0x0f), immediate_mask, instruction_length); + instruction_length = _mm512_add_epi8(instruction_length, _mm512_maskz_set1_epi8(immediate_mask, 1 << i)); + _mm512_storeu_epi8(immediate_positions[i], immediate_position); + } + + u8 separate_buffers[64][max_instruction_byte_count]; + u8 separate_lengths[64]; + _mm512_storeu_epi8(separate_lengths, instruction_length); + + for (u32 i = 0; i < array_length(separate_lengths); i += 1) + { + separate_buffers[i][prefix_group1_positions[i]] = prefix_group1_bytes[i]; + separate_buffers[i][prefix_group2_positions[i]] = prefix_group2_bytes[i]; + separate_buffers[i][prefix_group3_positions[i]] = prefix_group3_bytes[i]; + separate_buffers[i][prefix_group4_positions[i]] = prefix_group4_bytes[i]; + + separate_buffers[i][rex_positions[i]] = rex_bytes[i]; + + separate_buffers[i][opcode0_positions[i]] = opcode0_bytes[i]; + separate_buffers[i][opcode1_positions[i]] = opcode1_bytes[i]; + separate_buffers[i][opcode2_positions[i]] = opcode2_bytes[i]; + + separate_buffers[i][mod_rm_positions[i]] = mod_rm_bytes[i]; + + separate_buffers[i][sib_positions[i]] = sib_bytes[i]; + + for (u8 immediate_position_index = 0; immediate_position_index < array_length(immediate_positions); immediate_position_index += 1) + { + u8 start_position = immediate_positions[immediate_position_index][i]; + for (u32 byte = 0; byte < 1 << immediate_position_index; byte += 1) + { + u8 destination_index = start_position 
+ byte * (start_position != 0xf); + separate_buffers[i][destination_index] = immediate[byte][i]; + } + } + + separate_buffers[i][displacement8_positions[i]] = displacement[0][i]; + + u8 displacement32_start = displacement32_positions[i]; + for (u8 byte = 0; byte < 4; byte += 1) + { + u8 destination_index = displacement32_start + byte * (displacement32_start != 0xf); + separate_buffers[i][destination_index] = displacement[byte][i]; + } + + separate_buffers[i][relative8_positions[i]] = relative[0][i]; + + u8 relative32_start = relative32_positions[i]; + for (u8 byte = 0; byte < 4; byte += 1) + { + u8 destination_index = relative32_start + byte * (relative32_start != 0xf); + separate_buffers[i][destination_index] = relative[byte][i]; + } + } + + for (u32 i = 0; i < array_length(separate_lengths); i += 1) + { + let(separate_length, separate_lengths[i]); + + if (separate_length == 0) unreachable(); + if (separate_length > 15) unreachable(); + + memcpy(destination, &separate_buffers[i], separate_length); + destination += separate_length; + } + } + + String result = {}; + return result; +} + +fn String assemble_file(Arena* arena, String path) +{ + String assembly_file = file_read(arena, path); + String result = assemble(assembly_file); + return result; +} + int main(int argc, char** argv, char** envp) { unused(argc); unused(argv); - unused(envp); + + environment_pointer = envp; + Arena* arena = arena_initialize_default(MB(2)); + assemble_file(arena, strlit("large_assembly.s")); + + // TestDataset dataset = construct_test_cases(); + // EncodingTestOptions options = { + // .scalar = 1, + // .wide = 1, + // }; + // u8 result = encoding_test_instruction_batches(arena, dataset, options); + return 0; } -#endif diff --git a/bootstrap/bloat-buster/data/instructions.dat b/bootstrap/bloat-buster/data/instructions.dat new file mode 100644 index 0000000..9aa2a5e --- /dev/null +++ b/bootstrap/bloat-buster/data/instructions.dat @@ -0,0 +1,281 @@ +adc: class base_arithmetic(/2, 15, 11, 
13) +adcx: class unsigned_add_flag(66) +add: class base_arithmetic(/0, 05, 01, 03) +adox: class unsigned_add_flag(f3) +and: class base_arithmetic(/4, 25, 21, 23) +bsf: + r16, rm16 [rm: rex.r 0f bc /r] + r32, rm32 [rm: 0f bc /r] + r64, rm64 [rm: rex.w 0f bc /r] +bsr: + r16, rm16 [rm: rex.r 0f bd /r] + r32, rm32 [rm: 0f bd /r] + r64, rm64 [rm: rex.w 0f bd /r] +bswap: + r32 [o: 0f c8+r] + r64 [o: rex.w 0f c8+r] +bt: class bittest(/4, a3) +btc: class bittest(/7, bb) +btr: class bittest(/6, b3) +bts: class bittest(/5, ab) +call: + imm [d: e8 rel32] + rm64 [m: ff /2] +cbw: [zo: rex.r 98] +cwde: [zo: 98] +cwqe: [zo: rex.w 98] +clc: [zo: f8] +cld: [zo: fd] +clflush: m8 [m: 0f ae /7] +clflushopt: m8 [m: 66 0f ae /7] +cli: [zo: fa] +clts: [zo: 0f 06] +cmc: [zo: f5] +cmovcc: class cmov +cmp: class base_arithmetic(/7, 3d, 39, 3b) +cmpsb: [zo: a6] +cmpsw: [zo: a7] +cmpsd: [zo: a7] +cmpsq: [zo: a7] +cmpxchg: + rm8, r8 [mr: 0f b0] + rm16, r16 [mr: 0f b1] + rm32, r32 [mr: 0f b1] + rm64, r64 [mr: 0f b1] +cmpxchg8b: m64 [m: 0f c7 /1] +cmpxchg16b: m64 [m: rex.w 0f c7 /1] +cpuid: [zo: 0f a2] +crc32: + r32, rm8 [rm: f2 0f 38 f0] + r32, rm16 [rm: 66 f2 0f 38 f1] + r32, rm32 [rm: f2 0f 38 f1] + r64, rm8 [rm: f2 rex.w 0f 38 f0] + r64, rm64 [rm: f2 rex.w 0f 38 f1] +dec: + rm8 [m: fe /1] + rm16 [m: fe /1] + rm32 [m: fe /1] + rm64 [m: fe /1] +div: + rm8 [m: f6 /6] + rm16 [m: f7 /6] + rm32 [m: f7 /6] + rm64 [m: f7 /6] +hlt: [zo: f4] +idiv: + rm8 [m: f6 /7] + rm16 [m: f7 /7] + rm32 [m: f7 /7] + rm64 [m: f7 /7] +imul: + rm8 [m: f6 /5] + rm16 [m: f7 /5] + rm32 [m: f7 /5] + rm64 [m: f7 /5] + r16, rm16 [rm: 0f af] + r32, rm32 [rm: 0f af] + r64, rm64 [rm: 0f af] + r16, rm16, imm [rmi: 6b ib] + r32, rm32, imm [rmi: 6b ib] + r64, rm64, imm [rmi: 6b ib] + r16, rm16, imm16 [rmi: 69 iw] + r32, rm32, imm32 [rmi: 69 id] + r64, rm64, imm32 [rmi: 69 id] +in: + al, imm8 [-i: e4 ib] + ax, imm8 [-i: e5 ib] + eax, imm8 [-i: e5 ib] + al, dx [--: ec] + ax, dx [--: ed] + eax, dx [--: ed] +inc: + rm8 [m: fe /0] + 
rm16 [m: fe /0] + rm32 [m: fe /0] + rm64 [m: fe /0] +insb: [zo: 6c] +insw: [zo: 6d] +insd: [zo: 6d] +int: imm [i: cd ib] +int3: [zo: cc] +invd: [zo: 0f 08] +invlpg: m8 [m: 0f 01 /7] +iret: [zo: 66 cf] +iretd: [zo: cf] +iretq: [zo: rex.w cf] +jmp: + imm [d: eb rel8] + imm [d: e9 rel32] + rm64 [m: ff /4] +jcc: class jcc +jrcxz: imm [d: e3 rel8] +lahf: [zo: 9f] +lea: + r16, m16 [rm: 8d /r] + r32, m32 [rm: 8d /r] + r64, m64 [rm: 8d /r] +lodsb: [zo: ac] +lodsw: [zo: ad] +lodsd: [zo: ad] +lodsq: [zo: ad] +loop: imm [d: e2 rel8] +loope: imm [d: e1 rel8] +loopne: imm [d: e0 rel8] +monitor: [zo: 0f 01 c8] +mov: + rm8, r8 [mr: 88 /r] + rm16, r16 [mr: 89 /r] + rm32, r32 [mr: 89 /r] + rm64, r64 [mr: 89 /r] + r8, rm8 [rm: 8a /r] + r16, rm16 [rm: 8b /r] + r32, rm32 [rm: 8b /r] + r64, rm64 [rm: 8b /r] + r8, imm [ri: b0+r ib] + r16, imm [ri: b8+r iw] + r32, imm [ri: b8+r id] + r64, imm [ri: b8+r iq] + r8, imm [ri: c6 /0 ib] + r16, imm [ri: c7 /0 iw] + r32, imm [ri: c7 /0 id] + r64, imm [ri: c7 /0 id] +movsb: [zo: a4] +movsw: [zo: a5] +movsd: [zo: a5] +movsq: [zo: a5] +movsx: + r16, rm8 [rm: 0f be /r] + r32, rm8 [rm: 0f be /r] + r64, rm8 [rm: 0f be /r] + r32, rm16 [rm: 0f bf /r] + r64, rm16 [rm: 0f bf /r] +movsxd: r64, rm32 [rm: rex.w 63 /r] +movzx: + r16, rm8 [rm: 0f b6 /r] + r32, rm8 [rm: 0f b6 /r] + r64, rm8 [rm: 0f b6 /r] + r32, rm16 [rm: 0f b7 /r] + r64, rm16 [rm: 0f b7 /r] +mul: + rm8 [m: f6 /4] + rm16 [m: f7 /4] + rm32 [m: f7 /4] + rm64 [m: f7 /4] +mwait: [zo: 0f 01 c9] +neg: + rm8 [m: f6 /3] + rm16 [m: f7 /3] + rm32 [m: f7 /3] + rm64 [m: f7 /3] +nop: + [zo: 90] + rm16 [m: 0f 1f /0] + rm32 [m: 0f 1f /0] +not: + rm8 [m: f6 /2] + rm16 [m: f7 /2] + rm32 [m: f7 /2] + rm64 [m: f7 /2] +or: class base_arithmetic(/1, 0d, 09, 0b) +out: + imm, al [i-: e6 ib] + imm, ax [i-: e7 ib] + imm, ax [i-: e7 ib] +pause: [zo: f3 90] +pop: + rm16 [m: 8f /0] + rm64 [m: 8f /0] + r16 [o: 58+r] + r64 [o: 58+r] +popcnt: + r16, rm16 [rm: f3 0f b8 /r] + r32, rm32 [rm: f3 0f b8 /r] + r64, rm64 [rm: f3 0f 
b8 /r] +popf: [zo: 66 9d] +popfq: [zo: 9d] +prefetcht0: m8 [m: 0f 18 /1] +prefetcht1: m8 [m: 0f 18 /2] +prefetcht2: m8 [m: 0f 18 /3] +prefetchnta: m8 [m: 0f 18 /0] +push: + rm16 [m: ff /6] + rm64 [m: ff /6] + r16 [o: 50+r] + r64 [o: 50+r] + imm [i: 6a ib] + imm [i: 68 iw] + imm [i: 68 id] +pushf: [zo: 66 9c] +pushfq: [zo: 9c] +rol: class rotate(/0) +ror: class rotate(/1) +rcl: class rotate(/2) +rcr: class rotate(/3) +rdmsr: [zo: 0f 32] +rdpmc: [zo: 0f 33] +rdtsc: [zo: 0f 31] +rdtscp: [zo: 0f 01 f9] +ret: + [zo: c3] + imm [i: c2 iw] +retf: + [zo: cb] + imm [i: ca iw] +rsm: [zo: 0f aa] +sal: class shift(/4) +sar: class shift(/7) +shl: class shift(/4) +shr: class shift(/5) +scasb: [zo: ae] +scasw: [zo: af] +scasd: [zo: af] +scasq: [zo: af] +setcc: class setcc +stc: [zo: f9] +std: [zo: fd] +sti: [zo: fb] +stosb: [zo: aa] +stosw: [zo: ab] +stosd: [zo: ab] +stosq: [zo: ab] +sub: class base_arithmetic(/5, 2d, 29, 2b) +syscall: [zo: 0f 05] +sysenter: [zo: 0f 34] +sysexit: [zo: 0f 35] +sysret: [zo: 0f 07] +test: + al, imm8 [-i: a8 ib] + ax, imm16 [-i: a9 iw] + eax, imm32 [-i: a9 id] + rax, imm32 [-i: a9 id] + rm8, imm8 [mi: f6 /0 ib] + rm16, imm8 [mi: f7 /0 ib] + rm32, imm8 [mi: f7 /0 ib] + rm64, imm8 [mi: f7 /0 ib] + rm8, r8 [mr: 84 /r] + rm16, r16 [mr: 85 /r] + rm32, r32 [mr: 85 /r] + rm64, r64 [mr: 85 /r] +ud0: r32, rm32 [rm: 0f ff /r] +ud1: r32, rm32 [rm: 0f ff /r] +ud2: [zo: 0f 0b] +xadd: + rm8, r8 [mr: 0f c0 /r] + rm16, r16 [mr: 0f c1 /r] + rm32, r32 [mr: 0f c1 /r] + rm64, r64 [mr: 0f c1 /r] +xchg: + ax, r16 [-o: 90+r] + r16, ax [o-: 90+r] + eax, r32 [-o: 90+r] + r32, eax [o-: 90+r] + rax, r64 [-o: 90+r] + r64, rax [o-: 90+r] + rm8, r8 [mr: 86 /r] + r8, rm8 [rm: 86 /r] + rm16, r16 [mr: 87 /r] + r16, rm16 [rm: 87 /r] + rm32, r32 [mr: 87 /r] + r32, rm32 [rm: 87 /r] + rm64, r64 [mr: 87 /r] + r64, rm64 [rm: 87 /r] diff --git a/bootstrap/bloat-buster/data/x86_mnemonic.dat b/bootstrap/bloat-buster/data/x86_mnemonic.dat new file mode 100644 index 0000000..7e665e9 --- 
/dev/null +++ b/bootstrap/bloat-buster/data/x86_mnemonic.dat @@ -0,0 +1,226 @@ +adc +adcx +add +adox +and +bsf +bsr +bswap +bt +btc +btr +bts +call +cbw +cwde +cdqe +cwd +cdq +cqo +clc +cld +clflush +clflushopt +cli +clts +cmc +cmova +cmovae +cmovb +cmovbe +cmovc +cmove +cmovg +cmovge +cmovl +cmovle +cmovna +cmovnae +cmovnb +cmovnbe +cmovnc +cmovne +cmovng +cmovnge +cmovnl +cmovnle +cmovno +cmovnp +cmovns +cmovnz +cmovo +cmovp +cmovpe +cmovpo +cmovs +cmovz +cmp +cmpsb +cmpsw +cmpsd +cmpsq +cmpxchg +cmpxchg8b +cmpxchg16b +cpuid +crc32 +dec +div +hlt +idiv +imul +in +inc +insb +insw +insd +int +int3 +invd +invlpg +iret +iretd +iretq +jmp +ja +jae +jb +jbe +jc +je +jg +jge +jl +jle +jna +jnae +jnb +jnbe +jnc +jne +jng +jnge +jnl +jnle +jno +jnp +jns +jnz +jo +jp +jpe +jpo +js +jz +jrcxz +lahf +lea +lodsb +lodsw +lodsd +lodsq +loop +loope +loopne +monitor +mov +movsb +movsw +movsd +movsq +movsx +movsxd +movzx +mul +mwait +neg +nop +not +or +out +outsb +outsw +outsd +pause +pop +popcnt +popf +popfq +prefetcht0 +prefetcht1 +prefetcht2 +prefetchnta +push +pushf +pushfq +rcl +rcr +rol +ror +rdmsr +rdpmc +rdtsc +rdtscp +ret +retf +rsm +sal +sar +shl +shr +sbb +scasb +scasw +scasd +scasq +seta +setae +setb +setbe +setc +sete +setg +setge +setl +setle +setna +setnae +setnb +setnbe +setnc +setne +setng +setnge +setnl +setnle +setno +setnp +setns +setnz +seto +setp +setpe +setpo +sets +setz +stc +std +sti +stosb +stosw +stosd +stosq +sub +syscall +sysenter +sysexit +sysret +test +ud0 +ud1 +ud2 +wbinvd +wrmsr +xadd +xchg +xor diff --git a/bootstrap/std/base.c b/bootstrap/std/base.c index 5804f95..2b864e6 100644 --- a/bootstrap/std/base.c +++ b/bootstrap/std/base.c @@ -1,24 +1,63 @@ #pragma once #if _MSC_VER +extern u32 _lzcnt_u32(u32); +extern u32 _tzcnt_u32(u32); extern u64 _lzcnt_u64(u64); extern u64 _tzcnt_u64(u64); #endif +fn u8 leading_zeroes_u32(u32 value) +{ +#if _MSC_VER + return (u8)_lzcnt_u32(value); +#else + return __builtin_clz(value); +#endif +} + +fn u8 
leading_zeroes_u64(u64 value) +{ +#if _MSC_VER + return (u8)_lzcnt_u64(value); +#else + return __builtin_clzll(value); +#endif +} + fn u8 log2_alignment(u64 alignment) { assert(alignment != 0); assert((alignment & (alignment - 1)) == 0); - u64 left = (sizeof(alignment) * 8) - 1; -#if _MSC_VER - let_cast(u64, right, _lzcnt_u64(alignment)); -#else - let_cast(u64, right, __builtin_clzll(alignment)); -#endif + u8 left = (sizeof(alignment) * 8) - 1; + u8 right = leading_zeroes_u64(alignment); let_cast(u8, result, left - right); return result; } +fn u8 log2_u64(u64 v) +{ + assert(v != 0); + return (sizeof(u64) * 8 - 1) - leading_zeroes_u64(v); +} + +fn u8 log2_u32(u32 v) +{ + assert(v != 0); + return (sizeof(u32) * 8 - 1) - leading_zeroes_u32(v); +} + +fn u8 hex_digit_count(u64 v) +{ + u8 result = 1; + if (v) + { + result = log2_u64(v) / log2_u64(16) + 1; + } + + return result; +} + fn u128 u128_from_u64(u64 n) { #if defined(__TINYC__) || defined(_MSC_VER) @@ -237,9 +276,49 @@ fn u64 is_decimal_digit(u8 ch) return (ch >= '0') & (ch <= '9'); } +fn u64 is_alphanumeric(u8 ch) +{ + return is_alphabetic(ch) | is_decimal_digit(ch); +} + +fn u64 is_hex_digit_alpha_lower(u8 ch) +{ + return (ch >= 'a') & (ch <= 'f'); +} + +fn u64 is_hex_digit_alpha_upper(u8 ch) +{ + return (ch >= 'A') & (ch <= 'F'); +} + +fn u64 is_hex_digit_alpha(u8 ch) +{ + return is_hex_digit_alpha_lower(ch) | is_hex_digit_alpha_upper(ch); +} + fn u64 is_hex_digit(u8 ch) { - return (is_decimal_digit(ch) | (((ch == 'a') | (ch == 'A')) | ((ch == 'b') | (ch == 'B')))) | ((((ch == 'c') | (ch == 'C')) | ((ch == 'd') | (ch == 'D'))) | (((ch == 'e') | (ch == 'E')) | ((ch == 'f') | (ch == 'F')))); + return is_decimal_digit(ch) | is_hex_digit_alpha(ch); +} + +fn u8 hex_ch_to_int(u8 ch) +{ + if ((ch >= '0') & (ch <= '9')) + { + return ch - '0'; + } + else if ((ch >= 'a') & (ch <= 'f')) + { + return ch - 'a' + 10; + } + else if ((ch >= 'A') & (ch <= 'F')) + { + return ch - 'A' + 10; + } + else + { + unreachable(); + } } 
fn u64 is_identifier_start(u8 ch) @@ -282,25 +361,38 @@ fn Hash32 hash64_to_hash32(Hash64 hash64) return result; } -fn u64 align_forward(u64 value, u64 alignment) +fn u64 align_forward_u32(u32 value, u32 alignment) +{ + u32 mask = alignment - 1; + u32 result = (value + mask) & ~mask; + return result; +} + +fn u32 align_backward_u32(u32 value, u32 alignment) +{ + u32 result = value & ~(alignment - 1); + return result; +} + +fn u64 align_forward_u64(u64 value, u64 alignment) { u64 mask = alignment - 1; u64 result = (value + mask) & ~mask; return result; } -fn u64 align_backward(u64 value, u64 alignment) +fn u64 align_backward_u64(u64 value, u64 alignment) { u64 result = value & ~(alignment - 1); return result; } -fn u8 is_power_of_two(u64 value) +fn u8 is_power_of_two_u64(u64 value) { return (value & (value - 1)) == 0; } -fn u8 first_bit_set_32(u32 value) +fn u8 first_bit_set_u32(u32 value) { #if _MSC_VER DWORD result_dword; @@ -314,7 +406,7 @@ fn u8 first_bit_set_32(u32 value) return result; } -fn u64 first_bit_set_64(u64 value) +fn u64 first_bit_set_u64(u64 value) { #if _MSC_VER DWORD result_dword; @@ -339,3 +431,33 @@ fn Hash32 hash64_fib_end(Hash64 hash) let(result, TRUNCATE(Hash32, ((hash + 1) * 11400714819323198485ull) >> 32)); return result; } + +fn u64 parse_hexadecimal(String string, u8* error) +{ + u8* it = &string.pointer[string.length - 1]; + u8 is_error = 0; + + u64 result = 0; + + while (it >= string.pointer) + { + u8 ch = *it; + + u8 is_error_it = !is_hex_digit(ch); + is_error |= is_error_it; + if (is_error_it) + { + break; + } + + u8 sub = is_decimal_digit(ch) ? '0' : (is_hex_digit_alpha_lower(ch) ? 
'a' : 'A'); + u8 hex_value = ch - sub + 10 * is_hex_digit_alpha(ch); + assert((hex_value & 0xf) == hex_value); + result = (result << 4) | hex_value; + + it -= 1; + } + + *error = is_error; + return result; +} diff --git a/bootstrap/std/base.h b/bootstrap/std/base.h index 43ed4e7..9a50b27 100644 --- a/bootstrap/std/base.h +++ b/bootstrap/std/base.h @@ -1,5 +1,10 @@ #pragma once +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) + +#define USE_MEMCPY 1 + #if _WIN32 #define _CRT_SECURE_NO_WARNINGS #endif @@ -10,16 +15,6 @@ #define BB_DEBUG 1 #endif -#define BB_INCLUDE_INTRINSIC 0 -#if BB_DEBUG == 0 -#undef BB_INCLUDE_INTRINSIC -#define BB_INCLUDE_INTRINSIC 1 -#endif -#if BB_INCLUDE_INTRINSIC -#if defined(__x86_64__) -#include -#endif -#endif #include #include #include @@ -140,6 +135,7 @@ declare_slice(s32); declare_slice(s64); declare_slice_p(char); +declare_slice_p(u8); declare_slice_p(void); typedef Slice(u8) String; @@ -194,7 +190,7 @@ declare_slice(String); #define unlikely(x) expect(x, 0) #define breakpoint() __builtin_debugtrap() #define failed_execution() panic("Failed execution at {cstr}:{u32}\n", __FILE__, __LINE__) -#define todo() panic("TODO at {cstr}:{u32}\n", __FILE__, __LINE__); fix_unreachable() +#define todo() os_is_being_debugged() ? trap() : panic("TODO at {cstr}:{u32}\n", __FILE__, __LINE__); fix_unreachable() fn void print(const char* format, ...); BB_NORETURN BB_COLD fn void os_exit(u32 exit_code); @@ -216,7 +212,8 @@ fn BB_NORETURN BB_COLD void trap_ext() #define trap() (trap_ext(), __builtin_unreachable()) #endif -#define panic(format, ...) (print(format, __VA_ARGS__), os_exit(1)) +fn u8 os_is_being_debugged(); +#define panic(format, ...) (!os_is_being_debugged() ? 
print(format, __VA_ARGS__), os_exit(1) : os_exit(1)) #define let_pointer_cast(PointerChildType, var_name, value) PointerChildType* var_name = (PointerChildType*)(value) #if defined(__TINYC__) || defined(_MSC_VER) @@ -323,12 +320,12 @@ global_variable const u8 bracket_close = ']'; #define s_get_slice(T, s, start, end) (Slice(T)){ .pointer = ((s).pointer) + (start), .length = (end) - (start) } #define s_equal(a, b) ((a).length == (b).length && memcmp((a).pointer, (b).pointer, sizeof(*((a).pointer)) * (a).length) == 0) -fn u64 align_forward(u64 value, u64 alignment); -fn u64 align_backward(u64 value, u64 alignment); -fn u8 log2_alignment(u64 alignment); -fn u8 is_power_of_two(u64 value); -fn u8 first_bit_set_32(u32 value); -fn u64 first_bit_set_64(u64 value); +fn u64 align_forward_u64(u64 value, u64 alignment); +fn u64 align_backward_u64(u64 value, u64 alignment); +fn u8 log2_alignment_u64(u64 alignment); +fn u8 is_power_of_two_u64(u64 value); +fn u8 first_bit_set_u32(u32 value); +fn u64 first_bit_set_u64(u64 value); fn u32 format_decimal(String buffer, u64 decimal); fn u32 format_hexadecimal(String buffer, u64 hexadecimal); @@ -340,6 +337,7 @@ fn u8 get_next_ch_safe(String string, u64 index); fn u64 is_identifier_start(u8 ch); fn u64 is_identifier_ch(u8 ch); fn u64 is_alphabetic(u8 ch); +fn u64 is_alphanumeric(u8 ch); fn u64 parse_decimal(String string); diff --git a/bootstrap/std/format.c b/bootstrap/std/format.c index f9d8f1c..672c59b 100644 --- a/bootstrap/std/format.c +++ b/bootstrap/std/format.c @@ -1075,6 +1075,72 @@ typedef enum IntegerFormat INTEGER_FORMAT_BINARY, } IntegerFormat; +STRUCT(IntegerFormatOptions) +{ + IntegerFormat format; + u32 width; +}; + +fn IntegerFormatOptions integer_format_options(u8** it) +{ + IntegerFormatOptions options = { + .format = INTEGER_FORMAT_DECIMAL, + }; + + if (**it == ':') + { + *it += 1; + + while (**it != brace_close) + { + switch (**it) + { + case 'x': + options.format = INTEGER_FORMAT_HEXADECIMAL; + *it += 1; + break; 
+ case 'd': + options.format = INTEGER_FORMAT_DECIMAL; + *it += 1; + break; + case 'o': + options.format = INTEGER_FORMAT_OCTAL; + *it += 1; + break; + case 'b': + options.format = INTEGER_FORMAT_BINARY; + *it += 1; + break; + case 'w': + { + *it += 1; + + if (**it != '=') + { + todo(); + } + + *it += 1; + + let(start, *it); + while (is_decimal_digit(**it)) + { + *it += 1; + } + let(end, *it); + assign_cast(options.width, parse_decimal(slice_from_pointer_range(u8, start, end))); + } break; + default: + unreachable(); + } + + *it += **it == ','; + } + } + + return options; +} + fn String format_string_va(String buffer, const char* format, va_list args) { u8* it = (u8*)format; @@ -1181,31 +1247,7 @@ fn String format_string_va(String buffer, const char* format, va_list args) u8* bit_count_end = it; u64 bit_count = parse_decimal(slice_from_pointer_range(u8, (u8*)bit_count_start, (u8*)bit_count_end)); - IntegerFormat integer_format = INTEGER_FORMAT_DECIMAL; - - if (*it == ':') - { - it += 1; - switch (*it) - { - case 'x': - integer_format = INTEGER_FORMAT_HEXADECIMAL; - break; - case 'd': - integer_format = INTEGER_FORMAT_DECIMAL; - break; - case 'o': - integer_format = INTEGER_FORMAT_OCTAL; - break; - case 'b': - integer_format = INTEGER_FORMAT_BINARY; - break; - default: - unreachable(); - } - - it += 1; - } + IntegerFormatOptions options = integer_format_options(&it); s64 original_value; switch (bit_count) @@ -1224,11 +1266,21 @@ fn String format_string_va(String buffer, const char* format, va_list args) String buffer_slice = s_get_slice(u8, buffer, buffer_i, buffer.length); - switch (integer_format) + switch (options.format) { case INTEGER_FORMAT_HEXADECIMAL: { + u32 expected_characters = hex_digit_count(original_value); + + if (expected_characters < options.width) + { + u32 extra_characters = options.width - expected_characters; + memset(buffer.pointer, '0', extra_characters); + buffer_i += extra_characters; + } + let(written_characters, 
format_hexadecimal(buffer_slice, original_value)); + assert(expected_characters == written_characters); buffer_i += written_characters; } break; case INTEGER_FORMAT_DECIMAL: @@ -1281,39 +1333,7 @@ fn String format_string_va(String buffer, const char* format, va_list args) u8* bit_count_end = it; u64 bit_count = parse_decimal(slice_from_pointer_range(u8, (u8*)bit_count_start, (u8*)bit_count_end)); - typedef enum IntegerFormat - { - INTEGER_FORMAT_HEXADECIMAL, - INTEGER_FORMAT_DECIMAL, - INTEGER_FORMAT_OCTAL, - INTEGER_FORMAT_BINARY, - } IntegerFormat; - - IntegerFormat integer_format = INTEGER_FORMAT_DECIMAL; - - if (*it == ':') - { - it += 1; - switch (*it) - { - case 'x': - integer_format = INTEGER_FORMAT_HEXADECIMAL; - break; - case 'd': - integer_format = INTEGER_FORMAT_DECIMAL; - break; - case 'o': - integer_format = INTEGER_FORMAT_OCTAL; - break; - case 'b': - integer_format = INTEGER_FORMAT_BINARY; - break; - default: - unreachable(); - } - - it += 1; - } + IntegerFormatOptions options = integer_format_options(&it); u64 original_value; switch (bit_count) @@ -1330,26 +1350,41 @@ fn String format_string_va(String buffer, const char* format, va_list args) unreachable(); } - let(buffer_slice, s_get_slice(u8, buffer, buffer_i, buffer.length)); - switch (integer_format) + switch (options.format) { case INTEGER_FORMAT_HEXADECIMAL: { + u32 expected_characters = hex_digit_count(original_value); + + if (expected_characters < options.width) + { + u32 extra_characters = options.width - expected_characters; + memset(buffer.pointer + buffer_i, '0', extra_characters); + buffer_i += extra_characters; + } + + let(buffer_slice, s_get_slice(u8, buffer, buffer_i, buffer.length)); let(written_characters, format_hexadecimal(buffer_slice, original_value)); + assert(expected_characters == written_characters); buffer_i += written_characters; } break; case INTEGER_FORMAT_DECIMAL: { + let(buffer_slice, s_get_slice(u8, buffer, buffer_i, buffer.length)); let(written_characters, 
format_decimal(buffer_slice, original_value)); buffer_i += written_characters; } break; case INTEGER_FORMAT_OCTAL: { + let(buffer_slice, s_get_slice(u8, buffer, buffer_i, buffer.length)); + unused(buffer_slice); todo(); } break; case INTEGER_FORMAT_BINARY: { + let(buffer_slice, s_get_slice(u8, buffer, buffer_i, buffer.length)); + unused(buffer_slice); todo(); } break; } @@ -1381,3 +1416,27 @@ fn String format_string(String buffer, const char* format, ...) va_end(args); return result; } + +fn void formatter_append(StringFormatter* formatter, const char* format, ...) +{ + va_list args; + va_start(args, format); + String buffer = s_get_slice(u8, formatter->buffer, formatter->index, formatter->buffer.length); + let(result, format_string_va(buffer, format, args)); + va_end(args); + formatter->index += result.length; +} + +fn void formatter_append_string(StringFormatter* formatter, String string) +{ + assert(string.length + formatter->index <= formatter->buffer.length); + memcpy(formatter->buffer.pointer + formatter->index, string.pointer, string.length); + formatter->index += string.length; +} + +fn void formatter_append_character(StringFormatter* formatter, u8 ch) +{ + assert(formatter->index < formatter->buffer.length); + formatter->buffer.pointer[formatter->index] = ch; + formatter->index += 1; +} diff --git a/bootstrap/std/format.h b/bootstrap/std/format.h index 3e90721..1eeca03 100644 --- a/bootstrap/std/format.h +++ b/bootstrap/std/format.h @@ -1,6 +1,13 @@ #pragma once -#include +STRUCT(StringFormatter) +{ + String buffer; + u64 index; +}; +fn void formatter_append(StringFormatter* formatter, const char* format, ...); +fn void formatter_append_string(StringFormatter* formatter, String string); +fn void formatter_append_character(StringFormatter* formatter, u8 ch); fn String format_string(String buffer, const char* format, ...); fn String format_string_va(String buffer, const char* format, va_list args); diff --git a/bootstrap/std/os.c b/bootstrap/std/os.c index 
45a3207..0d1b68d 100644 --- a/bootstrap/std/os.c +++ b/bootstrap/std/os.c @@ -2,9 +2,11 @@ #include #include +#include #include #include +#include #if _WIN32 global_variable u64 cpu_frequency; @@ -774,6 +776,15 @@ fn u64 os_timer_get() #endif } +FileDescriptor os_file_descriptor_invalid() +{ +#if _WIN32 + return INVALID_HANDLE_VALUE; +#else + return -1; +#endif +} + fn u8 os_file_descriptor_is_valid(FileDescriptor fd) { #if _WIN32 @@ -852,6 +863,8 @@ fn void os_file_write(FileDescriptor fd, String content) assert(result != 0); #else let(result, syscall_write(fd, content.pointer, content.length)); + let(my_errno, strerror(errno)); + unused(my_errno); assert(cast_to(u64, result) == content.length); #endif } @@ -937,6 +950,9 @@ fn u8* os_reserve(u64 base, u64 size, OSReserveProtectionFlags protection, OSRes #else int protection_flags = (protection.read * PROT_READ) | (protection.write * PROT_WRITE) | (protection.execute * PROT_EXEC); int map_flags = (map.anon * MAP_ANONYMOUS) | (map.priv * MAP_PRIVATE) | (map.noreserve * MAP_NORESERVE); +#ifdef __linux__ + map_flags |= (map.populate * MAP_POPULATE); +#endif u8* result = (u8*)posix_mmap((void*)base, size, protection_flags, map_flags, -1, 0); assert(result != MAP_FAILED); return result; @@ -963,8 +979,36 @@ fn void os_directory_make(String path) #endif } +fn u8 os_is_being_debugged() +{ + u8 result = 0; +#if _WIN32 + result = IsDebuggerPresent() != 0; +#else +#ifdef __APPLE__ + let(request, PT_TRACE_ME); +#else + let(request, PTRACE_TRACEME); +#endif + if (ptrace(request, 0, 0, 0) == -1) + { + let(error, errno); + if (error == EPERM) + { + result = 1; + } + } +#endif + + return result; +} + BB_NORETURN BB_COLD fn void os_exit(u32 exit_code) { + if (exit_code != 0 && os_is_being_debugged()) + { + trap(); + } exit(exit_code); } @@ -1016,12 +1060,12 @@ fn Arena* arena_initialize_default(u64 initial_size) fn u8* arena_allocate_bytes(Arena* arena, u64 size, u64 alignment) { - u64 aligned_offset = 
align_forward(arena->position, alignment); + u64 aligned_offset = align_forward_u64(arena->position, alignment); u64 aligned_size_after = aligned_offset + size; if (aligned_size_after > arena->os_position) { - u64 committed_size = align_forward(aligned_size_after, arena->granularity); + u64 committed_size = align_forward_u64(aligned_size_after, arena->granularity); u64 size_to_commit = committed_size - arena->os_position; void* commit_pointer = (u8*)arena + arena->os_position; os_commit(commit_pointer, size_to_commit); @@ -1057,6 +1101,18 @@ fn String arena_join_string(Arena* arena, Slice(String) pieces) return (String) { .pointer = pointer, .length = size }; } +fn String arena_duplicate_string(Arena* arena, String string) +{ + u8* result = arena_allocate(arena, u8, string.length + 1); + memcpy(result, string.pointer, string.length); + result[string.length] = 0; + + return (String) { + .pointer = result, + .length = string.length, + }; +} + fn void arena_reset(Arena* arena) { arena->position = minimum_position; @@ -1105,7 +1161,6 @@ fn String file_read(Arena* arena, String path) fn void file_write(FileWriteOptions options) { - print("Writing file \"{s}\"...\n", options.path); let(fd, os_file_open(options.path, (OSFileOpenFlags) { .write = 1, .truncate = 1, @@ -1122,11 +1177,18 @@ fn void file_write(FileWriteOptions options) os_file_close(fd); } -fn void run_command(Arena* arena, CStringSlice arguments, char* envp[], RunCommandOptions run_options) +fn RunCommandResult run_command(Arena* arena, CStringSlice arguments, char* envp[], RunCommandOptions run_options) { + unused(arena); assert(arguments.length > 0); assert(arguments.pointer[arguments.length - 1] == 0); + RunCommandResult result = {}; + Timestamp start_timestamp = {}; + Timestamp end_timestamp = {}; + f64 ms = 0.0; + u64 measure_time = run_options.debug; + if (run_options.debug) { print("Running command:\n"); @@ -1139,8 +1201,6 @@ fn void run_command(Arena* arena, CStringSlice arguments, char* envp[], 
RunComma } #if _WIN32 - let(start_timestamp, os_timestamp()); - u32 length = 0; for (u32 i = 0; i < arguments.length; i += 1) { @@ -1167,7 +1227,6 @@ fn void run_command(Arena* arena, CStringSlice arguments, char* envp[], RunComma } } bytes[byte_i - 1] = 0; - let(end_timestamp, os_timestamp()); PROCESS_INFORMATION process_information = {}; STARTUPINFOA startup_info = {}; @@ -1175,14 +1234,22 @@ fn void run_command(Arena* arena, CStringSlice arguments, char* envp[], RunComma startup_info.dwFlags |= STARTF_USESTDHANDLES; startup_info.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE); startup_info.hStdError = GetStdHandle(STD_ERROR_HANDLE); - let(handle_inheritance, 1); - let(start, os_timestamp()); + + if (measure_time) + { + start_timestamp = os_timestamp(); + } + if (CreateProcessA(0, bytes, 0, 0, handle_inheritance, 0, 0, 0, &startup_info, &process_information)) { WaitForSingleObject(process_information.hProcess, INFINITE); - let(end, os_timestamp()); - let(ms, os_resolve_timestamps(start, end, TIME_UNIT_MILLISECONDS)); + if (measure_time) + { + end_timestamp = os_timestamp(); + ms = os_resolve_timestamps(start_timestamp, end_timestamp, TIME_UNIT_MILLISECONDS); + } + if (run_options.debug) { @@ -1227,74 +1294,179 @@ fn void run_command(Arena* arena, CStringSlice arguments, char* envp[], RunComma print("CreateProcessA call failed: {cstr}\n", lpMsgBuf); todo(); } - - unused(start_timestamp); - unused(end_timestamp); - unused(envp); #else - unused(arena); - pid_t pid = syscall_fork(); + int null_fd; + if (run_options.use_null_file_descriptor) + { + null_fd = run_options.null_file_descriptor; + assert(os_file_descriptor_is_valid(null_fd)); + } + else if (run_options.stdout_stream.policy == CHILD_PROCESS_STREAM_IGNORE || run_options.stderr_stream.policy == CHILD_PROCESS_STREAM_IGNORE) + { + null_fd = open("/dev/null", O_WRONLY); + assert(os_file_descriptor_is_valid(null_fd)); + } + + int stdout_pipe[2]; + int stderr_pipe[2]; + + if (run_options.stdout_stream.policy == 
CHILD_PROCESS_STREAM_PIPE && pipe(stdout_pipe) == -1) + { + todo(); + } + + if (run_options.stderr_stream.policy == CHILD_PROCESS_STREAM_PIPE && pipe(stderr_pipe) == -1) + { + todo(); + } + + pid_t pid = syscall_fork(); if (pid == -1) { todo(); } - let(start_timestamp, os_timestamp()); + if (measure_time) + { + start_timestamp = os_timestamp(); + } if (pid == 0) { - // close(pipes[0]); + switch (run_options.stdout_stream.policy) + { + case CHILD_PROCESS_STREAM_PIPE: + { + close(stdout_pipe[0]); + dup2(stdout_pipe[1], STDOUT_FILENO); + close(stdout_pipe[1]); + } break; + case CHILD_PROCESS_STREAM_IGNORE: + { + dup2(null_fd, STDOUT_FILENO); + close(null_fd); + } break; + case CHILD_PROCESS_STREAM_INHERIT: + { + } break; + } + + switch (run_options.stderr_stream.policy) + { + case CHILD_PROCESS_STREAM_PIPE: + { + close(stderr_pipe[0]); + dup2(stderr_pipe[1], STDERR_FILENO); + close(stderr_pipe[1]); + } break; + case CHILD_PROCESS_STREAM_IGNORE: + { + dup2(null_fd, STDERR_FILENO); + close(null_fd); + } break; + case CHILD_PROCESS_STREAM_INHERIT: + { + } break; + } + // fcntl(pipes[1], F_SETFD, FD_CLOEXEC); let(result, syscall_execve(arguments.pointer[0], arguments.pointer, envp)); unused(result); -#if LINK_LIBC panic("Execve failed! 
Error: {cstr}\n", strerror(errno)); -#else - todo(); -#endif } else { + if (run_options.stdout_stream.policy == CHILD_PROCESS_STREAM_PIPE) + { + close(stdout_pipe[1]); + } + + if (run_options.stderr_stream.policy == CHILD_PROCESS_STREAM_PIPE) + { + close(stderr_pipe[1]); + } + + if (run_options.stdout_stream.policy == CHILD_PROCESS_STREAM_PIPE) + { + assert(run_options.stdout_stream.capacity); + ssize_t byte_count = read(stdout_pipe[0], run_options.stdout_stream.buffer, run_options.stdout_stream.capacity); + assert(byte_count >= 0); + *run_options.stdout_stream.length = byte_count; + + close(stdout_pipe[0]); + } + + if (run_options.stderr_stream.policy == CHILD_PROCESS_STREAM_PIPE) + { + assert(run_options.stderr_stream.capacity); + ssize_t byte_count = read(stderr_pipe[0], run_options.stderr_stream.buffer, run_options.stderr_stream.capacity); + assert(byte_count >= 0); + *run_options.stderr_stream.length = byte_count; + + close(stderr_pipe[0]); + } + int status = 0; int options = 0; - pid_t result = syscall_waitpid(pid, &status, options); - let(end_timestamp, os_timestamp()); - int success = 0; - if (result == pid) + pid_t waitpid_result = syscall_waitpid(pid, &status, options); + + if (measure_time) + { + end_timestamp = os_timestamp(); + } + + if (waitpid_result == pid) { if (run_options.debug) { print("{cstr} ", arguments.pointer[0]); - - if (WIFEXITED(status)) - { - let(exit_code, WEXITSTATUS(status)); - print("exited with code {u32}\n", exit_code); - } - else if (WIFSIGNALED(status)) - { - let(signal_code, WTERMSIG(status)); - print("was signaled: {u32}\n", signal_code); - } - else if (WIFSTOPPED(status)) - { - let(stopped_code, WSTOPSIG(status)); - print("was stopped: {u32}\n", stopped_code); - } - else - { - print("terminated unexpectedly with status {u32}\n", status); - } } if (WIFEXITED(status)) { let(exit_code, WEXITSTATUS(status)); - success = exit_code == 0; + result.termination_code = exit_code; + result.termination_kind = PROCESS_TERMINATION_EXIT; + 
+ if (run_options.debug) + { + print("exited with code {u32}\n", exit_code); + } + } + else if (WIFSIGNALED(status)) + { + let(signal_code, WTERMSIG(status)); + result.termination_code = signal_code; + result.termination_kind = PROCESS_TERMINATION_SIGNAL; + + if (run_options.debug) + { + print("was signaled: {u32}\n", signal_code); + } + } + else if (WIFSTOPPED(status)) + { + let(stop_code, WSTOPSIG(status)); + result.termination_code = stop_code; + result.termination_kind = PROCESS_TERMINATION_STOP; + + if (run_options.debug) + { + print("was stopped: {u32}\n", stop_code); + } + } + else + { + result.termination_kind = PROCESS_TERMINATION_UNKNOWN; + + if (run_options.debug) + { + print("terminated unexpectedly with status {u32}\n", status); + } } } - else if (result == -1) + else if (waitpid_result == -1) { let(waitpid_error, errno); print("Error waiting for process termination: {u32}\n", waitpid_error); @@ -1305,42 +1477,25 @@ fn void run_command(Arena* arena, CStringSlice arguments, char* envp[], RunComma todo(); } - if (!success) + let(success, result.termination_kind == PROCESS_TERMINATION_EXIT && result.termination_code == 0); + if (run_options.debug && !success) { - print("Program failed to run successfully!\n"); - failed_execution(); + print("{cstr} failed to run successfully!\n", arguments.pointer[0]); } if (run_options.debug) { - let(ms, os_resolve_timestamps(start_timestamp, end_timestamp, TIME_UNIT_MILLISECONDS)); + ms = os_resolve_timestamps(start_timestamp, end_timestamp, TIME_UNIT_MILLISECONDS); u32 ticks = 0; #if LINK_LIBC == 0 ticks = cpu_frequency != 0; #endif - print("Command run successfully in {f64} {cstr}\n", ms, ticks ? "ticks" : "ms"); + print("Command run {cstr} in {f64} {cstr}\n", success ? "successfully" : "with errors", ms, ticks ? 
"ticks" : "ms"); } - } -#endif -} -fn u8 os_is_being_debugged() -{ - u8 result = 0; -#if _WIN32 - result = IsDebuggerPresent() != 0; -#else -#ifdef __APPLE__ - let(request, PT_TRACE_ME); -#else - let(request, PTRACE_TRACEME); -#endif - if (ptrace(request, 0, 0, 0) == -1) - { - let(error, errno); - if (error == EPERM) + if (!run_options.use_null_file_descriptor && os_file_descriptor_is_valid(null_fd)) { - result = 1; + close(null_fd); } } #endif @@ -1462,3 +1617,85 @@ fn u8 os_library_is_valid(OSLibrary library) { return library.handle != 0; } + +fn String file_find_in_path(Arena* arena, String file, String path_env, String extension) +{ + String result = {}; + assert(path_env.pointer); + + String path_it = path_env; + u8 buffer[4096]; + +#if _WIN32 + u8 env_path_separator = ';'; + u8 path_separator = '\\'; +#else + u8 env_path_separator = ':'; + u8 path_separator = '/'; +#endif + + while (path_it.length) + { + let(index, string_first_ch(path_it, env_path_separator)); + index = unlikely(index == STRING_NO_MATCH) ? 
path_it.length : index; + let(path_chunk, s_get_slice(u8, path_it, 0, index)); + + u64 i = 0; + + memcpy(&buffer[i], path_chunk.pointer, path_chunk.length); + i += path_chunk.length; + + buffer[i] = path_separator; + i += 1; + + memcpy(&buffer[i], file.pointer, file.length); + i += file.length; + + if (extension.length) + { + memcpy(&buffer[i], extension.pointer, extension.length); + i += extension.length; + } + + buffer[i] = 0; + i += 1; + + let(total_length, i - 1); + OSFileOpenFlags flags = { + .read = 1, + }; + OSFilePermissions permissions = { + .readable = 1, + .writable = 1, + }; + + String path = { .pointer = buffer, .length = total_length }; + + FileDescriptor fd = os_file_open(path, flags, permissions); + + if (os_file_descriptor_is_valid(fd)) + { + os_file_close(fd); + result.pointer = arena_allocate(arena, u8, total_length + 1); + memcpy(result.pointer, buffer, total_length + 1); + result.length = total_length; + break; + } + + String new_path = s_get_slice(u8, path_it, index + (index != path_it.length), path_it.length); + assert(new_path.length < path_env.length); + path_it = new_path; + } + + return result; +} + +fn String executable_find_in_path(Arena* arena, String executable, String path_env) +{ + String extension = {}; +#if _WIN32 + extension = strlit(".exe"); +#endif + return file_find_in_path(arena, executable, path_env, extension); +} + diff --git a/bootstrap/std/os.h b/bootstrap/std/os.h index 541881a..fb4879f 100644 --- a/bootstrap/std/os.h +++ b/bootstrap/std/os.h @@ -27,9 +27,43 @@ typedef enum TimeUnit TIME_UNIT_SECONDS, } TimeUnit; +ENUM(ProcessTerminationKind, u8, + PROCESS_TERMINATION_UNKNOWN, + PROCESS_TERMINATION_EXIT, + PROCESS_TERMINATION_SIGNAL, + PROCESS_TERMINATION_STOP, +); + +STRUCT(RunCommandResult) +{ + u32 termination_code; + ProcessTerminationKind termination_kind; + u8 reserved[3]; +}; + +typedef enum ChildProcessStreamPolicy +{ + CHILD_PROCESS_STREAM_INHERIT, + CHILD_PROCESS_STREAM_PIPE, + CHILD_PROCESS_STREAM_IGNORE, +} 
ChildProcessStreamPolicy; + +STRUCT(ChildProcessStream) +{ + u8* buffer; + u32* length; + u32 capacity; + ChildProcessStreamPolicy policy; +}; + STRUCT(RunCommandOptions) { + ChildProcessStream stdout_stream; + ChildProcessStream stderr_stream; + FileDescriptor null_file_descriptor; + u64 use_null_file_descriptor:1; u64 debug:1; + u64 reserved:62; }; STRUCT(Timestamp) @@ -66,6 +100,7 @@ STRUCT(OSReserveMapFlags) { u32 priv:1; u32 anon:1; + u32 populate:1; u32 noreserve:1; u32 reserved:29; }; @@ -99,7 +134,7 @@ global_variable u64 default_size = GB(4); fn void vprint(const char* format, va_list args); fn void print(const char* format, ...); -fn void run_command(Arena* arena, CStringSlice arguments, char* envp[], RunCommandOptions options); +fn RunCommandResult run_command(Arena* arena, CStringSlice arguments, char* envp[], RunCommandOptions options); fn String file_read(Arena* arena, String path); fn void file_write(FileWriteOptions options); diff --git a/bootstrap/std/project.h b/bootstrap/std/project.h index 8f53275..1bea07d 100644 --- a/bootstrap/std/project.h +++ b/bootstrap/std/project.h @@ -31,7 +31,7 @@ fn u8 rendering_backend_is_valid(RenderingBackend rendering_backend) { #ifdef __linux__ valid = rendering_backend == RENDERING_BACKEND_VULKAN; -#elif __APPLE__ +#elif defined(__APPLE__) valid = rendering_backend == RENDERING_BACKEND_METAL || rendering_backend == RENDERING_BACKEND_VULKAN; #elif _WIN32 valid = rendering_backend == RENDERING_BACKEND_DIRECTX12 || rendering_backend == RENDERING_BACKEND_VULKAN; diff --git a/bootstrap/std/virtual_buffer.c b/bootstrap/std/virtual_buffer.c index d646e0f..7df3de0 100644 --- a/bootstrap/std/virtual_buffer.c +++ b/bootstrap/std/virtual_buffer.c @@ -1,5 +1,4 @@ -#include -#include +#pragma once fn void vb_generic_ensure_capacity(VirtualBuffer(u8)* vb, u32 item_size, u32 item_count) { @@ -13,8 +12,8 @@ fn void vb_generic_ensure_capacity(VirtualBuffer(u8)* vb, u32 item_size, u32 ite vb->pointer = os_reserve(0, item_size * 
UINT32_MAX, (OSReserveProtectionFlags) {}, (OSReserveMapFlags) { .priv = 1, .anon = 1, .noreserve = 1 }); } - let_cast(u32, old_page_capacity, align_forward(old_capacity * item_size, minimum_granularity)); - let_cast(u32, new_page_capacity, align_forward(wanted_capacity * item_size, minimum_granularity)); + let_cast(u32, old_page_capacity, align_forward_u64(old_capacity * item_size, minimum_granularity)); + let_cast(u32, new_page_capacity, align_forward_u64(wanted_capacity * item_size, minimum_granularity)); let(commit_size, new_page_capacity - old_page_capacity); void* commit_pointer = vb->pointer + old_page_capacity; @@ -49,11 +48,13 @@ fn u8* vb_append_bytes(VirtualBuffer(u8*) vb, Slice(u8) bytes) return pointer; } -fn void vb_copy_string(VirtualBuffer(u8)* buffer, String string) +fn u32 vb_copy_string(VirtualBuffer(u8)* buffer, String string) { + let(offset, buffer->length); let_cast(u32, length, string.length); let(pointer, vb_add(buffer, length)); memcpy(pointer, string.pointer, length); + return offset; } fn u64 vb_copy_string_zero_terminated(VirtualBuffer(u8)* buffer, String string) @@ -65,3 +66,23 @@ fn u64 vb_copy_string_zero_terminated(VirtualBuffer(u8)* buffer, String string) return string.length; } + +fn void vb_copy_byte_repeatedly(VirtualBuffer(u8)* buffer, u8 byte, u32 times) +{ + u8* ptr = vb_generic_add(buffer, 1, times); + memset(ptr, byte, times); +} + +fn u64 vb_format(VirtualBuffer(u8)* vb, const char* format, ...) 
+{ + u8 buffer[4096]; + va_list args; + va_start(args, format); + let(result, format_string_va((String)array_to_slice(buffer), format, args)); + va_end(args); + + assert(result.length <= array_length(buffer)); + vb_copy_string(vb, result); + + return result.length; +} diff --git a/bootstrap/std/virtual_buffer.h b/bootstrap/std/virtual_buffer.h index 5de5eac..cf0e2e3 100644 --- a/bootstrap/std/virtual_buffer.h +++ b/bootstrap/std/virtual_buffer.h @@ -49,5 +49,6 @@ fn void vb_generic_ensure_capacity(VirtualBuffer(u8)* vb, u32 item_size, u32 ite fn u8* vb_generic_add_assume_capacity(VirtualBuffer(u8)* vb, u32 item_size, u32 item_count); fn u8* vb_generic_add(VirtualBuffer(u8)* vb, u32 item_size, u32 item_count); fn u8* vb_append_bytes(VirtualBuffer(u8*) vb, Slice(u8) bytes); -fn void vb_copy_string(VirtualBuffer(u8)* buffer, String string); +fn u32 vb_copy_string(VirtualBuffer(u8)* buffer, String string); fn u64 vb_copy_string_zero_terminated(VirtualBuffer(u8)* buffer, String string); +fn u64 vb_format(VirtualBuffer(u8)* vb, const char* format, ...); diff --git a/bootstrap/std/vulkan_rendering.c b/bootstrap/std/vulkan_rendering.c index 0bf5ffb..a14300a 100644 --- a/bootstrap/std/vulkan_rendering.c +++ b/bootstrap/std/vulkan_rendering.c @@ -312,7 +312,6 @@ fn void buffer_copy_to_host(VulkanBuffer buffer, Slice(HostBufferCopy) regions) let(region, regions.pointer[i]); let(destination, buffer_pointer + region.destination_offset); assert(destination + region.source.length <= (u8*)buffer.address + buffer.size); -#define USE_MEMCPY 1 #if USE_MEMCPY memcpy(destination, region.source.pointer, region.source.length); #else diff --git a/build.bat b/build.bat index c0a732e..4a037b9 100644 --- a/build.bat +++ b/build.bat @@ -22,10 +22,10 @@ mkdir %BUILD_DIR% > NUL 2>&1 set BUILD_OUT=cache\build.exe set BB_ERROR_ON_WARNINGS=%BB_CI% -if "%BB_CI%" == "0" ( - %VK_SDK_PATH%\Bin\glslangValidator.exe -V bootstrap\std\shaders\rect.vert -o cache\rect.vert.spv --quiet || exit /b 1 - 
%VK_SDK_PATH%\Bin\glslangValidator.exe -V bootstrap\std\shaders\rect.frag -o cache\rect.frag.spv --quiet || exit /b 1 -) +REM if "%BB_CI%" == "0" ( +REM %VK_SDK_PATH%\Bin\glslangValidator.exe -V bootstrap\std\shaders\rect.vert -o cache\rect.vert.spv --quiet || exit /b 1 +REM %VK_SDK_PATH%\Bin\glslangValidator.exe -V bootstrap\std\shaders\rect.frag -o cache\rect.frag.spv --quiet || exit /b 1 +REM ) cl /Zi /Y- /Gm- /std:clatest /diagnostics:caret -FC /nologo build.c /Fd%BUILD_DIR%\ /Fo%BUILD_DIR%\ /Fe%BUILD_OUT% -Ibootstrap -DBB_TIMETRACE=0 -DBB_BUILD_TYPE=\"%BB_BUILD_TYPE%\" -DBB_CI=%BB_CI% -DBB_ERROR_ON_WARNINGS=%BB_ERROR_ON_WARNINGS% -DBB_ERROR_LIMIT=%BB_ERROR_LIMIT% /link /INCREMENTAL:NO || exit /b 1 diff --git a/build.c b/build.c index 5516ffe..bb0c4f1 100644 --- a/build.c +++ b/build.c @@ -97,79 +97,6 @@ global_variable char* compiler_switches[COMPILER_ARGUMENT_STYLE_COUNT][COMPILER_ }, }; -fn String file_find_in_path(Arena* arena, String file, String path_env, String extension) -{ - String result = {}; - assert(path_env.pointer); - - String path_it = path_env; - u8 buffer[4096]; - -#if _WIN32 - u8 env_path_separator = ';'; - u8 path_separator = '\\'; -#else - u8 env_path_separator = ':'; - u8 path_separator = '/'; -#endif - - while (path_it.length) - { - let(index, string_first_ch(path_it, env_path_separator)); - index = unlikely(index == STRING_NO_MATCH) ? 
path_it.length : index; - let(path_chunk, s_get_slice(u8, path_it, 0, index)); - - u64 i = 0; - - memcpy(&buffer[i], path_chunk.pointer, path_chunk.length); - i += path_chunk.length; - - buffer[i] = path_separator; - i += 1; - - memcpy(&buffer[i], file.pointer, file.length); - i += file.length; - - if (extension.length) - { - memcpy(&buffer[i], extension.pointer, extension.length); - i += extension.length; - } - - buffer[i] = 0; - i += 1; - - let(total_length, i - 1); - OSFileOpenFlags flags = { - .read = 1, - }; - OSFilePermissions permissions = { - .readable = 1, - .writable = 1, - }; - - String path = { .pointer = buffer, .length = total_length }; - - FileDescriptor fd = os_file_open(path, flags, permissions); - - if (os_file_descriptor_is_valid(fd)) - { - os_file_close(fd); - result.pointer = arena_allocate(arena, u8, total_length + 1); - memcpy(result.pointer, buffer, total_length + 1); - result.length = total_length; - break; - } - - String new_path = s_get_slice(u8, path_it, index + (index != path_it.length), path_it.length); - assert(new_path.length < path_env.length); - path_it = new_path; - } - - return result; -} - - fn C_Compiler c_compiler_from_path(String path) { C_Compiler result = C_COMPILER_COUNT; @@ -234,7 +161,7 @@ fn String c_compiler_to_string(C_Compiler c_compiler) } // Returns the absolute path of a C compiler -fn String get_c_compiler_path(Arena* arena) +fn String get_c_compiler_path(Arena* arena, BuildType build_type) { String cc_path = {}; String cc_env = os_get_environment_variable("CC"); @@ -247,12 +174,6 @@ fn String get_c_compiler_path(Arena* arena) { cc_path = cc_env; } -#ifndef _WIN32 - else - { - cc_path = file_find_in_path(arena, strlit("cc"), path_env, extension); - } -#endif if (!cc_path.pointer) { @@ -307,6 +228,14 @@ fn String get_c_compiler_path(Arena* arena) } } + if (!BB_CI) + { + if (build_type != BUILD_TYPE_DEBUG) + { + return strlit("/usr/lib/llvm18/bin/clang-18"); + } + } + return cc_path; } @@ -485,6 +414,13 @@ fn void 
compile_program(Arena* arena, CompileOptions options) add_arg("/nologo"); } + u8 llvm_mca = 0; + if (llvm_mca) + { + add_arg("-S"); + add_arg("-masm=intel"); + } + #if __APPLE__ add_arg("-x"); add_arg("objective-c"); @@ -492,26 +428,45 @@ fn void compile_program(Arena* arena, CompileOptions options) add_arg(string_to_c(options.source_path)); - if (c_compiler == C_COMPILER_MSVC) + switch (c_compiler) { - String strings[] = { - strlit("/Fe"), - options.output_path, - }; - String arg = arena_join_string(arena, (Slice(String))array_to_slice(strings)); - add_arg(string_to_c(arg)); + case C_COMPILER_MSVC: + { + String strings[] = { + strlit("/Fe"), + options.output_path, + }; + String arg = arena_join_string(arena, (Slice(String))array_to_slice(strings)); + add_arg(string_to_c(arg)); - add_arg("/Fo" BUILD_DIR "\\"); - add_arg("/Fd" BUILD_DIR "\\"); + add_arg("/Fo" BUILD_DIR "\\"); + add_arg("/Fd" BUILD_DIR "\\"); + } break; + case C_COMPILER_GCC: + { + } break; + case C_COMPILER_CLANG: + { + // add_arg("-working-directory"); + // add_arg(BUILD_DIR); + // add_arg("-save-temps"); + } break; + default: break; } - else + + if (c_compiler != C_COMPILER_MSVC) { add_arg("-o"); add_arg(string_to_c(options.output_path)); } +#ifdef __linux__ + add_arg("-fuse-ld=mold"); +#endif + add_arg("-Ibootstrap"); add_arg("-Idependencies/stb"); + add_arg("-I" BUILD_DIR); // Include the build dir for generated files char* c_include_path = getenv("C_INCLUDE_PATH"); if (c_include_path) @@ -536,7 +491,7 @@ fn void compile_program(Arena* arena, CompileOptions options) add_arg(string_to_c(arg)); } - let(debug_info, options.build_type != BUILD_TYPE_RELEASE_SMALL); + let(debug_info, options.build_type != BUILD_TYPE_RELEASE_SMALL && !llvm_mca); if (debug_info) { add_arg(compiler_switches[c_compiler == C_COMPILER_MSVC][COMPILER_SWITCH_DEBUG_INFO]); @@ -551,12 +506,34 @@ fn void compile_program(Arena* arena, CompileOptions options) { case BUILD_TYPE_COUNT: unreachable(); case BUILD_TYPE_DEBUG: - case 
BUILD_TYPE_RELEASE_SAFE: add_arg("-DBB_DEBUG=1"); add_arg("-D_DEBUG=1"); break; + case BUILD_TYPE_RELEASE_SAFE: + { + add_arg("-DBB_DEBUG=1"); + add_arg("-D_DEBUG=1"); + } break; case BUILD_TYPE_RELEASE_FAST: - case BUILD_TYPE_RELEASE_SMALL: add_arg("-DBB_DEBUG=0"); add_arg("-DNDEBUG=1"); break; + case BUILD_TYPE_RELEASE_SMALL: + { + add_arg("-DBB_DEBUG=0"); + add_arg("-DNDEBUG=1"); + if (c_compiler != C_COMPILER_MSVC) + { + add_arg("-fno-stack-protector"); + } + } break; } - // Inmutable options + // TODO: careful. If handing binaries built by CI to people, we need to be specially careful about this + if (c_compiler == C_COMPILER_MSVC) + { + add_arg("/arch:AVX512"); + } + else + { + add_arg("-march=native"); + } + + // Immutable options switch (c_compiler) { case C_COMPILER_MSVC: @@ -566,6 +543,7 @@ fn void compile_program(Arena* arena, CompileOptions options) add_arg("/WX"); #endif add_arg("/wd4255"); + add_arg("/J"); } break; default: { @@ -577,15 +555,18 @@ fn void compile_program(Arena* arena, CompileOptions options) add_arg("-Wno-nested-anon-types"); add_arg("-Wno-keyword-macro"); add_arg("-Wno-gnu-auto-type"); + add_arg("-Wno-gnu-binary-literal"); #ifndef __APPLE__ add_arg("-Wno-auto-decl-extensions"); #endif add_arg("-Wno-gnu-empty-initializer"); add_arg("-Wno-fixed-enum-extension"); + add_arg("-Wno-overlength-strings"); #if BB_ERROR_ON_WARNINGS add_arg("-Werror"); #endif + add_arg("-fno-signed-char"); add_arg("-fno-strict-aliasing"); add_arg("-fwrapv"); } break; @@ -693,6 +674,94 @@ fn void compile_program(Arena* arena, CompileOptions options) #ifndef _WIN32 add_arg("-lm"); + + String path_env = cstr(getenv("PATH")); + String llvm_config_path = executable_find_in_path(arena, strlit("llvm-config"), path_env); + u8 buffer[16*1024]; + u32 length = 0; + char* llvm_config_c = string_to_c(llvm_config_path); + { + char* arguments[] = { + llvm_config_c, + "--components", + 0, + }; + RunCommandOptions run_options = { + .stdout_stream = { + .buffer = buffer, + 
.length = &length, + .capacity = sizeof(buffer), + .policy = CHILD_PROCESS_STREAM_PIPE, + }, + .debug = options.flags.debug, + }; + RunCommandResult result = run_command(arena, (CStringSlice)array_to_slice(arguments), environment_pointer, run_options); + let(success, result.termination_kind == PROCESS_TERMINATION_EXIT && result.termination_code == 0); + if (!success) + { + os_exit(1); + } + } + + { + char* argv_buffer[4096]; + argv_buffer[0] = llvm_config_c; + argv_buffer[1] = "--libs"; + u32 arg_i = 2; + + String llvm_components = { .pointer = buffer, .length = length }; + u32 i = 0; + while (i < length) + { + String slice = s_get_slice(u8, llvm_components, i, llvm_components.length); + u64 space_index = string_first_ch(slice, ' '); + u8 there_is_space = space_index != STRING_NO_MATCH; + u64 argument_length = unlikely(there_is_space) ? space_index : slice.length; + + String argument_slice = s_get_slice(u8, slice, 0, argument_length - !there_is_space); + argv_buffer[arg_i] = string_to_c(arena_duplicate_string(arena, argument_slice)); + arg_i += 1; + + i += argument_length + there_is_space; + } + + argv_buffer[arg_i] = 0; + arg_i += 1; + + length = 0; + + RunCommandOptions run_options = { + .stdout_stream = { + .buffer = buffer, + .length = &length, + .capacity = sizeof(buffer), + .policy = CHILD_PROCESS_STREAM_PIPE, + }, + .debug = options.flags.debug, + }; + CStringSlice arguments = { .pointer = argv_buffer, .length = arg_i }; + RunCommandResult result = run_command(arena, arguments, environment_pointer, run_options); + let(success, result.termination_kind == PROCESS_TERMINATION_EXIT && result.termination_code == 0); + if (!success) + { + os_exit(1); + } + + i = 0; + + while (i < length) + { + String slice = s_get_slice(u8, llvm_components, i, llvm_components.length); + u64 space_index = string_first_ch(slice, ' '); + u8 there_is_space = space_index != STRING_NO_MATCH; + u64 argument_length = unlikely(there_is_space) ? 
space_index : slice.length; + + String argument_slice = s_get_slice(u8, slice, 0, argument_length - !there_is_space); + add_arg(string_to_c(arena_duplicate_string(arena, argument_slice))); + + i += argument_length + there_is_space; + } + } #endif switch (options.windowing_backend) @@ -748,21 +817,1707 @@ fn void compile_program(Arena* arena, CompileOptions options) RunCommandOptions run_options = { .debug = options.flags.debug, }; - run_command(arena, arguments, environment_pointer, run_options); + RunCommandResult result = run_command(arena, arguments, environment_pointer, run_options); + let(success, result.termination_kind == PROCESS_TERMINATION_EXIT && result.termination_code == 0); + if (!success) + { + os_exit(1); + } +} + +STRUCT(Load) +{ + u64 mask; + u8 index; + u8 size; +}; +decl_vb(Load); +declare_slice(Load); + +STRUCT(Combine) +{ + Slice(Load) loads; + u64 size; +}; + +STRUCT(Merge) +{ + Load values[2]; + u8 is_valid[2]; +}; + +typedef enum ProgramId +{ + PROGRAM_MERGE, + PROGRAM_COMBINE, + PROGRAM_LOAD, +} ProgramId; + +STRUCT(Program) +{ + union + { + Combine combine; + Load load; + Merge merge; + }; + ProgramId id; +}; + +STRUCT(Lookup) +{ + Slice(s32) indices; + SliceP(u8) words; +}; + +declare_slice(SliceP(u8)); +declare_slice(SliceP(void)); + +fn u64 pext(u64 w, u64 m) +{ + u64 result = 0; + u64 bit = 1; + + while (w != 0) + { + if ((m & 1) == 1) + { + if ((w & 1) == 1) + { + result |= bit; + } + + bit <<= 1; + } + + w >>= 1; + m >>= 1; + } + + return result; +} + +fn void n_word_mask(SliceP(u8) words, u8* mask, u8 length) +{ + for (u8 i = 0; i < length; i += 1) + { + mask[i] = 0xff; + } + + for (u8 byte = 0; byte < length; byte += 1) + { + for (u8 bit = 0; bit < 8; bit += 1) + { + u8 old = mask[byte]; + mask[byte] &= ~(u8)(1 << bit); + + u8 map[16*16][16] = {}; + u32 map_item_count = 0; + u8 candidate[16] = {}; + + for (u64 word_index = 0; word_index < words.length; word_index += 1) + { + let(word, words.pointer[word_index]); + for (u8 
mask_index = 0; mask_index < length; mask_index += 1) + { + candidate[mask_index] = word[mask_index] & mask[mask_index]; + } + + u8 map_index; + for (map_index = 0; map_index < map_item_count; map_index += 1) + { + if (memcmp(map[map_index], candidate, length) == 0) + { + break; + } + } + + if (map_index != map_item_count) + { + mask[byte] = old; + break; + } + + memcpy(map[map_item_count], candidate, length); + map_item_count += 1; + } + } + } +} + +fn u64 program_lookup_size(Program program) +{ + u64 n = 0; + + switch (program.id) + { + case PROGRAM_COMBINE: + { + for (u64 i = 0; i < program.combine.loads.length; i += 1) + { + n += __builtin_popcountll(program.combine.loads.pointer[i].mask); + } + } break; + case PROGRAM_LOAD: + { + n = __builtin_popcountll(program.load.mask); + } break; + case PROGRAM_MERGE: todo(); + } + + return n; +} + +fn u64 load_load(Load load, u8* word) +{ + u64 result; + + switch (load.size) + { + case 1: result = word[load.index]; break; + case 2: result = *(u16*)&word[load.index]; break; + case 4: result = *(u32*)&word[load.index]; break; + case 8: result = *(u64*)&word[load.index]; break; + default: unreachable(); + } + + return result; +} + +fn u64 program_evaluate(Program program, u8* word) +{ + u64 result; + + switch (program.id) + { + case PROGRAM_COMBINE: + { + u64 q = 0; + u64 m = 0; + u64 shift = 0; + + for (u64 i = 0; i < program.combine.loads.length; i += 1) + { + Load load = program.combine.loads.pointer[i]; + let(qi, load_load(load, word)); + let(mi, load.mask); + + q |= qi << shift; + m |= mi << shift; + + shift += 8 * load.size; + } + + result = pext(q, m); + } break; + case PROGRAM_LOAD: + { + let(q, load_load(program.load, word)); + result = pext(q, program.load.mask); + } break; + case PROGRAM_MERGE: todo(); + } + + return result; +} + +fn Slice(s32) pdep_lookup(Arena* arena, Program program, SliceP(u8) words) +{ + let(length, 1 << program_lookup_size(program)); + let(result, arena_allocate(arena, s32, length)); + for 
(u64 i = 0; i < length; i += 1) + { + result[i] = -1; + } + + for (u64 i = 0; i < words.length; i += 1) + { + let(value, program_evaluate(program, words.pointer[i])); + result[value] = i; + } + + return (Slice(s32)) { .pointer = result, .length = length }; +} + +fn u8 load_trim(Load in, Load* out) +{ + u8 result = 0; + Load l; + switch (in.size) + { + case 8: + { + todo(); + } break; + case 4: + { + if ((in.mask & 0xffff) == 0) + { + l = (Load) { + .index = in.index + 4, + .size = 4, + .mask = in.mask >> 32, + }; + } + } break; + case 2: + { + if ((in.mask & 0xff) == 0) + { + todo(); + } + } break; + } + + if (result) + { + *out = l; + } + + return result; +} + +fn u8 can_merge(Load* load, u8* is_valid) +{ + if (!is_valid[0] || !is_valid[1]) + { + return 0; + } + + if ((load[0].mask & load[1].mask) == 0) + { + return 1; + } + + return 0; +} + +fn u8 new_merge(Load* loads, u8* is_valid, Merge* out) +{ + u8 result = 0; + + if (can_merge(loads, is_valid)) + { + result = 1; + *out = (Merge) + { + .values = { loads[0], loads[1] }, + .is_valid = { is_valid[0], is_valid[1] }, + }; + } + + return result; +} + +fn Program compile_mask(u8* mask, u8 mask_length) +{ + Program result = {}; + VirtualBuffer(Load) loads = {}; + const u8 load_sizes[] = {8, 4, 2, 1}; + + while (1) + { + u32 active = 0; + for (u8 i = 0; i < mask_length; i += 1) + { + active += mask[i] != 0; + } + + if (active == 0) + { + break; + } + + if (active == 1) + { + for (u8 i = 0; i < mask_length; i += 1) + { + u8 mask_byte = mask[i]; + if (mask_byte != 0) + { + *vb_add(&loads, 1) = (Load) { + .index = i, + .size = 1, + .mask = mask_byte, + }; + break; + } + } + + break; + } + + for (u8 size_index = 0; size_index < array_length(load_sizes); size_index += 1) + { + u8 size = load_sizes[size_index]; + if (size > mask_length) + { + continue; + } + + u8 best_count = 0; + u8 best_index = 0; + + for (u8 i = 0; i < mask_length - size + 1; i += 1) + { + u8 k = 0; + for (u8 mask_i = 0; mask_i < size; mask_i += 1) + { 
+ k += mask[mask_i + i] != 0; + } + + if (k > best_count) + { + best_count = k; + best_index = i; + } + } + + if (best_count > 0) + { + Load load = { + .index = best_index, + .size = size, + }; + + for (u8 i = 0; i < size; i += 1) + { + load.mask |= (u64)mask[best_index + i] << (i * 8); + } + + *vb_add(&loads, 1) = load; + + for (u8 i = 0; i < size; i += 1) + { + mask[best_index + i] = 0; + } + + break; + } + } + } + + if (loads.length == 1) + { + while (1) + { + Load l; + if (!load_trim(loads.pointer[0], &l)) + { + break; + } + + loads.pointer[0] = l; + } + + return (Program) { + .load = loads.pointer[0], + .id = PROGRAM_LOAD, + }; + } + else if (loads.length == 2) + { + let(first, loads.pointer[0]); + let(second, loads.pointer[1]); + + let(trimmed, first); + u8 trimmed_is_valid = 1; + Merge merge_memory; + Load load_memory; + Merge merge = {}; + u8 merge_is_valid = 0; + + while (trimmed_is_valid) + { + Load merge_loads[2] = { trimmed, second }; + u8 is_valid[2] = { 1, 1 }; + Merge merge_candidate = {}; + if (new_merge(merge_loads, is_valid, &merge_candidate)) + { + merge = merge_candidate; + } + + trimmed_is_valid = load_trim(trimmed, &load_memory); + if (trimmed_is_valid) + { + trimmed = load_memory; + } + } + + if (merge_is_valid) + { + todo(); + } + } + + u64 total = 0; + for (u64 i = 0; i < loads.length; i += 1) + { + total += loads.pointer[i].size; + } + + u64 size = total < 4 ? 
4 : 8; + result = (Program) { + .combine = { + .loads = { .pointer = loads.pointer, .length = loads.length }, + .size = size, + }, + .id = PROGRAM_COMBINE, + }; + return result; +} + +typedef enum TypeKind +{ + TYPE_KIND_U16, +} TypeKind; + +STRUCT(PerfectHashArguments) +{ + VirtualBuffer(u8)* file_h; + VirtualBuffer(u8)* file_c; + Slice_SliceP_u8 words_by_length; + u8* mask; + Lookup* lookups; + Program* programs; + Arena* arena; + String kind; + void** values_by_length; + TypeKind value_type; +}; + +fn void vb_indent(VirtualBuffer(u8)* buffer, u32 indentation_level) +{ + if (likely(indentation_level > 0)) + { + vb_copy_byte_repeatedly(buffer, ' ', 4 * indentation_level); + } +} + +fn String type_from_size(u8 size) +{ + String type; + switch (size) + { + case 8: type = strlit("u64"); break; + case 4: type = strlit("u32"); break; + case 2: type = strlit("u16"); break; + case 1: type = strlit("u8"); break; + default: unreachable(); + } + + return type; +} + +fn void load_write(Load load, VirtualBuffer(u8)* buffer, u32 load_name_index, u32 indentation_level, u32 length) +{ + String load_type = type_from_size(load.size); + + vb_indent(buffer, indentation_level); + vb_format(buffer, "{s} v{u32}_{u32};\n", load_type, load_name_index, length); + + vb_indent(buffer, indentation_level); + vb_format(buffer, "memcpy(&v{u32}_{u32}, &string_pointer[{u32}], {u32});\n", load_name_index, length, load.index, load.size); +} + +fn void program_write(VirtualBuffer(u8)* buffer, Program program, u32 indentation_level, u32 length) +{ + switch (program.id) + { + case PROGRAM_LOAD: + { + // Write the load + Load load = program.load; + u32 load_name_index = 0; + + load_write(load, buffer, load_name_index, indentation_level, length); + + // Write result + vb_indent(buffer, indentation_level); + vb_format(buffer, "u64 index_{u32} = _pext_u64(v{u32}_{u32}, 0x{u64:x});\n", length, load_name_index, length, load.mask); + } break; + case PROGRAM_COMBINE: + { + Combine combine = program.combine; + 
String combine_type_string = type_from_size(combine.size); + vb_indent(buffer, indentation_level); + vb_format(buffer, "{s} v_{u32} = 0;\n", combine_type_string, length); + + u64 shift = 0; + u64 mask = 0; + + for (u64 i = 0; i < combine.loads.length; i += 1) + { + Load load = combine.loads.pointer[i]; + mask |= load.mask << shift; + + load_write(load, buffer, (u32)i, indentation_level, length); + + vb_indent(buffer, indentation_level); + vb_format(buffer, "v_{u32} |= ({s})(v{u64}_{u32}) << {u64};\n", length, combine_type_string, i, length, shift); + + shift += 8 * load.size; + } + + // Write result + vb_indent(buffer, indentation_level); + vb_format(buffer, "u64 index_{u32} = _pext_u64(v_{u32}, 0x{u64:x});\n", length, length, mask); + } break; + case PROGRAM_MERGE: + { + todo(); + } break; + } + + // vb_indent(buffer, indentation_level); + // vb_format(buffer, "const char* word_{u32} = words_{u32}[index_{u32}];\n", length, length, length); + // vb_indent(buffer, indentation_level); + // vb_format(buffer, "let(value_{u32}, values_{u32}[index_{u32}]);\n", length, length, length); +} + +fn void perfect_hash_generate(PerfectHashArguments arguments) +{ + VirtualBuffer(u8)* h = arguments.file_h; + VirtualBuffer(u8)* c = arguments.file_c; + + String type_string; + switch (arguments.value_type) + { + case TYPE_KIND_U16: type_string = strlit("u16"); break; + } + + u64 word_character_count; + u64 value_count; + + { + u64 word_character_offset = 0; + u64 value_offset = 0; + for (u64 length = 0; length < arguments.words_by_length.length; length += 1) + { + SliceP(u8) words = arguments.words_by_length.pointer[length]; + n_word_mask(words, arguments.mask, length); + Program program = compile_mask(arguments.mask, length); + arguments.programs[length] = program; + arguments.lookups[length].indices = pdep_lookup(arguments.arena, program, words); + arguments.lookups[length].words = words; + value_offset += arguments.lookups[length].indices.length; + word_character_offset += 
arguments.lookups[length].indices.length * length; + } + + word_character_count = word_character_offset; + value_count = value_offset; + } + + u32 indentation_level = 0; + vb_indent(h, indentation_level); + vb_format(h, "global_variable const {s} {s}_value_lut[] = {\n", type_string, arguments.kind); + + indentation_level += 1; + + for (u64 length = 0; length < arguments.words_by_length.length; length += 1) + { + SliceP(u8) words = arguments.words_by_length.pointer[length]; + Lookup lookup = arguments.lookups[length]; + let(generic_values_by_length, arguments.values_by_length[length]); + + vb_indent(h, indentation_level); + vb_format(h, "// Values [{u64}]\n", length); + + if (words.length > 0) + { + for (u64 i = 0; i < lookup.indices.length; i += 1) + { + let(index, lookup.indices.pointer[i]); + vb_indent(h, indentation_level); + + if (index == -1) + { + switch (arguments.value_type) + { + case TYPE_KIND_U16: + { + vb_copy_string(h, strlit("0xffff,\n")); + } break; + } + } + else + { + switch (arguments.value_type) + { + case TYPE_KIND_U16: + { + let(value, ((u16*)generic_values_by_length)[index]); + String word = { .pointer = words.pointer[index], .length = length }; + vb_format(h, "0x{u32:x,w=4}, // {s}\n", value, word); + } break; + } + } + } + } + else + { + vb_indent(h, indentation_level); + switch (arguments.value_type) + { + case TYPE_KIND_U16: + { + vb_copy_string(h, strlit("0xffff,\n")); + } break; + } + } + } + + indentation_level -= 1; + + vb_indent(h, indentation_level); + vb_copy_string(h, strlit("};\n")); + + vb_indent(h, indentation_level); + vb_format(h, "static_assert(array_length({s}_value_lut) == {u64});\n", arguments.kind, value_count); + + assert(is_power_of_two_u64(arguments.words_by_length.length)); + u64 epi = 512 / arguments.words_by_length.length; + + String upper_names_by_batch_flag[] = { strlit("Single"), strlit("Batch") }; + String lower_names_by_batch_flag[] = { strlit("single"), strlit("batch") }; + + switch (arguments.value_type) + { 
+ case TYPE_KIND_U16: + { + vb_format(h, "STRUCT(PextLookup{s}Result_{s})\n{\n __m512i v[2];\n};\n", upper_names_by_batch_flag[1], arguments.kind); + } break; + } + + for (u8 is_batch = 0; is_batch < 2; is_batch += 1) + { + u64 signature_length; + u8 result_type_buffer[256]; + String result_type_string; + if (is_batch) + { + result_type_string = format_string((String)array_to_slice(result_type_buffer), "PextLookupBatchResult_{s}", arguments.kind); + } + else + { + result_type_string = type_string; + } + + if (is_batch) + { + signature_length = vb_format(h, "fn {s} pext_lookup_{s}_{s}(const u8* const restrict string_base, const u32* const restrict string_offsets, const u32* const restrict string_lengths)", result_type_string, arguments.kind, lower_names_by_batch_flag[is_batch], type_string); + } + else + { + signature_length = vb_format(h, "fn {s} pext_lookup_{s}_{s}(const u8* const restrict string_pointer, u8 string_length)", result_type_string, arguments.kind, lower_names_by_batch_flag[is_batch], type_string); + } + + vb_copy_string(c, (String) { h->pointer + h->length - signature_length, .length = signature_length, }); + + vb_copy_string(h, strlit(";\n")); + + vb_copy_string(c, strlit("\n{\n")); + u32 indentation_level = 1; + + vb_indent(c, indentation_level); + if (is_batch) + { + vb_format(c, "PextLookupBatchResult_{s} result = {};\n", arguments.kind); + } + + assert(is_power_of_two_u64(arguments.words_by_length.length)); + { + vb_indent(c, indentation_level); + vb_format(c, "__m512i lengths = _mm512_set_epi{u64}(", epi); + for (s64 length = arguments.words_by_length.length - 1; length >= 0; length -= 1) + { + vb_format(c, "{u64}, ", length); + } + + c->length -= 2; + + vb_copy_string(c, strlit(");\n")); + + { + vb_indent(c, indentation_level); + assert(is_power_of_two_u64(arguments.words_by_length.length)); + vb_format(c, "__m512i raw_value_offsets = _mm512_set_epi{u64}(", epi); + let(value_offset, value_count); + for (s64 length = 
arguments.words_by_length.length - 1; length >= 0; length -= 1) + { + value_offset -= arguments.lookups[length].indices.length; + vb_format(c, "{u64}, ", value_offset); + } + c->length -= 2; + vb_copy_string(c, strlit(");\n")); + } + +#if 0 + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("global_variable const char words[] = {\n")); + // + // indentation_level += 1; + // + // for (u64 length = 0; length < arguments.words_by_length.length; length += 1) + // { + // Lookup lookup = arguments.lookups[length]; + // SliceP(u8) words = arguments.words_by_length.pointer[length]; + // + // vb_indent(c, indentation_level); + // vb_format(c, "// Words [{u64}]\n", length); + // + // if (words.length > 0) + // { + // for (u64 i = 0; i < lookup.indices.length; i += 1) + // { + // let(index, lookup.indices.pointer[i]); + // + // vb_indent(c, indentation_level); + // + // if (index == -1) + // { + // *vb_add(c, 1) = '\"'; + // for (u64 i = 0; i < length; i += 1) + // { + // vb_copy_string(c, strlit("\\x00")); + // } + // + // *vb_add(c, 1) = '\"'; + // *vb_add(c, 1) = '\n'; + // } + // else + // { + // String word = { .pointer = words.pointer[index], .length = length }; + // *vb_add(c, 1) = '\"'; + // vb_copy_string(c, word); + // *vb_add(c, 1) = '\"'; + // *vb_add(c, 1) = '\n'; + // } + // } + // } + // else + // { + // vb_indent(c, indentation_level); + // *vb_add(c, 1) = '\"'; + // for (u64 i = 0; i < length; i += 1) + // { + // vb_copy_string(c, strlit("\\x00")); + // } + // + // *vb_add(c, 1) = '\"'; + // *vb_add(c, 1) = '\n'; + // } + // } + // + // indentation_level -= 1; + // + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("};\n")); + // + // vb_indent(c, indentation_level); + // vb_format(c, "static_assert(array_length(words) == {u64} + 1);\n", word_character_count); +#endif + + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("u64 error_mask = 0;\n")); + + if (is_batch) + { + vb_indent(c, indentation_level); + 
vb_copy_string(c, strlit("for (u32 string_index = 0; string_index < 64; string_index += 1)\n")); + + vb_indent(c, indentation_level); + vb_copy_string(c, strlit("{\n")); + + indentation_level += 1; + + vb_indent(c, indentation_level); + vb_copy_string(c, strlit("const u8* const restrict string_pointer = string_base + string_offsets[string_index];\n")); + + vb_indent(c, indentation_level); + vb_copy_string(c, strlit("u32 string_length = string_lengths[string_index];\n")); + } + + vb_indent(c, indentation_level); + vb_format(c, "__mmask{u64} length_compare_mask = _mm512_cmpeq_epi{u64}_mask(_mm512_set1_epi{u64}(string_length), lengths);\n", arguments.words_by_length.length, epi, epi); + + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("__mmask64 length_mask = _cvtu64_mask64(_cvtmask16_u32(length_compare_mask) - 1);\n")); + + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("__m512i word_offsets = _mm512_permutexvar_epi32(_mm512_setzero(), _mm512_maskz_compress_epi32(length_compare_mask, raw_word_offsets));\n")); + + vb_indent(c, indentation_level); + vb_format(c, "__m512i value_offsets = _mm512_permutexvar_epi{u64}(_mm512_setzero(), _mm512_maskz_compress_epi{u64}(length_compare_mask, raw_value_offsets));\n", epi, epi); + + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("__m512i candidate_string_in_memory = _mm512_maskz_loadu_epi8(length_mask, &string_pointer[0]);\n")); + } + + for (u64 length = 0; length < arguments.words_by_length.length; length += 1) + { + SliceP(u8) words = arguments.words_by_length.pointer[length]; + + if (words.length != 0) + { + program_write(c, arguments.programs[length], indentation_level, length); + + *vb_add(c, 1) = '\n'; + } + } + + vb_indent(c, indentation_level); + assert(is_power_of_two_u64(arguments.words_by_length.length)); + vb_format(c, "__m512i raw_indices = _mm512_set_epi{u64}(", epi); + + for (s64 length = arguments.words_by_length.length - 1; length >= 0; length -= 1) + { + 
SliceP(u8) words = arguments.words_by_length.pointer[length]; + if (words.length != 0) + { + vb_format(c, "index_{u64}, ", length); + } + else + { + vb_copy_string(c, strlit("0, ")); + } + } + + c->length -= 2; + vb_copy_string(c, strlit(");\n")); + + vb_indent(c, indentation_level); + vb_format(c, "__m512i indices = _mm512_permutexvar_epi{u64}(_mm512_setzero(), _mm512_maskz_compress_epi{u64}(length_compare_mask, raw_indices));\n", epi, epi); + + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("__m512i word_indices = _mm512_add_epi32(word_offsets, indices);\n")); + + vb_indent(c, indentation_level); + vb_format(c, "__m512i value_indices = _mm512_add_epi{u64}(value_offsets, indices);\n", epi); + + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("let(word_index, _mm_extract_epi32(_mm512_extracti32x4_epi32(word_indices, 0), 0));\n")); + + vb_indent(c, indentation_level); + vb_format(c, "let(value_index, _mm_extract_epi{u64}(_mm512_extracti{u64}x{u64}_epi{u64}(value_indices, 0), 0));\n", epi, epi, (512 / 4) / epi, epi); + + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("__m512i string_in_memory = _mm512_maskz_loadu_epi8(length_mask, &words[word_index]);\n")); + + vb_indent(c, indentation_level); + vb_format(c, "{s} value = {s}_value_lut[value_index];\n", type_string, arguments.kind); + + if (is_batch) + { + vb_indent(c, indentation_level); + vb_copy_string(c, strlit("__mmask32 index_mask = _cvtu32_mask32(1 << string_index);\n")); + + vb_indent(c, indentation_level); + vb_copy_string(c, strlit("result.v[string_index > 31] = _mm512_mask_blend_epi16(index_mask, result.v[string_index > 31], _mm512_set1_epi16(value));\n")); + + indentation_level -= 1; + + vb_indent(c, indentation_level); + vb_copy_string(c, strlit("}\n")); + + vb_indent(c, indentation_level); + vb_copy_string(c, strlit("return result;\n")); + } + else + { + vb_indent(c, indentation_level); + vb_copy_string(c, strlit("return value;\n")); + } + // 
vb_copy_string(c, strlit("u16 asd[32];\nif (string_index == 31) { _mm512_storeu_epi16(asd, result.v[0]); breakpoint(); }\n")); + + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("out_values[value_index] = value;\n")); + + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("__mmask64 string_compare_mask = _mm512_cmpeq_epi8_mask(candidate_string_in_memory, string_in_memory);\n")); + // + // vb_indent(c, indentation_level); + // vb_copy_string(c, strlit("error_mask |= (_cvtmask64_u64(_knot_mask64(string_compare_mask)) != 0) << string_index;\n")); + + + vb_copy_string(c, strlit("}\n")); + } +} + +STRUCT(x86_64_Register) +{ + String name; + u16 value; +}; + +typedef enum x86_64_RegisterClass : u8 +{ + REGISTER_CLASS_GPR, + REGISTER_CLASS_VECTOR, + REGISTER_CLASS_CONTROL, + REGISTER_CLASS_DEBUG, +} x86_64_RegisterClass; + +STRUCT(RegisterSpec) +{ + String name; + x86_64_RegisterClass class; + u8 raw_value; + u8 is_high:1; + u8 size; +}; + +fn x86_64_Register define_register(RegisterSpec spec) +{ + x86_64_Register reg = { + .name = spec.name, + .value = spec.raw_value, + }; + return reg; +} + +fn void metaprogram(Arena* arena) +{ + let(file, file_read(arena, strlit("bootstrap/bloat-buster/data/x86_mnemonic.dat"))); + String enum_prefix = strlit("MNEMONIC_x86_64_"); + String it = file; + VirtualBuffer(u8) generated_h = {}; + VirtualBuffer(u8) generated_c = {}; + + vb_copy_string(&generated_h, strlit("#pragma once\n\n")); + vb_copy_string(&generated_c, strlit("#pragma once\n\n")); + + vb_copy_string(&generated_h, strlit("#if defined(__x86_64__)\n")); + vb_copy_string(&generated_h, strlit("#include \n\n")); + vb_copy_string(&generated_h, strlit("#endif\n")); + + vb_copy_string(&generated_h, strlit("typedef enum Mnemonic_x86_64\n{\n")); + VirtualBufferP(u8) mnemonic_names_by_length_buffer[16] = {}; + VirtualBuffer(u16) mnemonic_values_by_length_buffer[array_length(mnemonic_names_by_length_buffer)] = {}; + SliceP(u8) 
mnemonic_names_by_length[array_length(mnemonic_names_by_length_buffer)] = {}; + void* mnemonic_values_by_length[array_length(mnemonic_names_by_length_buffer)] = {}; + vb_copy_string(&generated_c, strlit("fn String mnemonic_x86_64_to_string(Mnemonic_x86_64 mnemonic)\n{\n switch (mnemonic)\n {\n")); + + u16 mnemonic_index = 0; + + while (it.length) + { + let(next_eol_index, string_first_ch(it, '\n')); + if (next_eol_index == STRING_NO_MATCH) + { + todo(); + } + + String mnemonic = { .pointer = it.pointer, .length = next_eol_index }; + *vb_add(&mnemonic_names_by_length_buffer[mnemonic.length], 1) = mnemonic.pointer; + *vb_add(&mnemonic_values_by_length_buffer[mnemonic.length], 1) = mnemonic_index; + + // Generated h + vb_copy_string(&generated_h, strlit(" ")); + vb_copy_string(&generated_h, enum_prefix); + vb_copy_string(&generated_h, mnemonic); + vb_format(&generated_h, " = 0x{u32:x,w=4},\n", mnemonic_index); + mnemonic_index += 1; + + // Generated c + vb_copy_string(&generated_c, strlit(" case ")); + vb_copy_string(&generated_c, enum_prefix); + vb_copy_string(&generated_c, mnemonic); + vb_copy_string(&generated_c, strlit(": return strlit(\"")); + vb_copy_string(&generated_c, mnemonic); + vb_copy_string(&generated_c, strlit("\");\n")); + + it = s_get_slice(u8, it, next_eol_index + 1, it.length); + } + + vb_copy_string(&generated_h, strlit("} Mnemonic_x86_64;\n")); + vb_format(&generated_h, "#define mnemonic_x86_64_count ({u32})\n", mnemonic_index); + + vb_copy_string(&generated_c, strlit(" }\n}\n")); + + for (u32 i = 0; i < array_length(mnemonic_names_by_length_buffer); i += 1) + { + mnemonic_names_by_length[i] = (SliceP(u8)) { .pointer = mnemonic_names_by_length_buffer[i].pointer, .length = mnemonic_names_by_length_buffer[i].length }; + mnemonic_values_by_length[i] = mnemonic_values_by_length_buffer[i].pointer; + } + + { + { + u8 mask[array_length(mnemonic_names_by_length)]; + Lookup lookups[array_length(mnemonic_names_by_length)]; + Program 
programs[array_length(mnemonic_names_by_length)]; + PerfectHashArguments perfect_hash = { + .file_h = &generated_h, + .file_c = &generated_c, + .words_by_length = array_to_slice(mnemonic_names_by_length), + .values_by_length = mnemonic_values_by_length, + .value_type = TYPE_KIND_U16, + .mask = mask, + .lookups = lookups, + .programs = programs, + .arena = arena, + .kind = strlit("mnemonic"), + }; + + perfect_hash_generate(perfect_hash); + } + + { +#define reg(n, v, c, ...) define_register((RegisterSpec) { .name = strlit(n), .raw_value = (v), .class = REGISTER_CLASS_ ## c, __VA_ARGS__ }) +#define regs(n, v, c, s, ...) define_register((RegisterSpec) { .name = strlit(n), .raw_value = (v), .class = (REGISTER_CLASS_ ## c), .size = (s), __VA_ARGS__ }) + x86_64_Register gpr_registers[] = { + regs("al", 0b000, GPR, 0), + regs("ah", 0b000, GPR, 1, .is_high = 1), + regs("ax", 0b000, GPR, 1), + regs("eax", 0b000, GPR, 2), + regs("rax", 0b000, GPR, 3), + + regs("cl", 0b0001, GPR, 0), + regs("ch", 0b0001, GPR, 1, .is_high = 1), + regs("cx", 0b0001, GPR, 1), + regs("ecx", 0b0001, GPR, 2), + regs("rcx", 0b0001, GPR, 3), + + regs("dl", 0b0010, GPR, 0), + regs("dh", 0b0010, GPR, 1, .is_high = 1), + regs("dx", 0b0010, GPR, 1), + regs("edx", 0b0010, GPR, 2), + regs("rdx", 0b0010, GPR, 3), + + regs("bl", 0b0011, GPR, 0), + regs("bh", 0b0011, GPR, 1, .is_high = 1), + regs("bx", 0b0011, GPR, 1), + regs("ebx", 0b0011, GPR, 2), + regs("rbx", 0b0011, GPR, 3), + + regs("spl", 0b0100, GPR, 0), + regs("sp", 0b0100, GPR, 1), + regs("esp", 0b0100, GPR, 2), + regs("rsp", 0b0100, GPR, 3), + + regs("bpl", 0b0101, GPR, 0), + regs("bp", 0b0101, GPR, 1), + regs("ebp", 0b0101, GPR, 2), + regs("rbp", 0b0101, GPR, 3), + + regs("sil", 0b0110, GPR, 0), + regs("si", 0b0110, GPR, 1), + regs("esi", 0b0110, GPR, 2), + regs("rsi", 0b0110, GPR, 3), + + regs("dil", 0b0111, GPR, 0), + regs("di", 0b0111, GPR, 1), + regs("edi", 0b0111, GPR, 2), + regs("rdi", 0b0111, GPR, 3), + + regs("r8l", 0b1000, GPR, 0), + 
regs("r8w", 0b1000, GPR, 1), + regs("r8d", 0b1000, GPR, 2), + regs("r8", 0b1000, GPR, 3), + + regs("r9l", 0b1001, GPR, 0), + regs("r9w", 0b1001, GPR, 1), + regs("r9d", 0b1001, GPR, 2), + regs("r9", 0b1001, GPR, 3), + + regs("r10l", 0b1010, GPR, 0), + regs("r10w", 0b1010, GPR, 1), + regs("r10d", 0b1010, GPR, 2), + regs("r10", 0b1010, GPR, 3), + + regs("r11l", 0b1011, GPR, 0), + regs("r11w", 0b1011, GPR, 1), + regs("r11d", 0b1011, GPR, 2), + regs("r11", 0b1011, GPR, 3), + + regs("r12l", 0b1100, GPR, 0), + regs("r12w", 0b1100, GPR, 1), + regs("r12d", 0b1100, GPR, 2), + regs("r12", 0b1100, GPR, 3), + + regs("r13l", 0b1101, GPR, 0), + regs("r13w", 0b1101, GPR, 1), + regs("r13d", 0b1101, GPR, 2), + regs("r13", 0b1101, GPR, 3), + + regs("r14l", 0b1110, GPR, 0), + regs("r14w", 0b1110, GPR, 1), + regs("r14d", 0b1110, GPR, 2), + regs("r14", 0b1110, GPR, 3), + + regs("r15l", 0b1111, GPR, 0), + regs("r15w", 0b1111, GPR, 1), + regs("r15d", 0b1111, GPR, 2), + regs("r15", 0b1111, GPR, 3), + }; + + VirtualBufferP(u8) register_names_by_length_buffer[8] = {}; + VirtualBuffer(u16) register_values_by_length_buffer[array_length(register_names_by_length_buffer)] = {}; + SliceP(u8) register_names_by_length[array_length(register_names_by_length_buffer)] = {}; + void* register_values_by_length[array_length(register_names_by_length_buffer)] = {}; + + vb_copy_string(&generated_h, strlit("typedef enum x86_64_Register : u16\n{\n")); + + for (u32 i = 0; i < array_length(gpr_registers); i += 1) + { + x86_64_Register reg = gpr_registers[i]; + *vb_add(®ister_names_by_length_buffer[reg.name.length], 1) = reg.name.pointer; + *vb_add(®ister_values_by_length_buffer[reg.name.length], 1) = reg.value; + vb_format(&generated_h, " REGISTER_X86_64_{s} = 0x{u32:x,w=4},\n", reg.name, reg.value); + } + + u8 vector_registers[32][3][5]; + + for (u8 i = 0; i < 32; i += 1) + { + for (u8 size = 0; size < 3; size += 1) + { + u8 decimal_digit_high = i / 10; + u8 decimal_digit_low = i % 10; + u8 
decimal_digit_high_character = decimal_digit_high + '0'; + u8 decimal_digit_low_character = decimal_digit_low + '0'; + + vector_registers[i][size][0] = 'x' + size; + vector_registers[i][size][1] = 'm'; + vector_registers[i][size][2] = 'm'; + vector_registers[i][size][3] = decimal_digit_high ? decimal_digit_high_character : decimal_digit_low_character; + vector_registers[i][size][4] = decimal_digit_low_character; + RegisterSpec spec = { .name = { .pointer = vector_registers[i][size], .length = 4 + (decimal_digit_high != 0) }, .raw_value = i, .class = REGISTER_CLASS_VECTOR, .size = size, }; + let(reg, define_register(spec)); + *vb_add(®ister_names_by_length_buffer[reg.name.length], 1) = reg.name.pointer; + *vb_add(®ister_values_by_length_buffer[reg.name.length], 1) = reg.value; + vb_format(&generated_h, " REGISTER_X86_64_{s} = 0x{u32:x,w=4},\n", reg.name, reg.value); + } + } + + vb_copy_string(&generated_h, strlit("} x86_64_Register;\n")); + + for (u32 i = 0; i < array_length(register_names_by_length_buffer); i += 1) + { + register_names_by_length[i] = (SliceP(u8)) { .pointer = register_names_by_length_buffer[i].pointer, .length = register_names_by_length_buffer[i].length }; + register_values_by_length[i] = register_values_by_length_buffer[i].pointer; + } + + u8 mask[array_length(register_names_by_length)]; + Lookup lookups[array_length(register_names_by_length)]; + Program programs[array_length(register_names_by_length)]; + PerfectHashArguments perfect_hash = { + .file_h = &generated_h, + .file_c = &generated_c, + .words_by_length = array_to_slice(register_names_by_length), + .values_by_length = register_values_by_length, + .value_type = TYPE_KIND_U16, + .mask = mask, + .lookups = lookups, + .programs = programs, + .arena = arena, + .kind = strlit("register"), + }; + + perfect_hash_generate(perfect_hash); + } + } + + String generated_h_slice = { .pointer = generated_h.pointer, .length = generated_h.length }; + String generated_c_slice = { .pointer = 
generated_c.pointer, .length = generated_c.length }; + + { + FileWriteOptions options = { + .path = strlit(BUILD_DIR "/generated.h"), + .content = generated_h_slice, + }; + file_write(options); + } + + { + FileWriteOptions options = { + .path = strlit(BUILD_DIR "/generated.c"), + .content = generated_c_slice, + }; + file_write(options); + } +} + +STRUCT(Parser) +{ + u8* pointer; + u32 length; + u32 i; +}; + +fn String parse_mnemonic(Parser* parser) +{ + u32 start = parser->i; + u8* pointer = parser->pointer; + String result = { .pointer = pointer + start }; + + while (1) + { + u32 i = parser->i; + u8 ch = pointer[i]; + u8 ch_is_alphanumeric = is_alphanumeric(ch); + parser->i = i + ch_is_alphanumeric; + if (!ch_is_alphanumeric) + { + break; + } + } + + result.length = parser->i - start; + + return result; +} + +fn String parse_identifier(Parser* parser) +{ + u32 start = parser->i; + u8* pointer = parser->pointer; + String result = { .pointer = parser->pointer + parser->i }; + + while (1) + { + u32 i = parser->i; + u8 ch = pointer[i]; + u8 is_identifier_ch = is_alphanumeric(ch) | (ch == '_'); + parser->i = i + is_identifier_ch; + if (!is_identifier_ch) + { + break; + } + } + + result.length = parser->i - start; + + return result; +} + +fn u8 consume_character(Parser* parser, u8 expected_ch) +{ + u32 i = parser->i; + u8 ch = parser->pointer[i]; + let(is_expected_ch, unlikely((ch == expected_ch) & (i < parser->length))); + let(new_parser_i, i + is_expected_ch); + parser->i = new_parser_i; + return new_parser_i - i; +} + +fn void expect_character(Parser* parser, u8 expected_ch) +{ + if (!likely(consume_character(parser, expected_ch))) + { + print("Expected character failed!\n"); + os_exit(1); + } +} + + +fn u8 get_ch(Parser* parser) +{ + assert(parser->i < parser->length); + return parser->pointer[parser->i]; +} + +fn u8 expect_decimal_digit(Parser* parser) +{ + u32 i = parser->i; + assert(i < parser->length); + u8 ch = parser->pointer[i]; + u8 is_decimal_digit = (ch >= 
'0') & (ch <= '9'); + parser->i = i + is_decimal_digit; + if (likely(is_decimal_digit)) + { + return ch - '0'; + } + else + { + print("Expect integer digit failed!\n"); + os_exit(1); + } +} + +fn u8 consume_hex_byte(Parser* parser, u8* hex_byte) +{ + u32 i = parser->i; + assert(i < parser->length - 1); + u8* pointer = parser->pointer; + u8 high_ch = pointer[i]; + u8 low_ch = pointer[i + 1]; + u8 is_high_digit_hex = is_hex_digit(high_ch); + u8 is_low_digit_hex = is_hex_digit(low_ch); + u8 is_hex_byte = is_high_digit_hex & is_low_digit_hex; + parser->i = i + (2 * is_hex_byte); + u8 result = is_hex_byte; + if (likely(result)) + { + u8 high_int = hex_ch_to_int(high_ch); + u8 low_int = hex_ch_to_int(low_ch); + u8 byte = (high_int << 4) | low_int; + *hex_byte = byte; + } + + return result; +} + +fn u8 expect_hex_byte(Parser* parser) +{ + u8 result; + if (!consume_hex_byte(parser, &result)) + { + print("Expect hex byte failed!\n"); + os_exit(1); + } + + return result; +} + +// TODO: this might be a perf bottleneck +fn u8 consume_tab(Parser* parser) +{ + u8 space0 = consume_character(parser, ' '); + u8 space1 = consume_character(parser, ' '); + u8 space2 = consume_character(parser, ' '); + u8 space3 = consume_character(parser, ' '); + u8 result = (space0 + space1) + (space2 + space3); + return result == 4; +} + +typedef enum InstructionClass +{ + INSTRUCTION_CLASS_BASE_ARITHMETIC, + INSTRUCTION_CLASS_UNSIGNED_ADD_FLAG, + INSTRUCTION_CLASS_BITTEST, + INSTRUCTION_CLASS_CMOV, + INSTRUCTION_CLASS_JCC, + INSTRUCTION_CLASS_ROTATE, + INSTRUCTION_CLASS_SHIFT, + INSTRUCTION_CLASS_SETCC, +} InstructionClass; + +fn String parse_encoding_type(Parser* parser) +{ + u32 i = parser->i; + while (1) + { + u8 ch = get_ch(parser); + u8 is_valid_encoding_type_ch = is_lower(ch) | (ch == '-'); + parser->i += is_valid_encoding_type_ch; + if (is_valid_encoding_type_ch) + { + if (parser->i - i > 4) + { + todo(); + } + } + else + { + break; + } + } + + u64 length = parser->i - i; + if (length == 0) 
+ { + todo(); + } + if (length > 4) + { + todo(); + } + + String result = { .pointer = parser->pointer + i, .length = length }; + return result; +} + +fn void parse_encoding_details(Parser* parser) +{ + expect_character(parser, '['); + String encoding_type = parse_encoding_type(parser); + expect_character(parser, ':'); + expect_character(parser, ' '); + + while (!consume_character(parser, ']')) + { + // Parser encoding atom + u8 byte; + if (consume_hex_byte(parser, &byte)) + { + u8 ch = get_ch(parser); + u8 is_plus = ch == '+'; + parser->i += is_plus; + if (unlikely(is_plus)) + { + expect_character(parser, 'r'); + } + } + else + { + String identifier = parse_identifier(parser); + if (identifier.length) + { + if (identifier.pointer[0] == 'i') + { + assert(identifier.length == 2); + u8 imm_byte = identifier.pointer[1]; + u8 is_valid_imm_byte = ((imm_byte == 'b') | (imm_byte == 'w')) | ((imm_byte == 'd') | (imm_byte == 'q')); + if (!likely(is_valid_imm_byte)) + { + print("Bad immediate value\n"); + os_exit(1); + } + } + else if (s_equal(identifier, strlit("rex"))) + { + expect_character(parser, '.'); + u8 rex_ch = get_ch(parser); + u8 is_valid_rex_ch = ((rex_ch == 'w') | (rex_ch == 'r')) | ((rex_ch == 'x') | (rex_ch == 'b')); + parser->i += is_valid_rex_ch; + if (!likely(is_valid_rex_ch)) + { + todo(); + } + } + else if (string_starts_with(identifier, strlit("rel"))) + { + // todo + } + else + { + todo(); + } + } + else + { + u8 ch = get_ch(parser); + switch (ch) + { + case '/': + { + parser->i += 1; + if (consume_character(parser, 'r')) + { + // TODO + } + else + { + expect_decimal_digit(parser); + } + } break; + default: + todo(); + } + } + } + + consume_character(parser, ' '); + } +} + +fn void parse_encoding(Parser* parser) +{ + u8 first_ch = get_ch(parser); + u32 start = parser->i; + if (first_ch != '[') + { + while (1) + { + u32 i = parser->i; + String operand = parse_mnemonic(parser); + assert(operand.length); + if (consume_character(parser, ',')) + { + 
expect_character(parser, ' '); + } + else + { + break; + } + } + + expect_character(parser, ' '); + } + + parse_encoding_details(parser); +} + +fn void parse_instruction_table(Arena* arena) +{ + String file = file_read(arena, strlit("bootstrap/bloat-buster/data/instructions.dat")); + Parser parser_memory = { + .pointer = file.pointer, + .length = file.length, + }; + Parser* parser = &parser_memory; + + let_cast(u32, file_length, file.length); + while (parser->i < file_length) + { + String mnemonic = parse_mnemonic(parser); + expect_character(parser, ':'); + + if (consume_character(parser, '\n')) + { + while (consume_tab(parser)) + { + parse_encoding(parser); + expect_character(parser, '\n'); + } + } + else if (consume_character(parser, ' ')) + { + u8 next_ch = get_ch(parser); + switch (next_ch) + { + case '[': + { + parse_encoding_details(parser); + } break; + default: + { + String identifier = parse_identifier(parser); + if (s_equal(identifier, strlit("class"))) + { + expect_character(parser, ' '); + String class_identifier = parse_identifier(parser); + InstructionClass instruction_class; + + if (s_equal(class_identifier, strlit("base_arithmetic"))) + { + instruction_class = INSTRUCTION_CLASS_BASE_ARITHMETIC; + } + else if (s_equal(class_identifier, strlit("unsigned_add_flag"))) + { + instruction_class = INSTRUCTION_CLASS_UNSIGNED_ADD_FLAG; + } + else if (s_equal(class_identifier, strlit("bittest"))) + { + instruction_class = INSTRUCTION_CLASS_BITTEST; + } + else if (s_equal(class_identifier, strlit("cmov"))) + { + instruction_class = INSTRUCTION_CLASS_CMOV; + } + else if (s_equal(class_identifier, strlit("jcc"))) + { + instruction_class = INSTRUCTION_CLASS_JCC; + } + else if (s_equal(class_identifier, strlit("rotate"))) + { + instruction_class = INSTRUCTION_CLASS_ROTATE; + } + else if (s_equal(class_identifier, strlit("shift"))) + { + instruction_class = INSTRUCTION_CLASS_SHIFT; + } + else if (s_equal(class_identifier, strlit("setcc"))) + { + instruction_class = 
INSTRUCTION_CLASS_SETCC; + } + else + { + todo(); + } + + switch (instruction_class) + { + case INSTRUCTION_CLASS_BASE_ARITHMETIC: + { + u8 opcodes[3]; + expect_character(parser, '('); + + expect_character(parser, '/'); + u8 imm_digit = expect_decimal_digit(parser); + expect_character(parser, ','); + expect_character(parser, ' '); + + opcodes[0] = expect_hex_byte(parser); + expect_character(parser, ','); + expect_character(parser, ' '); + + opcodes[1] = expect_hex_byte(parser); + expect_character(parser, ','); + expect_character(parser, ' '); + + opcodes[2] = expect_hex_byte(parser); + expect_character(parser, ')'); + } break; + case INSTRUCTION_CLASS_UNSIGNED_ADD_FLAG: + { + expect_character(parser, '('); + u8 opcode = expect_hex_byte(parser); + expect_character(parser, ')'); + } break; + case INSTRUCTION_CLASS_BITTEST: + { + expect_character(parser, '('); + + expect_character(parser, '/'); + u8 imm_digit = expect_decimal_digit(parser); + expect_character(parser, ','); + expect_character(parser, ' '); + + u8 opcode = expect_hex_byte(parser); + expect_character(parser, ')'); + } break; + case INSTRUCTION_CLASS_CMOV: + { + } break; + case INSTRUCTION_CLASS_JCC: + { + } break; + case INSTRUCTION_CLASS_ROTATE: + { + expect_character(parser, '('); + + expect_character(parser, '/'); + u8 imm_digit = expect_decimal_digit(parser); + + expect_character(parser, ')'); + } break; + case INSTRUCTION_CLASS_SHIFT: + { + expect_character(parser, '('); + + expect_character(parser, '/'); + u8 imm_digit = expect_decimal_digit(parser); + + expect_character(parser, ')'); + } break; + case INSTRUCTION_CLASS_SETCC: + { + } break; + } + } + else + { + parser->i -= identifier.length; + parse_encoding(parser); + } + } break; + } + + expect_character(parser, '\n'); + } + else + { + todo(); + } + } } int main(int argc, char* argv[], char** envp) { environment_pointer = envp; Arena* arena = arena_initialize_default(KB(64)); - + metaprogram(arena); + parse_instruction_table(arena); + BuildType 
build_type = build_type_pick(); CompileOptions compile_options = { - .compiler_path = get_c_compiler_path(arena), + .compiler_path = get_c_compiler_path(arena, build_type), .source_path = strlit("bootstrap/bloat-buster/bb.c"), .output_path = strlit("cache/bb" EXECUTABLE_EXTENSION), .windowing_backend = windowing_backend_pick(), .rendering_backend = rendering_backend_pick(), - .build_type = build_type_pick(), + .build_type = build_type, .flags = { .colored_output = 1, .error_limit = BB_ERROR_LIMIT, diff --git a/build.sh b/build.sh index bbbe010..09b3c32 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -eu +MY_CWD=$PWD + if [[ -z "${BB_CI-}" ]]; then BB_CI=0 fi @@ -17,10 +19,19 @@ if [[ -z "${BB_ERROR_LIMIT-}" ]]; then BB_ERROR_LIMIT=$((1 - BB_CI)) fi +BB_COMPILE_SHADERS=0 + BUILD_DIR=cache +LARGE_ASSET_BASE_URL=https://github.com/birth-software/bloat-buster/releases/download/large-assets mkdir -p $BUILD_DIR -if [[ "${BB_CI}" == "0" ]]; then +if [[ ! -f "$BUILD_DIR/large_assembly.s" ]]; then + cd $BUILD_DIR + wget $LARGE_ASSET_BASE_URL/large_assembly.s -o large_assembly.s + cd $MY_CWD +fi + +if [[ "${BB_COMPILE_SHADERS}" == "1" ]]; then glslangValidator -V bootstrap/std/shaders/rect.vert -o $BUILD_DIR/rect.vert.spv --quiet glslangValidator -V bootstrap/std/shaders/rect.frag -o $BUILD_DIR/rect.frag.spv --quiet fi