more progress

This commit is contained in:
David Gonzalez Martin 2024-05-12 13:34:31 -06:00
parent 3a7ee24509
commit b9df85f55a

View File

@ -1062,6 +1062,18 @@ const Bitcode = struct {
function = 12, function = 12,
identification = 13, identification = 13,
value_symtab = 14, value_symtab = 14,
metadata = 15,
metadata_attachment = 16,
type = 17,
uselist = 18,
module_strtab = 19,
global_value_summary = 20,
operand_bundle_tags = 21,
metadata_kind = 22,
strtab = 23,
full_lto_global_value_summary = 24,
symtab = 25,
sync_scope_names = 26,
}; };
const ValueSymtabAbbreviationId = enum(u8){ const ValueSymtabAbbreviationId = enum(u8){
@ -1279,6 +1291,35 @@ const Bitcode = struct {
block_addr_users = 60, block_addr_users = 60,
}; };
/// Record codes used inside the bitcode type block.
///
/// `write_type_table` uses these as the record code of each type record and
/// as the leading literal of the type abbreviations.
/// NOTE(review): the numbering appears to mirror LLVM's `bitc::TypeCodes`;
/// confirm against LLVMBitCodes.h before adding or renumbering entries.
const TypeCode = enum(u8) {
    /// Entry-count record emitted first by `write_type_table`.
    num_entry = 1,

    // Primitive and first-class types.
    void = 2,
    float = 3,
    double = 4,
    label = 5,
    /// `opaque` is a Zig keyword, hence the quoted identifier.
    @"opaque" = 6,
    integer = 7,
    pointer = 8,
    function_old = 9,
    half = 10,

    // Sequential types.
    array = 11,
    vector = 12,

    // Extended floating point and target-specific types.
    x86_fp80 = 13,
    fp128 = 14,
    ppc_fp128 = 15,
    metadata = 16,
    x86_mmx = 17,

    // Struct records.
    struct_anon = 18,
    struct_name = 19,
    struct_named = 20,

    function = 21,
    token = 22,
    bfloat = 23,
    x86_amx = 24,
    opaque_pointer = 25,
    target_type = 26,
};
const BlockInfoCode = enum(u8) { const BlockInfoCode = enum(u8) {
set_bid = 1, set_bid = 1,
block_name = 2, block_name = 2,
@ -1300,6 +1341,7 @@ const Bitcode = struct {
current_abbreviations: PinnedArray(*Abbreviation) = .{}, current_abbreviations: PinnedArray(*Abbreviation) = .{},
abbreviation_buffer: PinnedArray(Abbreviation) = .{}, abbreviation_buffer: PinnedArray(Abbreviation) = .{},
block_info_records: PinnedArray(BlockInfo) = .{}, block_info_records: PinnedArray(BlockInfo) = .{},
vst_offset_placeholder: u64 = 0,
current_bit: u32 = 0, current_bit: u32 = 0,
current_value: u32 = 0, current_value: u32 = 0,
current_codesize: u32 = 2, current_codesize: u32 = 2,
@ -1312,31 +1354,15 @@ const Bitcode = struct {
return final_slice; return final_slice;
} }
pub fn write(writer: *Writer) void { // TODO: fix?
const magic align(4) = [_]u8{ 0x42, 0x43, 0xc0, 0xde }; fn get_byte_position(writer: *Writer) u32 {
writer.append_bytes(&magic); return writer.buffer.length * @sizeOf(u32);
}
writer.write_identification_block(); fn write_module_block(writer: *Writer) void {
const identification_block = .{ const raw = true;
0x35, 0x14, 0x00, 0x00, if (raw) {
0x05, 0x00, 0x00, 0x00, const module_block align(4) = [_]u8{
0x62, 0x0c, 0x30, 0x24,
0x4a, 0x59, 0xbe, 0x66,
0xbd, 0xfb, 0xb4, 0xaf,
0x0b, 0x51, 0x80, 0x4c,
0x01, 0x00, 0x00, 0x00,
};
std.testing.expectEqualSlices(u8, &identification_block, writer.get_byte_slice()[4..]) catch unreachable;
const module_version_start = writer.buffer.length;
_ = module_version_start; // autofix
writer.enter_subblock(.identification, 3);
writer.write_module_version();
writer.write_block_info();
const rest_module_block align(4) = [_]u8{
0x21, 0x0c, 0x00, 0x00, 0x21, 0x0c, 0x00, 0x00,
0xe6, 0x01, 0x00, 0x00, 0xe6, 0x01, 0x00, 0x00,
0x0b, 0x02, 0x21, 0x00, 0x0b, 0x02, 0x21, 0x00,
@ -1826,8 +1852,68 @@ const Bitcode = struct {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}; };
writer.append_bytes(&rest_module_block); writer.append_bytes(&module_block);
} else {
const module_version_start = writer.buffer.length;
_ = module_version_start; // autofix
writer.enter_subblock(.identification, 3);
const block_start_position = writer.get_byte_position();
_ = block_start_position; // autofix
writer.write_module_version();
writer.write_block_info();
writer.write_type_table();
writer.write_attribute_group_table();
writer.write_attribute_table();
// TODO
// writer.write_comdats();
writer.write_module_info();
writer.write_module_constants();
writer.write_module_metadata_kinds();
writer.write_module_metadata();
// TODO:
const should_preserve_use_list_order = false;
if (should_preserve_use_list_order) {
writer.write_use_list_block(null);
}
writer.write_operand_bundle_tags();
writer.write_sync_scope_names();
// TODO: functions
// for (functions) |function| {
// if (!function.is_declaration) {
// write.write_function(function);
// }
// }
//
// TODO: module summary
// if (index) {
// writer.write_per_module_global_value_summary();
// }
// TODO:
// writer.write_global_value_symbol_table(map);
// writer.write_module_hash(block_start_position);
writer.exit_block();
}
}
fn write_symtab(writer: *Writer) void {
// TODO:
const symtab_block align(4) = .{ const symtab_block align(4) = .{
0x65, 0x0c, 0x00, 0x00, 0x65, 0x0c, 0x00, 0x00,
0x1f, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
@ -1864,7 +1950,10 @@ const Bitcode = struct {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}; };
writer.append_bytes(&symtab_block); writer.append_bytes(&symtab_block);
}
fn write_strtab(writer: *Writer) void {
// TODO:
const strtab_block align(4) = .{ const strtab_block align(4) = .{
0x5d, 0x0c, 0x00, 0x00, 0x5d, 0x0c, 0x00, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
@ -1885,6 +1974,19 @@ const Bitcode = struct {
writer.append_bytes(&strtab_block); writer.append_bytes(&strtab_block);
} }
/// Writes a complete bitcode file into the writer's buffer.
///
/// Output order: the four magic bytes, then the identification block, the
/// module block, the symbol table block and the string table block.
pub fn write(writer: *Writer) void {
    // Bitcode magic number: 'B' 'C' 0xC0 0xDE.
    const bitcode_magic align(4) = [_]u8{ 'B', 'C', 0xc0, 0xde };
    writer.append_bytes(&bitcode_magic);

    // The remaining sections must be emitted in exactly this order.
    writer.write_identification_block();
    writer.write_module_block();
    writer.write_symtab();
    writer.write_strtab();
}
fn write_module_version(writer: *Writer) void { fn write_module_version(writer: *Writer) void {
writer.emit_record(u64, @intFromEnum(ModuleCode.version), &.{2}, 0); writer.emit_record(u64, @intFromEnum(ModuleCode.version), &.{2}, 0);
} }
@ -2121,6 +2223,201 @@ const Bitcode = struct {
writer.exit_block(); writer.exit_block();
} }
/// Emits the type block: six type abbreviations followed by the entry-count
/// record.
///
/// No per-type records are written yet. The abbreviations are still
/// registered, in a fixed order, so their ids are allocated, but the ids are
/// discarded because nothing uses them so far.
fn write_type_table(writer: *Writer) void {
    writer.enter_subblock(.type, 4);

    // TODO: compute both values from the module's actual type list.
    const index_bit_width = 0;
    const entry_count = 0;

    // opaque_pointer: [code, literal 0].
    {
        const abbrev = writer.abbreviation_buffer.append(.{});
        abbrev.add_literal(@intFromEnum(TypeCode.opaque_pointer));
        // TODO: this could be wrong?
        abbrev.add_literal(0);
        _ = writer.emit_abbreviation(abbrev);
    }

    // function: [code, fixed(1), array of fixed(index_bit_width)].
    {
        const abbrev = writer.abbreviation_buffer.append(.{});
        abbrev.add_literal(@intFromEnum(TypeCode.function));
        // TODO: this could be wrong?
        abbrev.add_with_encoding(.{ .encoding = .fixed, .value = 1 });
        abbrev.add_with_encoding(.{ .encoding = .array });
        abbrev.add_with_encoding(.{ .encoding = .fixed, .value = index_bit_width });
        _ = writer.emit_abbreviation(abbrev);
    }

    // struct_anon: [code, fixed(1), array of fixed(index_bit_width)].
    {
        const abbrev = writer.abbreviation_buffer.append(.{});
        abbrev.add_literal(@intFromEnum(TypeCode.struct_anon));
        // TODO: this could be wrong?
        abbrev.add_with_encoding(.{ .encoding = .fixed, .value = 1 });
        abbrev.add_with_encoding(.{ .encoding = .array });
        abbrev.add_with_encoding(.{ .encoding = .fixed, .value = index_bit_width });
        _ = writer.emit_abbreviation(abbrev);
    }

    // struct_name: [code, array of char6].
    {
        const abbrev = writer.abbreviation_buffer.append(.{});
        abbrev.add_literal(@intFromEnum(TypeCode.struct_name));
        // TODO: this could be wrong?
        abbrev.add_with_encoding(.{ .encoding = .array });
        abbrev.add_with_encoding(.{ .encoding = .char6 });
        _ = writer.emit_abbreviation(abbrev);
    }

    // struct_named: [code, fixed(1), array of fixed(index_bit_width)].
    {
        const abbrev = writer.abbreviation_buffer.append(.{});
        abbrev.add_literal(@intFromEnum(TypeCode.struct_named));
        // TODO: this could be wrong?
        abbrev.add_with_encoding(.{ .encoding = .fixed, .value = 1 });
        abbrev.add_with_encoding(.{ .encoding = .array });
        abbrev.add_with_encoding(.{ .encoding = .fixed, .value = index_bit_width });
        _ = writer.emit_abbreviation(abbrev);
    }

    // array: [code, vbr(8), fixed(index_bit_width)].
    {
        const abbrev = writer.abbreviation_buffer.append(.{});
        abbrev.add_literal(@intFromEnum(TypeCode.array));
        // TODO: this could be wrong?
        abbrev.add_with_encoding(.{ .encoding = .vbr, .value = 8 });
        abbrev.add_with_encoding(.{ .encoding = .fixed, .value = index_bit_width });
        _ = writer.emit_abbreviation(abbrev);
    }

    // Entry count goes first; it is written unabbreviated (abbreviation 0).
    var record_values = std.BoundedArray(u64, 64){};
    record_values.appendAssumeCapacity(entry_count);
    writer.emit_record(u64, @intFromEnum(TypeCode.num_entry), record_values.constSlice(), 0);
    record_values.resize(0) catch unreachable;

    // TODO:
    // Now loop over types and emit records for them

    writer.exit_block();
}
/// Placeholder for the attribute group table of the module block.
/// Currently emits nothing; the writer is accepted only so the call site in
/// `write_module_block` already has its final shape.
fn write_attribute_group_table(writer: *Writer) void {
_ = writer; // autofix
// TODO: not implemented yet, no bytes are written.
}
/// Placeholder for the attribute table of the module block.
/// Currently emits nothing.
fn write_attribute_table(writer: *Writer) void {
_ = writer; // autofix
// TODO: not implemented yet, no bytes are written.
}
/// Emits the module-level information records: target triple, data layout,
/// the source file name, and finally the value-symbol-table forward
/// declaration.
///
/// All strings are hard-coded for now. Inline assembly, section names,
/// globals, functions, aliases and ifuncs are not emitted yet.
fn write_module_info(writer: *Writer) void {
    // The trailing 0 selects "no abbreviation" for both string records
    // (marked "TODO in LLVM code" in the original port).
    writer.write_string_record(@intFromEnum(ModuleCode.triple), "x86_64-pc-linux-gnu", 0);
    writer.write_string_record(
        @intFromEnum(ModuleCode.data_layout),
        "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
        0,
    );

    // TODO: global inline assembly
    // const global_inline_assembly = "";
    // write_string_record(@intFromEnum(ModuleCode.@"asm"), global_inline_assembly,
    // // TODO in LLVM code
    // 0);
    //
    // TODO: section names
    // TODO: global abbreviation

    // Source file name: [code, array of <narrowest encoding that fits>].
    {
        const source_filename = "llvm-link";

        const filename_abbreviation = blk: {
            const abbrev = writer.abbreviation_buffer.append(.{});
            abbrev.add_literal(@intFromEnum(ModuleCode.source_filename));
            abbrev.add_with_encoding(.{ .encoding = .array });
            switch (get_string_encoding(source_filename)) {
                .char6 => abbrev.add_with_encoding(.{ .encoding = .char6 }),
                .fixed7 => abbrev.add_with_encoding(.{ .encoding = .fixed, .value = 7 }),
                .fixed8 => abbrev.add_with_encoding(.{ .encoding = .fixed, .value = 8 }),
            }
            break :blk writer.emit_abbreviation(abbrev);
        };

        // One record operand per byte of the name.
        var name_values = std.BoundedArray(u32, 64){};
        for (source_filename) |byte| name_values.appendAssumeCapacity(byte);
        writer.emit_record(u32, @intFromEnum(ModuleCode.source_filename), name_values.constSlice(), filename_abbreviation);
    }

    // TODO: global variables
    // TODO: functions
    // TODO: global aliases
    // TODO: global ifunc

    writer.write_value_symbol_table_forward_declaration();
}
/// Placeholder for the module-level constants.
/// Currently emits nothing.
fn write_module_constants(writer: *Writer) void {
_ = writer; // autofix
// TODO: not implemented yet, no bytes are written.
}
/// Placeholder for the module's metadata kind records.
/// Currently emits nothing.
fn write_module_metadata_kinds(writer: *Writer) void {
_ = writer; // autofix
// TODO: not implemented yet, no bytes are written.
}
/// Placeholder for the module-level metadata.
/// Currently emits nothing.
fn write_module_metadata(writer: *Writer) void {
_ = writer; // autofix
// TODO: not implemented yet, no bytes are written.
}
/// Placeholder for the use-list block.
/// Currently emits nothing and ignores both arguments.
///
/// `function` is optional; the only visible call site passes `null`.
/// NOTE(review): presumably `null` means "module level" and `*u32` stands in
/// for a real function handle — confirm once this is implemented.
fn write_use_list_block(writer: *Writer, function: ?*u32) void {
_ = function; // autofix
_ = writer; // autofix
// TODO: not implemented yet, no bytes are written.
}
/// Placeholder for the operand bundle tags.
/// Currently emits nothing.
fn write_operand_bundle_tags(writer: *Writer) void {
_ = writer; // autofix
}
/// Placeholder for the sync scope names.
/// Currently emits nothing.
fn write_sync_scope_names(writer: *Writer) void {
_ = writer; // autofix
}
/// Emits a `vst_offset` record whose 32-bit payload is a zero placeholder and
/// remembers where that payload sits so it can be patched later.
///
/// The record uses a dedicated abbreviation, `[literal vst_offset, fixed(32)]`,
/// so the placeholder always occupies exactly 32 bits. A fixed width is used
/// because the final value is not known yet.
///
/// After the call, `writer.vst_offset_placeholder` holds the bit offset of the
/// first bit of that 32-bit field within the output stream.
fn write_value_symbol_table_forward_declaration(writer: *Writer) void {
    const abbreviation = writer.abbreviation_buffer.append(.{});
    abbreviation.add_literal(@intFromEnum(ModuleCode.vst_offset));
    abbreviation.add_with_encoding(.{ .encoding = .fixed, .value = 32 });
    const vst_offset_abbreviation = writer.emit_abbreviation(abbreviation);

    // Emit the placeholder value (0) for the offset.
    const values = [_]u64{ @intFromEnum(ModuleCode.vst_offset), 0 };
    writer.emit_record_with_abbrev(u64, vst_offset_abbreviation, &values);

    // Current stream position in bits = fully flushed 32-bit words plus the
    // bits still pending in the partially filled word. The abbreviation id
    // that precedes the 32-bit field is narrower than a word, so the stream
    // is generally NOT word-aligned here and `current_bit` must be counted.
    // The multiply is widened first so it cannot overflow a 32-bit length.
    // NOTE(review): assumes `buffer` holds 32-bit words and `current_bit`
    // counts the pending bits of the next word — confirm against `emit`.
    const flushed_bits = @as(u64, writer.buffer.length) * 32;
    const current_bit_position = flushed_bits + writer.current_bit;

    // The placeholder is the last 32 bits written.
    writer.vst_offset_placeholder = current_bit_position - 32;
}
/// Emits a record through abbreviation `abbreviation`, where `values`
/// already contains every operand (including the record code).
///
/// Thin wrapper over `emit_record_with_abbrev_impl` with neither a string
/// blob nor a separate record code supplied.
fn emit_record_with_abbrev(writer: *Writer, comptime T: type, abbreviation: u32, values: []const T) void {
    const no_string: ?[]const u8 = null;
    const no_code: ?u32 = null;
    return writer.emit_record_with_abbrev_impl(T, abbreviation, values, no_string, no_code);
}
fn switch_to_block_id(writer: *Writer, block_id: u32) void { fn switch_to_block_id(writer: *Writer, block_id: u32) void {
if (block_id != writer.block_info_current_block_id) { if (block_id != writer.block_info_current_block_id) {
const v = [1]u32{block_id}; const v = [1]u32{block_id};
@ -2230,7 +2527,7 @@ const Bitcode = struct {
return writer.current_abbreviations.length - 1 + @intFromEnum(FixedAbbreviationId.first_application_abbrev); return writer.current_abbreviations.length - 1 + @intFromEnum(FixedAbbreviationId.first_application_abbrev);
} }
fn emit_abbreviated_literal(writer: *Writer, operand: *Abbreviation.Op, value: u32) void { fn emit_abbreviated_literal(writer: *Writer, comptime T: type, operand: *Abbreviation.Op, value: T) void {
_ = writer; // autofix _ = writer; // autofix
assert(operand.is_literal); assert(operand.is_literal);
assert(value == operand.value); assert(value == operand.value);
@ -2262,6 +2559,17 @@ const Bitcode = struct {
writer.emit_record(u32, @intFromEnum(IdentificationCode.epoch), &values, epoch_abbreviation); writer.emit_record(u32, @intFromEnum(IdentificationCode.epoch), &values, epoch_abbreviation);
writer.exit_block(); writer.exit_block();
} }
const identification_block = .{
0x35, 0x14, 0x00, 0x00,
0x05, 0x00, 0x00, 0x00,
0x62, 0x0c, 0x30, 0x24,
0x4a, 0x59, 0xbe, 0x66,
0xbd, 0xfb, 0xb4, 0xaf,
0x0b, 0x51, 0x80, 0x4c,
0x01, 0x00, 0x00, 0x00,
};
std.testing.expectEqualSlices(u8, &identification_block, writer.get_byte_slice()[4..]) catch unreachable;
} }
fn exit_block(writer: *Writer) void { fn exit_block(writer: *Writer) void {
@ -2286,7 +2594,7 @@ const Bitcode = struct {
} }
fn write_string_record(writer: *Writer, code: u32, string: []const u8, abbreviation_to_use: u32) void { fn write_string_record(writer: *Writer, code: u32, string: []const u8, abbreviation_to_use: u32) void {
var values = std.BoundedArray(u32, 64){}; var values = std.BoundedArray(u32, 128){};
var a = abbreviation_to_use; var a = abbreviation_to_use;
for (string) |ch| { for (string) |ch| {
if (a != 0 and !is_char6(ch)) { if (a != 0 and !is_char6(ch)) {
@ -2309,11 +2617,11 @@ const Bitcode = struct {
writer.emit_vbr64(v, 6); writer.emit_vbr64(v, 6);
} }
} else { } else {
writer.emit_record_with_abbrev(T, abbreviation, values, null, code); writer.emit_record_with_abbrev_impl(T, abbreviation, values, null, code);
} }
} }
fn emit_record_with_abbrev(writer: *Writer, comptime T: type, abbreviation_int: u32, values: []const T, string: ?[]const u8, code: ?u32) void { fn emit_record_with_abbrev_impl(writer: *Writer, comptime T: type, abbreviation_int: u32, values: []const T, string: ?[]const u8, code: ?u32) void {
const abbreviation_number = abbreviation_int - @intFromEnum(FixedAbbreviationId.first_application_abbrev); const abbreviation_number = abbreviation_int - @intFromEnum(FixedAbbreviationId.first_application_abbrev);
assert(abbreviation_number < writer.current_abbreviations.length); assert(abbreviation_number < writer.current_abbreviations.length);
const abbreviation = writer.current_abbreviations.slice()[abbreviation_number]; const abbreviation = writer.current_abbreviations.slice()[abbreviation_number];
@ -2328,7 +2636,7 @@ const Bitcode = struct {
operand_index += 1; operand_index += 1;
if (operand.is_literal) { if (operand.is_literal) {
writer.emit_abbreviated_literal(operand, c); writer.emit_abbreviated_literal(u32, operand, c);
} else { } else {
unreachable; unreachable;
} }
@ -2339,7 +2647,9 @@ const Bitcode = struct {
const operand = &abbreviation.operands.slice()[operand_index]; const operand = &abbreviation.operands.slice()[operand_index];
if (operand.is_literal) { if (operand.is_literal) {
unreachable; assert(record_index < values.len);
writer.emit_abbreviated_literal(T, operand, values[record_index]);
record_index += 1;
} else if (operand.encoding == .array) { } else if (operand.encoding == .array) {
assert(operand_index + 2 == operand_count); assert(operand_index + 2 == operand_count);
operand_index += 1; operand_index += 1;
@ -2370,7 +2680,9 @@ const Bitcode = struct {
switch (operand.encoding) { switch (operand.encoding) {
else => unreachable, else => unreachable,
.fixed => { .fixed => {
unreachable; if (operand.get_encoding_data()) |v| {
writer.emit(@intCast(value), @intCast(v));
}
}, },
.vbr => { .vbr => {
if (operand.get_encoding_data()) |v| { if (operand.get_encoding_data()) |v| {
@ -2393,6 +2705,27 @@ const Bitcode = struct {
unreachable; unreachable;
} }
/// Narrowest per-character encoding that can represent a string in an
/// abbreviated record, as chosen by `get_string_encoding`.
const StringEncoding = enum {
    /// Every byte passes `is_char6`.
    char6,
    /// Every byte has its top bit clear, but at least one is not char6.
    fixed7,
    /// At least one byte has its top bit set.
    fixed8,
};
/// Picks the narrowest encoding able to represent every byte of `string`.
///
/// Returns `.fixed8` as soon as a byte with its top bit set is seen,
/// `.char6` when every byte passes `is_char6` (including the empty string),
/// and `.fixed7` otherwise.
fn get_string_encoding(string: []const u8) StringEncoding {
    var all_char6 = true;
    for (string) |byte| {
        // `and` short-circuits, so `is_char6` is no longer consulted once a
        // non-char6 byte has been found.
        all_char6 = all_char6 and is_char6(byte);
        // Top bit set: needs the full 8 bits, no point scanning further.
        if (byte >= 0x80) return .fixed8;
    }
    if (all_char6) return .char6;
    return .fixed7;
}
fn enter_subblock(writer: *Writer, block_id: BlockId, code_length: u32) void { fn enter_subblock(writer: *Writer, block_id: BlockId, code_length: u32) void {
writer.emit_code(@intFromEnum(FixedAbbreviationId.enter_subblock)); writer.emit_code(@intFromEnum(FixedAbbreviationId.enter_subblock));
writer.emit_vbr(@intFromEnum(block_id), width.block_id); writer.emit_vbr(@intFromEnum(block_id), width.block_id);