Optimization pipeline

This commit is contained in:
David Gonzalez Martin 2025-02-18 17:52:00 -06:00
parent 59b35682c7
commit b7eff075fc
4 changed files with 356 additions and 113 deletions

View File

@ -158,11 +158,125 @@ pub const Target = opaque {
aix = 6,
zos = 7,
},
reserved: u21 = 0,
reserved: PaddingType = 0,
},
loop_alignment: c_uint,
binutils_version: [2]c_int,
mc: MCTargetOptions,
// Width, in bits, of the `reserved` padding field declared above.
const padding_bit_count = 21;
// Unsigned integer type that is exactly `padding_bit_count` bits wide, built with `@Type`.
const PaddingType = @Type(.{
.int = .{
.signedness = .unsigned,
.bits = padding_bit_count,
},
});
// Compile-time layout checks: the struct must stay 136 bytes and the padding must stay
// 21 bits. The C++ struct `BBLLVMTargetOptions` static_asserts the same two numbers, so
// a change on one side has to be repeated on the other.
comptime {
assert(@sizeOf(Target.Options) == 136);
assert(padding_bit_count == 21);
}
/// Returns a `Target.Options` value in which every field listed below is set explicitly.
///
/// The only single-bit flags set to 1 are `no_trapping_fp_math`, `stack_symbol_ordering`,
/// `xcoff_traceback_table`, `unique_section_names` and `xray_function_index` in `flags0`,
/// plus `preserve_asm_comments` and `x86_relax_relocations` in `mc.flags`; every other
/// single-bit flag is 0. The `reserved` padding fields are not listed and keep their
/// declared default of 0.
///
/// NOTE(review): presumably these values mirror the defaults of LLVM's own
/// `TargetOptions` / `MCTargetOptions` - confirm against the LLVM version in use.
pub fn default() Target.Options {
return .{
.binutils_version = .{ 0, 0 },
// First flag word.
.flags0 = .{
.unsafe_fp_math = 0,
.no_infs_fp_math = 0,
.no_nans_fp_math = 0,
.no_trapping_fp_math = 1,
.no_signed_zeroes_fp_math = 0,
.approx_func_fp_match = 0,
.enable_aix_extended_altivec_abi = 0,
.honor_sign_dependent_rounding_fp_math = 0,
.no_zeroes_in_bss = 0,
.guaranteed_tail_call_optimization = 0,
.stack_symbol_ordering = 1,
.enable_fast_isel = 0,
.enable_global_isel = 0,
.global_isel_abort_mode = .enable,
.swift_async_frame_pointer = .always,
.use_init_array = 0,
.disable_integrated_assembler = 0,
.function_sections = 0,
.data_sections = 0,
.ignore_xcoff_visibility = 0,
.xcoff_traceback_table = 1,
.unique_section_names = 1,
.unique_basic_block_section_names = 0,
.separate_named_sections = 0,
.trap_unreachable = 0,
.no_trap_after_noreturn = 0,
.tls_size = 0,
.emulated_tls = 0,
.enable_tls_descriptors = 0,
.enable_ipra = 0,
.emit_stack_size_section = 0,
.enable_machine_outliner = 0,
.enable_machine_function_splitter = 0,
.supports_default_outlining = 0,
.emit_address_significance_table = 0,
.bb_address_map = 0,
.bb_sections = .none,
.emit_call_site_information = 0,
.supports_debug_entry_values = 0,
.enable_debug_entry_values = 0,
.value_tracking_variable_locations = 0,
.force_dwarf_frame_section = 0,
.xray_function_index = 1,
.debug_strict_dwarf = 0,
.hotpatch = 0,
.ppc_gen_scalar_mass_entries = 0,
.jmc_instrument = 0,
.enable_cfi_fixup = 0,
.mis_expect = 0,
.xcoff_read_only_pointers = 0,
.float_abi = .default,
.thread_model = .posix,
},
// Second flag word; its `reserved` bits are left at their default.
.flags1 = .{
.fp_op_fusion_mode = .standard,
.eabi_version = .default,
.debugger_kind = .default,
.exception_handling = .none,
},
.loop_alignment = 0,
// Nested machine-code options: empty strings, null pointers and zero counts.
.mc = .{
.abi_name = .{},
.assembly_language = .{},
.split_dwarf_file = .{},
.as_secure_log_file = .{},
.argv0 = null,
.argv_pointer = null,
.argv_count = 0,
.integrated_assembler_search_path_pointer = null,
.integrated_assembler_search_path_count = 0,
.flags = .{
.relax_all = 0,
.no_exec_stack = 0,
.fatal_warnings = 0,
.no_warn = 0,
.no_deprecated_warn = 0,
.no_type_check = 0,
.save_temp_labels = 0,
.incremental_linker_compatible = 0,
.fdpic = 0,
.show_mc_encoding = 0,
.show_mc_inst = 0,
.asm_verbose = 0,
.preserve_asm_comments = 1,
.dwarf64 = 0,
.crel = 0,
.x86_relax_relocations = 1,
.x86_sse2_avx = 0,
.emit_dwarf_unwind = .default,
.use_dwarf_directory = .default,
.debug_compression_type = .none,
.emit_compact_unwind_non_canonical = 0,
.ppc_use_full_register_names = 0,
},
},
};
}
};
pub const Machine = opaque {
@ -176,12 +290,14 @@ pub const Target = opaque {
relocation_model: RelocationModel,
optimization_level: CodeGenerationOptimizationLevel,
jit: bool,
reserved: u32 = 0,
};
reserved: [padding_byte_count]u8 = [1]u8{0} ** padding_byte_count,
comptime {
assert(@sizeOf(Create) == 192);
}
const padding_byte_count = 4;
comptime {
assert(@sizeOf(Create) == 192);
assert(padding_byte_count == 4);
}
};
pub fn create(options: Create, error_message: *String) ?*Target.Machine {
const target_machine = api.llvm_create_target_machine(&options, error_message);
@ -189,6 +305,7 @@ pub const Target = opaque {
}
};
};
pub const MCTargetOptions = extern struct {
abi_name: String,
assembly_language: String,
@ -234,13 +351,91 @@ pub const MCTargetOptions = extern struct {
},
emit_compact_unwind_non_canonical: u1,
ppc_use_full_register_names: u1,
reserved: u7 = 0,
reserved: PaddingType = 0,
},
/// Width, in bits, of the `reserved` padding field.
const padding_bit_count = 7;
/// Unsigned integer type exactly `padding_bit_count` bits wide, used for `reserved`.
/// The width is taken from `padding_bit_count` rather than repeated as a literal, so the
/// constant, the field type and the compile-time check on the constant cannot drift apart.
const PaddingType = @Type(.{
    .int = .{
        .signedness = .unsigned,
        .bits = padding_bit_count,
    },
});
comptime {
assert(@sizeOf(MCTargetOptions) == 112);
assert(padding_bit_count == 7);
}
};
comptime {
assert(@sizeOf(MCTargetOptions) == 112);
}
/// Optimization level stored in a 3-bit tag. The numeric values are the same as those of
/// the C++ enum `BBLLVMOptimizationLevel`.
const OptimizationLevel = enum(u3) {
    O0 = 0,
    O1 = 1,
    O2 = 2,
    O3 = 3,
    Os = 4,
    Oz = 5,

    /// True for `O0`, `O1`, `Os` and `Oz`; false for `O2` and `O3`.
    fn prefers_size(optimization_level: OptimizationLevel) bool {
        return !optimization_level.prefers_speed();
    }

    /// True for `O2` and `O3` only; the exact complement of `prefers_size`.
    fn prefers_speed(optimization_level: OptimizationLevel) bool {
        return switch (optimization_level) {
            .O2, .O3 => true,
            .O0, .O1, .Os, .Oz => false,
        };
    }
};
/// This is ABI-compatible with C++.
/// A 64-bit packed set of optimization switches: a 3-bit level, ten 1-bit flags and
/// 51 reserved bits, in the same order as the C++ struct `BBLLVMOptimizationOptions`.
pub const OptimizationOptions = packed struct(u64) {
    optimization_level: OptimizationLevel,
    debug_info: u1,
    loop_unrolling: u1,
    loop_interleaving: u1,
    loop_vectorization: u1,
    slp_vectorization: u1,
    merge_functions: u1,
    call_graph_profile: u1,
    unified_lto: u1,
    assignment_tracking: u1,
    verify_module: u1,
    reserved: PaddingType = 0,

    // 64 bits total, minus the 3-bit level and the ten 1-bit flags.
    const padding_bit_count = 51;
    const PaddingType = @Type(.{ .int = .{
        .signedness = .unsigned,
        .bits = padding_bit_count,
    } });

    comptime {
        assert(@sizeOf(OptimizationOptions) == @sizeOf(u64));
        assert(padding_bit_count == 51);
    }

    /// The two inputs from which `default` derives every other flag.
    const OptimizationOptionsCreate = packed struct {
        optimization_level: OptimizationLevel,
        debug_info: u1,
    };

    /// Expands a level and a debug-info bit into a full option set.
    ///
    /// - Loop unrolling, interleaving, vectorization, SLP vectorization and function
    ///   merging are switched on exactly when the level prefers speed.
    /// - `call_graph_profile` and `unified_lto` are always off.
    /// - `assignment_tracking` copies `debug_info`.
    /// - `verify_module` is on when this compiler itself is built in `Debug` or
    ///   `ReleaseSafe` mode.
    pub fn default(create: OptimizationOptionsCreate) OptimizationOptions {
        const level = create.optimization_level;
        const speed_bit: u1 = @intFromBool(level.prefers_speed());

        const build_mode = lib.optimization_mode;
        const verify = build_mode == .Debug or build_mode == .ReleaseSafe;

        return .{
            .optimization_level = level,
            .debug_info = create.debug_info,
            .loop_unrolling = speed_bit,
            .loop_interleaving = speed_bit,
            .loop_vectorization = speed_bit,
            .slp_vectorization = speed_bit,
            .merge_functions = speed_bit,
            .call_graph_profile = 0,
            .unified_lto = 0,
            .assignment_tracking = create.debug_info,
            .verify_module = @intFromBool(verify),
        };
    }
};
pub const Architecture = enum {
X86,
@ -271,6 +466,8 @@ pub const BasicBlock = opaque {};
pub const Module = opaque {
pub const create_di_builder = api.LLVMCreateDIBuilder;
pub const set_target = api.llvm_module_set_target;
pub const run_optimization_pipeline = api.llvm_module_run_optimization_pipeline;
pub fn to_string(module: *Module) []const u8 {
return api.llvm_module_to_string(module).to_slice().?;
@ -533,105 +730,7 @@ pub fn experiment() void {
var error_message: String = undefined;
const target_machine = Target.Machine.create(.{
.target_options = .{
.binutils_version = .{ 0, 0 },
.flags0 = .{
.unsafe_fp_math = 0,
.no_infs_fp_math = 0,
.no_nans_fp_math = 0,
.no_trapping_fp_math = 1,
.no_signed_zeroes_fp_math = 0,
.approx_func_fp_match = 0,
.enable_aix_extended_altivec_abi = 0,
.honor_sign_dependent_rounding_fp_math = 0,
.no_zeroes_in_bss = 0,
.guaranteed_tail_call_optimization = 0,
.stack_symbol_ordering = 1,
.enable_fast_isel = 0,
.enable_global_isel = 0,
.global_isel_abort_mode = .enable,
.swift_async_frame_pointer = .always,
.use_init_array = 0,
.disable_integrated_assembler = 0,
.function_sections = 0,
.data_sections = 0,
.ignore_xcoff_visibility = 0,
.xcoff_traceback_table = 1,
.unique_section_names = 1,
.unique_basic_block_section_names = 0,
.separate_named_sections = 0,
.trap_unreachable = 0,
.no_trap_after_noreturn = 0,
.tls_size = 0,
.emulated_tls = 0,
.enable_tls_descriptors = 0,
.enable_ipra = 0,
.emit_stack_size_section = 0,
.enable_machine_outliner = 0,
.enable_machine_function_splitter = 0,
.supports_default_outlining = 0,
.emit_address_significance_table = 0,
.bb_address_map = 0,
.bb_sections = .none,
.emit_call_site_information = 0,
.supports_debug_entry_values = 0,
.enable_debug_entry_values = 0,
.value_tracking_variable_locations = 0,
.force_dwarf_frame_section = 0,
.xray_function_index = 1,
.debug_strict_dwarf = 0,
.hotpatch = 0,
.ppc_gen_scalar_mass_entries = 0,
.jmc_instrument = 0,
.enable_cfi_fixup = 0,
.mis_expect = 0,
.xcoff_read_only_pointers = 0,
.float_abi = .default,
.thread_model = .posix,
},
.flags1 = .{
.fp_op_fusion_mode = .standard,
.eabi_version = .default,
.debugger_kind = .default,
.exception_handling = .none,
},
.loop_alignment = 0,
.mc = .{
.abi_name = .{},
.assembly_language = .{},
.split_dwarf_file = .{},
.as_secure_log_file = .{},
.argv0 = null,
.argv_pointer = null,
.argv_count = 0,
.integrated_assembler_search_path_pointer = null,
.integrated_assembler_search_path_count = 0,
.flags = .{
.relax_all = 0,
.no_exec_stack = 0,
.fatal_warnings = 0,
.no_warn = 0,
.no_deprecated_warn = 0,
.no_type_check = 0,
.save_temp_labels = 0,
.incremental_linker_compatible = 0,
.fdpic = 0,
.show_mc_encoding = 0,
.show_mc_inst = 0,
.asm_verbose = 0,
.preserve_asm_comments = 1,
.dwarf64 = 0,
.crel = 0,
.x86_relax_relocations = 1,
.x86_sse2_avx = 0,
.emit_dwarf_unwind = .default,
.use_dwarf_directory = .default,
.debug_compression_type = .none,
.emit_compact_unwind_non_canonical = 0,
.ppc_use_full_register_names = 0,
},
},
},
.target_options = Target.Options.default(),
.cpu_triple = String.from_slice(global.host_triple),
.cpu_model = String.from_slice(global.host_cpu_model),
.cpu_features = String.from_slice(global.host_cpu_features),
@ -642,5 +741,7 @@ pub fn experiment() void {
}, &error_message) orelse {
unreachable;
};
_ = target_machine;
module.set_target(target_machine);
module.run_optimization_pipeline(target_machine, OptimizationOptions.default(.{ .optimization_level = .O3, .debug_info = 1 }));
}

View File

@ -1,4 +1,5 @@
const builtin = @import("builtin");
pub const optimization_mode = builtin.mode;
const VariableArguments = @import("std").builtin.VaList;
extern "c" fn IsDebuggerPresent() bool;
extern "c" fn __errno_location() *c_int;

View File

@ -3,9 +3,19 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Frontend/Driver/CodeGenOptions.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/MC/TargetRegistry.h"
#define EXPORT extern "C"
@ -181,6 +191,8 @@ enum class BBLLVMDebugCompressionType : u8
zstd = 2,
};
#define BB_LLVM_MC_TARGET_OPTIONS_PADDING_BIT_COUNT (7)
struct BBLLVMMCTargetOptions
{
BBLLVMString abi_name;
@ -214,9 +226,11 @@ struct BBLLVMMCTargetOptions
u32 debug_compression_type:2;
u32 emit_compact_unwind_non_canonical:1;
u32 ppc_use_full_register_names:1;
u32 reserved:7;
u32 reserved:BB_LLVM_MC_TARGET_OPTIONS_PADDING_BIT_COUNT;
};
static_assert(sizeof(BBLLVMMCTargetOptions) == 112);
static_assert(BB_LLVM_MC_TARGET_OPTIONS_PADDING_BIT_COUNT == 7);
enum class BBLLVMCodeModel : u8
{
@ -320,6 +334,8 @@ enum class BBLLVMExceptionHandling : u8
zos = 7,
};
#define BB_LLVM_TARGET_OPTIONS_PADDING_BIT_COUNT (21)
struct BBLLVMTargetOptions
{
u64 unsafe_fp_math:1;
@ -378,13 +394,18 @@ struct BBLLVMTargetOptions
u32 eabi_version:3;
u32 debugger_kind:3;
u32 exception_handling:3;
u32 reserved:21;
u32 reserved:BB_LLVM_TARGET_OPTIONS_PADDING_BIT_COUNT;
unsigned loop_alignment;
int binutils_version[2];
BBLLVMMCTargetOptions mc;
};
static_assert(sizeof(BBLLVMTargetOptions) == 136);
static_assert(BB_LLVM_TARGET_OPTIONS_PADDING_BIT_COUNT == 21);
#define BB_LLVM_TARGET_MACHINE_CREATE_PADDING_BYTE_COUNT (4)
struct BBLLVMTargetMachineCreate
{
BBLLVMTargetOptions target_options;
@ -395,10 +416,11 @@ struct BBLLVMTargetMachineCreate
BBLLVMCodeModel code_model;
BBLLVMCodeGenerationOptimizationLevel optimization_level;
bool jit;
u32 reserved;
u8 reserved[BB_LLVM_TARGET_MACHINE_CREATE_PADDING_BYTE_COUNT];
};
static_assert(sizeof(BBLLVMTargetMachineCreate) == 192);
static_assert(BB_LLVM_TARGET_MACHINE_CREATE_PADDING_BYTE_COUNT == 4);
EXPORT TargetMachine* llvm_create_target_machine(const BBLLVMTargetMachineCreate& create, BBLLVMString* error_message)
{
@ -673,3 +695,119 @@ EXPORT TargetMachine* llvm_create_target_machine(const BBLLVMTargetMachineCreate
return target_machine;
}
// Copies the target machine's data layout and target-triple string onto `module`.
EXPORT void llvm_module_set_target(Module& module, TargetMachine& target_machine)
{
    const auto data_layout = target_machine.createDataLayout();
    module.setDataLayout(data_layout);

    const auto& target_triple = target_machine.getTargetTriple();
    module.setTargetTriple(StringRef(target_triple.getTriple()));
}
// Optimization level as it arrives in the 3-bit `optimization_level` field of
// BBLLVMOptimizationOptions. The numeric values match the Zig enum `OptimizationLevel`
// and must be kept in sync with it by hand.
enum class BBLLVMOptimizationLevel : u8
{
O0 = 0,
O1 = 1,
O2 = 2,
O3 = 3,
Os = 4,
Oz = 5,
};
// Number of unused bits at the end of BBLLVMOptimizationOptions:
// 64 bits minus the 3-bit level and the ten 1-bit flags.
#define BB_LLVM_OPTIMIZATION_OPTIONS_PADDING_BIT_COUNT (51)
// Option word taken by value by llvm_module_run_optimization_pipeline. The field order and
// widths mirror the Zig `OptimizationOptions` packed struct(u64); change both together.
struct BBLLVMOptimizationOptions
{
// Holds a BBLLVMOptimizationLevel value.
u64 optimization_level:3;
// Must be set together with assignment_tracking for the assignment-tracking pass to be added.
u64 debug_info:1;
// The next seven flags are copied one-to-one into PipelineTuningOptions.
u64 loop_unrolling:1;
u64 loop_interleaving:1;
u64 loop_vectorization:1;
u64 slp_vectorization:1;
u64 merge_functions:1;
u64 call_graph_profile:1;
u64 unified_lto:1;
u64 assignment_tracking:1;
// When set, a VerifierPass is added ahead of the optimization pipeline.
u64 verify_module:1;
u64 reserved:BB_LLVM_OPTIMIZATION_OPTIONS_PADDING_BIT_COUNT;
};
// Layout guards: the struct must be exactly one u64 and the padding exactly 51 bits.
static_assert(sizeof(BBLLVMOptimizationOptions) == sizeof(u64));
static_assert(BB_LLVM_OPTIMIZATION_OPTIONS_PADDING_BIT_COUNT == 51);
// Builds LLVM's default per-module optimization pipeline and runs it over `module`.
//
// module:         IR module; the pass manager is run on it in place.
// target_machine: handed to the PassBuilder; its triple also selects the TargetLibraryInfo.
// options:        packed switches, see BBLLVMOptimizationOptions.
//
// Traps (__builtin_trap) when `options.optimization_level` names no known level and on
// the LTO paths, which are not implemented yet.
EXPORT void llvm_module_run_optimization_pipeline(Module& module, TargetMachine& target_machine, BBLLVMOptimizationOptions options)
{
    // TODO: PGO
    // TODO: CS profile
    PipelineTuningOptions pipeline_tuning_options;
    pipeline_tuning_options.LoopUnrolling = options.loop_unrolling;
    pipeline_tuning_options.LoopInterleaving = options.loop_interleaving;
    pipeline_tuning_options.LoopVectorization = options.loop_vectorization;
    pipeline_tuning_options.SLPVectorization = options.slp_vectorization;
    pipeline_tuning_options.MergeFunctions = options.merge_functions;
    pipeline_tuning_options.CallGraphProfile = options.call_graph_profile;
    pipeline_tuning_options.UnifiedLTO = options.unified_lto;

    // TODO: instrumentation

    // Declaration order is kept as in the original: loop, function, CGSCC, module.
    LoopAnalysisManager loop_analysis_manager;
    FunctionAnalysisManager function_analysis_manager;
    CGSCCAnalysisManager cgscc_analysis_manager;
    ModuleAnalysisManager module_analysis_manager;

    PassBuilder pass_builder(&target_machine, pipeline_tuning_options);

    if (options.assignment_tracking && options.debug_info != 0)
    {
        // The callback needs no state and ignores the level, so it captures nothing and
        // leaves the second parameter unnamed.
        pass_builder.registerPipelineStartEPCallback([](ModulePassManager& MPM, OptimizationLevel) {
            MPM.addPass(AssignmentTrackingPass());
        });
    }

    Triple target_triple = target_machine.getTargetTriple(); // Need to make a copy, incoming bugfix: https://github.com/llvm/llvm-project/pull/127718
    // TODO: add library (?)
    std::unique_ptr<TargetLibraryInfoImpl> TLII(llvm::driver::createTLII(target_triple, driver::VectorLibrary::NoLibrary));
    function_analysis_manager.registerPass([&] { return TargetLibraryAnalysis(*TLII); });

    pass_builder.registerModuleAnalyses(module_analysis_manager);
    pass_builder.registerCGSCCAnalyses(cgscc_analysis_manager);
    pass_builder.registerFunctionAnalyses(function_analysis_manager);
    pass_builder.registerLoopAnalyses(loop_analysis_manager);
    pass_builder.crossRegisterProxies(loop_analysis_manager, function_analysis_manager, cgscc_analysis_manager, module_analysis_manager);

    ModulePassManager module_pass_manager;

    if (options.verify_module)
    {
        module_pass_manager.addPass(VerifierPass());
    }

    // Placeholders until the LTO pipelines are wired up.
    bool thin_lto = false;
    bool lto = false;

    OptimizationLevel optimization_level;
    switch ((BBLLVMOptimizationLevel)options.optimization_level)
    {
        case BBLLVMOptimizationLevel::O0: optimization_level = OptimizationLevel::O0; break;
        case BBLLVMOptimizationLevel::O1: optimization_level = OptimizationLevel::O1; break;
        case BBLLVMOptimizationLevel::O2: optimization_level = OptimizationLevel::O2; break;
        case BBLLVMOptimizationLevel::O3: optimization_level = OptimizationLevel::O3; break;
        case BBLLVMOptimizationLevel::Os: optimization_level = OptimizationLevel::Os; break;
        case BBLLVMOptimizationLevel::Oz: optimization_level = OptimizationLevel::Oz; break;
        // The 3-bit field can also carry 6 and 7, which name no level. Refuse them rather
        // than silently running with the default-constructed level.
        default: __builtin_trap();
    }

    // TODO: thin lto post-link
    // TODO: instrument
    if (thin_lto)
    {
        __builtin_trap(); // TODO
    }
    else if (lto)
    {
        __builtin_trap(); // TODO
    }
    else
    {
        module_pass_manager.addPass(pass_builder.buildPerModuleDefaultPipeline(optimization_level, lto));
    }

    // TODO: if emit bitcode/IR

    module_pass_manager.run(module, module_analysis_manager);
}

View File

@ -72,6 +72,9 @@ pub extern fn llvm_host_cpu_name() llvm.String;
pub extern fn llvm_host_cpu_features() llvm.String;
pub extern fn llvm_create_target_machine(create: *const llvm.Target.Machine.Create, error_message: *llvm.String) ?*llvm.Target.Machine;
pub extern fn llvm_module_set_target(module: *llvm.Module, target_machine: *llvm.Target.Machine) void;
pub extern fn llvm_module_run_optimization_pipeline(module: *llvm.Module, target_machine: *llvm.Target.Machine, options: llvm.OptimizationOptions) void;
pub fn get_initializer(comptime llvm_arch: llvm.Architecture) type {
const arch_name = @tagName(llvm_arch);