From b7eff075fc41da5aecdc95bf78eb310e293c651f Mon Sep 17 00:00:00 2001
From: David Gonzalez Martin
Date: Tue, 18 Feb 2025 17:52:00 -0600
Subject: [PATCH] Optimization pipeline

Move the target option defaults out of experiment() into
Target.Options.default(), keep every padding width in a named constant
that is asserted on both the Zig and the C++ side, and add
OptimizationOptions, a 64-bit packed struct mirrored by
BBLLVMOptimizationOptions.

Export llvm_module_set_target and llvm_module_run_optimization_pipeline
from llvm.cpp and call both from experiment().
---
 src/LLVM.zig     | 321 +++++++++++++++++++++++++++++++---------------
 src/lib.zig      |   1 +
 src/llvm.cpp     | 144 ++++++++++++++++++++-
 src/llvm_api.zig |   3 +
 4 files changed, 356 insertions(+), 113 deletions(-)

diff --git a/src/LLVM.zig b/src/LLVM.zig
index 3e66d4d..9f2b3fa 100644
--- a/src/LLVM.zig
+++ b/src/LLVM.zig
@@ -158,11 +158,125 @@ pub const Target = opaque {
                 aix = 6,
                 zos = 7,
             },
-            reserved: u21 = 0,
+            reserved: PaddingType = 0,
         },
         loop_alignment: c_uint,
         binutils_version: [2]c_int,
         mc: MCTargetOptions,
+
+        const padding_bit_count = 21;
+        const PaddingType = @Type(.{
+            .int = .{
+                .signedness = .unsigned,
+                .bits = padding_bit_count,
+            },
+        });
+        comptime {
+            assert(@sizeOf(Target.Options) == 136);
+            assert(padding_bit_count == 21);
+        }
+
+        pub fn default() Target.Options {
+            return .{
+                .binutils_version = .{ 0, 0 },
+                .flags0 = .{
+                    .unsafe_fp_math = 0,
+                    .no_infs_fp_math = 0,
+                    .no_nans_fp_math = 0,
+                    .no_trapping_fp_math = 1,
+                    .no_signed_zeroes_fp_math = 0,
+                    .approx_func_fp_match = 0,
+                    .enable_aix_extended_altivec_abi = 0,
+                    .honor_sign_dependent_rounding_fp_math = 0,
+                    .no_zeroes_in_bss = 0,
+                    .guaranteed_tail_call_optimization = 0,
+                    .stack_symbol_ordering = 1,
+                    .enable_fast_isel = 0,
+                    .enable_global_isel = 0,
+                    .global_isel_abort_mode = .enable,
+                    .swift_async_frame_pointer = .always,
+                    .use_init_array = 0,
+                    .disable_integrated_assembler = 0,
+                    .function_sections = 0,
+                    .data_sections = 0,
+                    .ignore_xcoff_visibility = 0,
+                    .xcoff_traceback_table = 1,
+                    .unique_section_names = 1,
+                    .unique_basic_block_section_names = 0,
+                    .separate_named_sections = 0,
+                    .trap_unreachable = 0,
+                    .no_trap_after_noreturn = 0,
+                    .tls_size = 0,
+                    .emulated_tls = 0,
+                    .enable_tls_descriptors = 0,
+                    .enable_ipra = 0,
+                    .emit_stack_size_section = 0,
+                    .enable_machine_outliner = 0,
+                    .enable_machine_function_splitter = 0,
+                    .supports_default_outlining = 0,
+                    .emit_address_significance_table = 0,
+                    .bb_address_map = 0,
+                    .bb_sections = .none,
+                    .emit_call_site_information = 0,
+                    .supports_debug_entry_values = 0,
+                    .enable_debug_entry_values = 0,
+                    .value_tracking_variable_locations = 0,
+                    .force_dwarf_frame_section = 0,
+                    .xray_function_index = 1,
+                    .debug_strict_dwarf = 0,
+                    .hotpatch = 0,
+                    .ppc_gen_scalar_mass_entries = 0,
+                    .jmc_instrument = 0,
+                    .enable_cfi_fixup = 0,
+                    .mis_expect = 0,
+                    .xcoff_read_only_pointers = 0,
+                    .float_abi = .default,
+                    .thread_model = .posix,
+                },
+                .flags1 = .{
+                    .fp_op_fusion_mode = .standard,
+                    .eabi_version = .default,
+                    .debugger_kind = .default,
+                    .exception_handling = .none,
+                },
+                .loop_alignment = 0,
+                .mc = .{
+                    .abi_name = .{},
+                    .assembly_language = .{},
+                    .split_dwarf_file = .{},
+                    .as_secure_log_file = .{},
+                    .argv0 = null,
+                    .argv_pointer = null,
+                    .argv_count = 0,
+                    .integrated_assembler_search_path_pointer = null,
+                    .integrated_assembler_search_path_count = 0,
+                    .flags = .{
+                        .relax_all = 0,
+                        .no_exec_stack = 0,
+                        .fatal_warnings = 0,
+                        .no_warn = 0,
+                        .no_deprecated_warn = 0,
+                        .no_type_check = 0,
+                        .save_temp_labels = 0,
+                        .incremental_linker_compatible = 0,
+                        .fdpic = 0,
+                        .show_mc_encoding = 0,
+                        .show_mc_inst = 0,
+                        .asm_verbose = 0,
+                        .preserve_asm_comments = 1,
+                        .dwarf64 = 0,
+                        .crel = 0,
+                        .x86_relax_relocations = 1,
+                        .x86_sse2_avx = 0,
+                        .emit_dwarf_unwind = .default,
+                        .use_dwarf_directory = .default,
+                        .debug_compression_type = .none,
+                        .emit_compact_unwind_non_canonical = 0,
+                        .ppc_use_full_register_names = 0,
+                    },
+                },
+            };
+        }
     };
 
     pub const Machine = opaque {
@@ -176,12 +290,14 @@ pub const Target = opaque {
             relocation_model: RelocationModel,
             optimization_level: CodeGenerationOptimizationLevel,
             jit: bool,
-            reserved: u32 = 0,
-        };
+            reserved: [padding_byte_count]u8 = [1]u8{0} ** padding_byte_count,
 
-        comptime {
-            assert(@sizeOf(Create) == 192);
-        }
+            const padding_byte_count = 4;
+            comptime {
+                assert(@sizeOf(Create) == 192);
+                assert(padding_byte_count == 4);
+            }
+        };
 
         pub fn create(options: Create, error_message: *String) ?*Target.Machine {
             const target_machine = api.llvm_create_target_machine(&options, error_message);
@@ -189,6 +305,7 @@ pub const Target = opaque {
         }
     };
 };
+
 pub const MCTargetOptions = extern struct {
     abi_name: String,
     assembly_language: String,
@@ -234,13 +351,91 @@ pub const MCTargetOptions = extern struct {
         },
         emit_compact_unwind_non_canonical: u1,
         ppc_use_full_register_names: u1,
-        reserved: u7 = 0,
+        reserved: PaddingType = 0,
     },
+
+    const padding_bit_count = 7;
+    const PaddingType = @Type(.{
+        .int = .{
+            .signedness = .unsigned,
+            .bits = padding_bit_count,
+        },
+    });
+    comptime {
+        assert(@sizeOf(MCTargetOptions) == 112);
+        assert(padding_bit_count == 7);
+    }
 };
 
-comptime {
-    assert(@sizeOf(MCTargetOptions) == 112);
-}
+const OptimizationLevel = enum(u3) {
+    O0 = 0,
+    O1 = 1,
+    O2 = 2,
+    O3 = 3,
+    Os = 4,
+    Oz = 5,
+
+    fn prefers_size(optimization_level: OptimizationLevel) bool {
+        return switch (optimization_level) {
+            .O0, .O1, .Os, .Oz => true,
+            .O2, .O3 => false,
+        };
+    }
+
+    fn prefers_speed(optimization_level: OptimizationLevel) bool {
+        return !prefers_size(optimization_level);
+    }
+};
+
+/// This is ABI-compatible with C++
+pub const OptimizationOptions = packed struct(u64) {
+    optimization_level: OptimizationLevel,
+    debug_info: u1,
+    loop_unrolling: u1,
+    loop_interleaving: u1,
+    loop_vectorization: u1,
+    slp_vectorization: u1,
+    merge_functions: u1,
+    call_graph_profile: u1,
+    unified_lto: u1,
+    assignment_tracking: u1,
+    verify_module: u1,
+    reserved: PaddingType = 0,
+
+    const padding_bit_count = 51;
+    const PaddingType = @Type(.{
+        .int = .{
+            .signedness = .unsigned,
+            .bits = padding_bit_count,
+        },
+    });
+
+    comptime {
+        assert(@sizeOf(OptimizationOptions) == @sizeOf(u64));
+        assert(padding_bit_count == 51);
+    }
+
+    const OptimizationOptionsCreate = packed struct {
+        optimization_level: OptimizationLevel,
+        debug_info: u1,
+    };
+    pub fn default(create: OptimizationOptionsCreate) OptimizationOptions {
+        const pref_speed = @intFromBool(create.optimization_level.prefers_speed());
+        return .{
+            .optimization_level = create.optimization_level,
+            .debug_info = create.debug_info,
+            .loop_unrolling = pref_speed,
+            .loop_interleaving = pref_speed,
+            .loop_vectorization = pref_speed,
+            .slp_vectorization = pref_speed,
+            .merge_functions = pref_speed,
+            .call_graph_profile = 0,
+            .unified_lto = 0,
+            .assignment_tracking = create.debug_info,
+            .verify_module = @intFromBool(lib.optimization_mode == .ReleaseSafe or lib.optimization_mode == .Debug),
+        };
+    }
+};
 
 pub const Architecture = enum {
     X86,
@@ -271,6 +466,8 @@ pub const BasicBlock = opaque {};
 
 pub const Module = opaque {
     pub const create_di_builder = api.LLVMCreateDIBuilder;
+    pub const set_target = api.llvm_module_set_target;
+    pub const run_optimization_pipeline = api.llvm_module_run_optimization_pipeline;
 
     pub fn to_string(module: *Module) []const u8 {
         return api.llvm_module_to_string(module).to_slice().?;
@@ -533,105 +730,7 @@ pub fn experiment() void {
 
     var error_message: String = undefined;
     const target_machine = Target.Machine.create(.{
-        .target_options = .{
-            .binutils_version = .{ 0, 0 },
-            .flags0 = .{
-                .unsafe_fp_math = 0,
-                .no_infs_fp_math = 0,
-                .no_nans_fp_math = 0,
-                .no_trapping_fp_math = 1,
-                .no_signed_zeroes_fp_math = 0,
-                .approx_func_fp_match = 0,
-                .enable_aix_extended_altivec_abi = 0,
-                .honor_sign_dependent_rounding_fp_math = 0,
-                .no_zeroes_in_bss = 0,
-                .guaranteed_tail_call_optimization = 0,
-                .stack_symbol_ordering = 1,
-                .enable_fast_isel = 0,
-                .enable_global_isel = 0,
-                .global_isel_abort_mode = .enable,
-                .swift_async_frame_pointer = .always,
-                .use_init_array = 0,
-                .disable_integrated_assembler = 0,
-                .function_sections = 0,
-                .data_sections = 0,
-                .ignore_xcoff_visibility = 0,
-                .xcoff_traceback_table = 1,
-                .unique_section_names = 1,
-                .unique_basic_block_section_names = 0,
-                .separate_named_sections = 0,
-                .trap_unreachable = 0,
-                .no_trap_after_noreturn = 0,
-                .tls_size = 0,
-                .emulated_tls = 0,
-                .enable_tls_descriptors = 0,
-                .enable_ipra = 0,
-                .emit_stack_size_section = 0,
-                .enable_machine_outliner = 0,
-                .enable_machine_function_splitter = 0,
-                .supports_default_outlining = 0,
-                .emit_address_significance_table = 0,
-                .bb_address_map = 0,
-                .bb_sections = .none,
-                .emit_call_site_information = 0,
-                .supports_debug_entry_values = 0,
-                .enable_debug_entry_values = 0,
-                .value_tracking_variable_locations = 0,
-                .force_dwarf_frame_section = 0,
-                .xray_function_index = 1,
-                .debug_strict_dwarf = 0,
-                .hotpatch = 0,
-                .ppc_gen_scalar_mass_entries = 0,
-                .jmc_instrument = 0,
-                .enable_cfi_fixup = 0,
-                .mis_expect = 0,
-                .xcoff_read_only_pointers = 0,
-                .float_abi = .default,
-                .thread_model = .posix,
-            },
-            .flags1 = .{
-                .fp_op_fusion_mode = .standard,
-                .eabi_version = .default,
-                .debugger_kind = .default,
-                .exception_handling = .none,
-            },
-            .loop_alignment = 0,
-            .mc = .{
-                .abi_name = .{},
-                .assembly_language = .{},
-                .split_dwarf_file = .{},
-                .as_secure_log_file = .{},
-                .argv0 = null,
-                .argv_pointer = null,
-                .argv_count = 0,
-                .integrated_assembler_search_path_pointer = null,
-                .integrated_assembler_search_path_count = 0,
-                .flags = .{
-                    .relax_all = 0,
-                    .no_exec_stack = 0,
-                    .fatal_warnings = 0,
-                    .no_warn = 0,
-                    .no_deprecated_warn = 0,
-                    .no_type_check = 0,
-                    .save_temp_labels = 0,
-                    .incremental_linker_compatible = 0,
-                    .fdpic = 0,
-                    .show_mc_encoding = 0,
-                    .show_mc_inst = 0,
-                    .asm_verbose = 0,
-                    .preserve_asm_comments = 1,
-                    .dwarf64 = 0,
-                    .crel = 0,
-                    .x86_relax_relocations = 1,
-                    .x86_sse2_avx = 0,
-                    .emit_dwarf_unwind = .default,
-                    .use_dwarf_directory = .default,
-                    .debug_compression_type = .none,
-                    .emit_compact_unwind_non_canonical = 0,
-                    .ppc_use_full_register_names = 0,
-                },
-            },
-        },
+        .target_options = Target.Options.default(),
         .cpu_triple = String.from_slice(global.host_triple),
         .cpu_model = String.from_slice(global.host_cpu_model),
         .cpu_features = String.from_slice(global.host_cpu_features),
@@ -642,5 +741,7 @@ pub fn experiment() void {
     }, &error_message) orelse {
         unreachable;
     };
-    _ = target_machine;
+    module.set_target(target_machine);
+
+    module.run_optimization_pipeline(target_machine, OptimizationOptions.default(.{ .optimization_level = .O3, .debug_info = 1 }));
 }
diff --git a/src/lib.zig b/src/lib.zig
index 366eea0..24c9e1c 100644
--- a/src/lib.zig
+++ b/src/lib.zig
@@ -1,4 +1,5 @@
 const builtin = @import("builtin");
+pub const optimization_mode = builtin.mode;
 const VariableArguments = @import("std").builtin.VaList;
 extern "c" fn IsDebuggerPresent() bool;
 extern "c" fn __errno_location() *c_int;
diff --git a/src/llvm.cpp b/src/llvm.cpp
index 047ef00..848149e 100644
--- a/src/llvm.cpp
+++ b/src/llvm.cpp
@@ -3,9 +3,19 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/IR/DebugInfo.h"
+
+#include "llvm/Passes/PassBuilder.h"
+
+#include "llvm/Analysis/TargetLibraryInfo.h"
+
+#include "llvm/Frontend/Driver/CodeGenOptions.h"
+
 #include "llvm/TargetParser/Host.h"
 #include "llvm/TargetParser/SubtargetFeature.h"
+
 #include "llvm/Target/TargetMachine.h"
+
 #include "llvm/MC/TargetRegistry.h"
 
 #define EXPORT extern "C"
@@ -181,6 +191,8 @@ enum class BBLLVMDebugCompressionType : u8
     zstd = 2,
 };
 
+#define BB_LLVM_MC_TARGET_OPTIONS_PADDING_BIT_COUNT (7)
+
 struct BBLLVMMCTargetOptions
 {
     BBLLVMString abi_name;
@@ -214,9 +226,11 @@ struct BBLLVMMCTargetOptions
     u32 debug_compression_type:2;
     u32 emit_compact_unwind_non_canonical:1;
     u32 ppc_use_full_register_names:1;
-    u32 reserved:7;
+    u32 reserved:BB_LLVM_MC_TARGET_OPTIONS_PADDING_BIT_COUNT;
 };
+
 static_assert(sizeof(BBLLVMMCTargetOptions) == 112);
+static_assert(BB_LLVM_MC_TARGET_OPTIONS_PADDING_BIT_COUNT == 7);
 
 enum class BBLLVMCodeModel : u8
 {
@@ -320,6 +334,8 @@ enum class BBLLVMExceptionHandling : u8
     zos = 7,
 };
 
+#define BB_LLVM_TARGET_OPTIONS_PADDING_BIT_COUNT (21)
+
 struct BBLLVMTargetOptions
 {
     u64 unsafe_fp_math:1;
@@ -378,13 +394,18 @@ struct BBLLVMTargetOptions
     u32 eabi_version:3;
     u32 debugger_kind:3;
     u32 exception_handling:3;
-    u32 reserved:21;
+    u32 reserved:BB_LLVM_TARGET_OPTIONS_PADDING_BIT_COUNT;
     unsigned loop_alignment;
     int binutils_version[2];
 
     BBLLVMMCTargetOptions mc;
 };
 
+static_assert(sizeof(BBLLVMTargetOptions) == 136);
+static_assert(BB_LLVM_TARGET_OPTIONS_PADDING_BIT_COUNT == 21);
+
+#define BB_LLVM_TARGET_MACHINE_CREATE_PADDING_BYTE_COUNT (4)
+
 struct BBLLVMTargetMachineCreate
 {
     BBLLVMTargetOptions target_options;
@@ -395,10 +416,11 @@ struct BBLLVMTargetMachineCreate
     BBLLVMCodeModel code_model;
     BBLLVMCodeGenerationOptimizationLevel optimization_level;
     bool jit;
-    u32 reserved;
+    u8 reserved[BB_LLVM_TARGET_MACHINE_CREATE_PADDING_BYTE_COUNT];
 };
 
 static_assert(sizeof(BBLLVMTargetMachineCreate) == 192);
+static_assert(BB_LLVM_TARGET_MACHINE_CREATE_PADDING_BYTE_COUNT == 4);
 
 EXPORT TargetMachine* llvm_create_target_machine(const BBLLVMTargetMachineCreate& create, BBLLVMString* error_message)
 {
@@ -673,3 +695,119 @@ EXPORT TargetMachine* llvm_create_target_machine(const BBLLVMTargetMachineCreate
 
     return target_machine;
 }
+
+// Copies the target machine's data layout and target triple onto the module.
+EXPORT void llvm_module_set_target(Module& module, TargetMachine& target_machine)
+{
+    module.setDataLayout(target_machine.createDataLayout());
+    auto& triple_string = target_machine.getTargetTriple().getTriple();
+    module.setTargetTriple(StringRef(triple_string));
+}
+
+enum class BBLLVMOptimizationLevel : u8
+{
+    O0 = 0,
+    O1 = 1,
+    O2 = 2,
+    O3 = 3,
+    Os = 4,
+    Oz = 5,
+};
+
+#define BB_LLVM_OPTIMIZATION_OPTIONS_PADDING_BIT_COUNT (51)
+struct BBLLVMOptimizationOptions
+{
+    u64 optimization_level:3;
+    u64 debug_info:1;
+    u64 loop_unrolling:1;
+    u64 loop_interleaving:1;
+    u64 loop_vectorization:1;
+    u64 slp_vectorization:1;
+    u64 merge_functions:1;
+    u64 call_graph_profile:1;
+    u64 unified_lto:1;
+    u64 assignment_tracking:1;
+    u64 verify_module:1;
+    u64 reserved:BB_LLVM_OPTIMIZATION_OPTIONS_PADDING_BIT_COUNT;
+};
+
+static_assert(sizeof(BBLLVMOptimizationOptions) == sizeof(u64));
+static_assert(BB_LLVM_OPTIMIZATION_OPTIONS_PADDING_BIT_COUNT == 51);
+
+// Builds LLVM's per-module default pipeline for the requested options and runs it on the module.
+EXPORT void llvm_module_run_optimization_pipeline(Module& module, TargetMachine& target_machine, BBLLVMOptimizationOptions options)
+{
+    // TODO: PGO
+    // TODO: CS profile
+
+    PipelineTuningOptions pipeline_tuning_options;
+    pipeline_tuning_options.LoopUnrolling = options.loop_unrolling;
+    pipeline_tuning_options.LoopInterleaving = options.loop_interleaving;
+    pipeline_tuning_options.LoopVectorization = options.loop_vectorization;
+    pipeline_tuning_options.SLPVectorization = options.slp_vectorization;
+    pipeline_tuning_options.MergeFunctions = options.merge_functions;
+    pipeline_tuning_options.CallGraphProfile = options.call_graph_profile;
+    pipeline_tuning_options.UnifiedLTO = options.unified_lto;
+
+    // TODO: instrumentation
+
+    LoopAnalysisManager loop_analysis_manager;
+    FunctionAnalysisManager function_analysis_manager;
+    CGSCCAnalysisManager cgscc_analysis_manager;
+    ModuleAnalysisManager module_analysis_manager;
+
+    PassBuilder pass_builder(&target_machine, pipeline_tuning_options);
+
+    if (options.assignment_tracking && options.debug_info != 0)
+    {
+        pass_builder.registerPipelineStartEPCallback([&](ModulePassManager& MPM, OptimizationLevel Level) {
+            MPM.addPass(AssignmentTrackingPass());
+        });
+    }
+
+    Triple target_triple = target_machine.getTargetTriple(); // Need to make a copy, incoming bugfix: https://github.com/llvm/llvm-project/pull/127718
+    // TODO: add library (?)
+    std::unique_ptr<TargetLibraryInfoImpl> TLII(llvm::driver::createTLII(target_triple, driver::VectorLibrary::NoLibrary));
+    function_analysis_manager.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
+    pass_builder.registerModuleAnalyses(module_analysis_manager);
+    pass_builder.registerCGSCCAnalyses(cgscc_analysis_manager);
+    pass_builder.registerFunctionAnalyses(function_analysis_manager);
+    pass_builder.registerLoopAnalyses(loop_analysis_manager);
+    pass_builder.crossRegisterProxies(loop_analysis_manager, function_analysis_manager, cgscc_analysis_manager, module_analysis_manager);
+
+    ModulePassManager module_pass_manager;
+
+    if (options.verify_module)
+    {
+        module_pass_manager.addPass(VerifierPass());
+    }
+
+    bool thin_lto = false;
+    bool lto = false;
+
+    OptimizationLevel optimization_level;
+    switch ((BBLLVMOptimizationLevel)options.optimization_level)
+    {
+        case BBLLVMOptimizationLevel::O0: optimization_level = OptimizationLevel::O0; break;
+        case BBLLVMOptimizationLevel::O1: optimization_level = OptimizationLevel::O1; break;
+        case BBLLVMOptimizationLevel::O2: optimization_level = OptimizationLevel::O2; break;
+        case BBLLVMOptimizationLevel::O3: optimization_level = OptimizationLevel::O3; break;
+        case BBLLVMOptimizationLevel::Os: optimization_level = OptimizationLevel::Os; break;
+        case BBLLVMOptimizationLevel::Oz: optimization_level = OptimizationLevel::Oz; break;
+    }
+
+    // TODO: thin lto post-link
+    // TODO: instrument
+    if (thin_lto) {
+        __builtin_trap(); // TODO
+    } else if (lto) {
+        __builtin_trap(); // TODO
+    } else {
+        module_pass_manager.addPass(pass_builder.buildPerModuleDefaultPipeline(optimization_level, lto));
+    }
+
+    // TODO: if emit bitcode/IR
+
+    module_pass_manager.run(module, module_analysis_manager);
+}
diff --git a/src/llvm_api.zig b/src/llvm_api.zig
index 7b0965f..78fb0c6 100644
--- a/src/llvm_api.zig
+++ b/src/llvm_api.zig
@@ -72,6 +72,9 @@ pub extern fn llvm_host_cpu_name() llvm.String;
 pub extern fn llvm_host_cpu_features() llvm.String;
 
 pub extern fn llvm_create_target_machine(create: *const llvm.Target.Machine.Create, error_message: *llvm.String) ?*llvm.Target.Machine;
+pub extern fn llvm_module_set_target(module: *llvm.Module, target_machine: *llvm.Target.Machine) void;
+
+pub extern fn llvm_module_run_optimization_pipeline(module: *llvm.Module, target_machine: *llvm.Target.Machine, options: llvm.OptimizationOptions) void;
 
 pub fn get_initializer(comptime llvm_arch: llvm.Architecture) type {
     const arch_name = @tagName(llvm_arch);