From 4b0e53fbe1ac2211a0edee305caaa25e8731de13 Mon Sep 17 00:00:00 2001 From: _xeroxz Date: Thu, 12 Aug 2021 22:17:02 -0700 Subject: [PATCH] cleaned all this shit, also added a demo to generate object files... --- CMakeLists.txt | 8 +++++ cmake.toml | 10 +++++- include/vm_lifters.hpp | 9 +++--- include/vmp_rtn.hpp | 71 ++++++++++++++++++++++++------------------ src/lifters/add.cpp | 3 +- src/lifters/jmp.cpp | 6 ++-- src/lifters/nand.cpp | 2 +- src/lifters/shr.cpp | 4 +-- src/lifters/vmexit.cpp | 3 +- src/main.cpp | 45 +++++++++++++++++++++++++- src/vmp_rtn.cpp | 41 +++++++++++++++--------- 11 files changed, 141 insertions(+), 61 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 59069e8..d5561ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,6 +86,8 @@ target_compile_definitions(vmdevirt PRIVATE target_include_directories(vmdevirt PRIVATE include "dependencies/llvm/llvm/include/" + "dependencies/llvm/llvm/lib/Target/X86/" + "build/dependencies/llvm/llvm/lib/Target/X86" "build/dependencies/llvm/llvm/include/" ) @@ -96,6 +98,12 @@ target_link_libraries(vmdevirt PRIVATE LLVMCodeGen LLVMSupport LLVMLinker + LLVMX86CodeGen + LLVMX86AsmParser + LLVMX86Desc + LLVMX86Disassembler + LLVMX86Info + LLVMAsmParser ) unset(CMKR_TARGET) diff --git a/cmake.toml b/cmake.toml index 1be73bc..41fdb54 100644 --- a/cmake.toml +++ b/cmake.toml @@ -12,6 +12,8 @@ sources = [ include-directories = [ "include", "dependencies/llvm/llvm/include/", + "dependencies/llvm/llvm/lib/Target/X86/", + "build/dependencies/llvm/llvm/lib/Target/X86", "build/dependencies/llvm/llvm/include/", ] link-libraries = [ @@ -20,7 +22,13 @@ link-libraries = [ "LLVMCore", "LLVMCodeGen", "LLVMSupport", - "LLVMLinker" + "LLVMLinker", + "LLVMX86CodeGen", + "LLVMX86AsmParser", + "LLVMX86Desc", + "LLVMX86Disassembler", + "LLVMX86Info", + "LLVMAsmParser" ] compile-definitions = [ "NOMINMAX" diff --git a/include/vm_lifters.hpp b/include/vm_lifters.hpp index 1bbaec2..950e519 100644 --- a/include/vm_lifters.hpp +++ b/include/vm_lifters.hpp @@ -70,11 +70,10 @@ namespace vm { vm::handler::JMP, &jmp }, { vm::handler::VMEXIT, &vmexit } }; - static vm::llvm_value_t *and_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *result ); - static vm::llvm_value_t *add_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *lhs, - vm::llvm_value_t *rhs ); - static vm::llvm_value_t *shr_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *lhs, - vm::llvm_value_t *rhs, vm::llvm_value_t *result ); + static llvm::Value *and_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *result ); + static llvm::Value *add_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *lhs, llvm::Value *rhs ); + static llvm::Value *shr_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *lhs, llvm::Value *rhs, + llvm::Value *result ); public: static lifters_t *get_instance( void ) diff --git a/include/vmp_rtn.hpp b/include/vmp_rtn.hpp index c309fc6..e9f816b 100644 --- a/include/vmp_rtn.hpp +++ b/include/vmp_rtn.hpp @@ -3,67 +3,78 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Verifier.h" + +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" + +#include "X86TargetMachine.h" #include "llvm/Pass.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" -namespace vm +namespace llvm { - // Obsessive-compulsive disorder is characterized by unreasonable - // thoughts and fears (obsessions) that lead to compulsive behaviors. - using llvm_type_t = llvm::Type; - using llvm_value_t = llvm::Value; - using llvm_module_t = llvm::Module; - using llvm_context_t = llvm::LLVMContext; - using llvm_function_t = llvm::Function; - using llvm_irbuilder_t = llvm::IRBuilder<>; - using llvm_alloca_inst_t = llvm::AllocaInst; - using llvm_basic_block_t = llvm::BasicBlock; - using llvm_global_value_t = llvm::GlobalValue; + extern "C" void LLVMInitializeX86TargetInfo(); + extern "C" void LLVMInitializeX86Target(); + extern "C" void LLVMInitializeX86TargetMC(); + extern "C" void LLVMInitializeX86AsmParser(); + extern "C" void LLVMInitializeX86AsmPrinter(); +} // namespace llvm +namespace vm +{ class vmp_rtn_t { friend class lifters_t; public: - explicit vmp_rtn_t( llvm_context_t *llvm_ctx, llvm_module_t *llvm_module, vm::ctx_t *vm_ctx, + explicit vmp_rtn_t( llvm::LLVMContext *llvm_ctx, llvm::Module *llvm_module, vm::ctx_t *vm_ctx, std::uintptr_t rtn_begin, std::vector< vm::instrs::code_block_t > vmp2_code_blocks ); - llvm_function_t *lift( void ); + llvm::Function *lift( void ); private: - llvm_context_t *llvm_ctx; - llvm_module_t *llvm_module; - llvm_function_t *llvm_fptr; - llvm_alloca_inst_t *flags, *stack; + llvm::LLVMContext *llvm_ctx; + llvm::Module *llvm_module; + llvm::Function *llvm_fptr; + llvm::AllocaInst *flags, *stack; vm::ctx_t *vm_ctx; std::uintptr_t rtn_begin; - std::shared_ptr< llvm_irbuilder_t > ir_builder; + std::shared_ptr< llvm::IRBuilder<> > ir_builder; - std::vector< llvm_alloca_inst_t * > virtual_registers; - std::vector< std::pair< std::uintptr_t, llvm_basic_block_t * > > llvm_code_blocks; + std::vector< llvm::AllocaInst * > virtual_registers; + std::vector< std::pair< std::uintptr_t, llvm::BasicBlock * > > llvm_code_blocks; std::vector< vm::instrs::code_block_t > vmp2_code_blocks; - void push( std::uint8_t byte_size, llvm_value_t *input_val ); + void push( std::uint8_t byte_size, llvm::Value *input_val ); llvm::Value *pop( std::uint8_t byte_size ); - llvm::Value *load_value( std::uint8_t byte_size, llvm_global_value_t *global ); - llvm::Value *load_value( std::uint8_t byte_size, llvm_alloca_inst_t *var ); + llvm::Value *load_value( std::uint8_t byte_size, llvm::GlobalValue *global ); + llvm::Value *load_value( std::uint8_t byte_size, llvm::AllocaInst *var ); - llvm_value_t *compute_sf( std::uint8_t byte_size, llvm_value_t *val ); - llvm_value_t *compute_zf( std::uint8_t byte_size, llvm_value_t *val ); - llvm_value_t *compute_pf( std::uint8_t byte_size, llvm_value_t *val ); - llvm_value_t *combine_flags( llvm_value_t *cf, llvm_value_t *pf, llvm_value_t *af, llvm_value_t *zf, - llvm_value_t *sf, llvm_value_t *of ); + llvm::Value *compute_sf( std::uint8_t byte_size, llvm::Value *val ); + llvm::Value *compute_zf( std::uint8_t byte_size, llvm::Value *val ); + llvm::Value *compute_pf( std::uint8_t byte_size, llvm::Value *val ); + llvm::Value *combine_flags( llvm::Value *cf, llvm::Value *pf, llvm::Value *af, llvm::Value *zf, llvm::Value *sf, + llvm::Value *of ); void create_virtual_registers( void ); void create_routine( void ); diff --git a/src/lifters/add.cpp b/src/lifters/add.cpp index 85903bb..4c25648 100644 --- a/src/lifters/add.cpp +++ b/src/lifters/add.cpp @@ -2,8 +2,7 @@ namespace vm { - vm::llvm_value_t *lifters_t::add_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *lhs, - vm::llvm_value_t *rhs ) + llvm::Value *lifters_t::add_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *lhs, llvm::Value *rhs ) { auto op_size = llvm::IntegerType::get( *rtn->llvm_ctx, byte_size * 8 ); std::vector< llvm::Type * > intrinsic_arg_types; diff --git a/src/lifters/jmp.cpp b/src/lifters/jmp.cpp index 3ed5b33..5ccfe32 100644 --- a/src/lifters/jmp.cpp +++ b/src/lifters/jmp.cpp @@ -20,7 +20,7 @@ namespace vm // find the first branch basic block... auto bb1 = std::find_if( rtn->llvm_code_blocks.begin(), rtn->llvm_code_blocks.end(), - [ & ]( const std::pair< std::uintptr_t, llvm_basic_block_t * > &block_data ) -> bool { + [ & ]( const std::pair< std::uintptr_t, llvm::BasicBlock * > &block_data ) -> bool { return block_data.first == vm_code_block.jcc.block_addr[ 0 ]; } ); @@ -30,7 +30,7 @@ namespace vm // find the second branch basic block... auto bb2 = std::find_if( rtn->llvm_code_blocks.begin(), rtn->llvm_code_blocks.end(), - [ & ]( const std::pair< std::uintptr_t, llvm_basic_block_t * > &block_data ) -> bool { + [ & ]( const std::pair< std::uintptr_t, llvm::BasicBlock * > &block_data ) -> bool { return block_data.first == vm_code_block.jcc.block_addr[ 1 ]; } ); @@ -44,7 +44,7 @@ namespace vm auto rva = rtn->pop( 8 ); auto bb_data = std::find_if( rtn->llvm_code_blocks.begin(), rtn->llvm_code_blocks.end(), - [ & ]( const std::pair< std::uintptr_t, llvm_basic_block_t * > &block_data ) -> bool { + [ & ]( const std::pair< std::uintptr_t, llvm::BasicBlock * > &block_data ) -> bool { return block_data.first == vm_code_block.jcc.block_addr[ 0 ]; } ); diff --git a/src/lifters/nand.cpp b/src/lifters/nand.cpp index fd602f2..d4af883 100644 --- a/src/lifters/nand.cpp +++ b/src/lifters/nand.cpp @@ -2,7 +2,7 @@ namespace vm { - vm::llvm_value_t *lifters_t::and_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *result ) + llvm::Value *lifters_t::and_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *result ) { auto cf = llvm::ConstantInt::get( llvm::IntegerType::get( *rtn->llvm_ctx, 64 ), 0 ); auto of = llvm::ConstantInt::get( llvm::IntegerType::get( *rtn->llvm_ctx, 64 ), 0 ); diff --git a/src/lifters/shr.cpp b/src/lifters/shr.cpp index dc11e46..4ffae00 100644 --- a/src/lifters/shr.cpp +++ b/src/lifters/shr.cpp @@ -3,8 +3,8 @@ namespace vm { // our undefined behavior is that we don't model cases where the shift count is zero... - vm::llvm_value_t *lifters_t::shr_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *lhs, - vm::llvm_value_t *rhs, vm::llvm_value_t *result ) + llvm::Value *lifters_t::shr_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *lhs, llvm::Value *rhs, + llvm::Value *result ) { auto op_size = llvm::IntegerType::get( *rtn->llvm_ctx, byte_size * 8 ); auto msb = rtn->ir_builder->CreateLShr( lhs, ( byte_size * 8 ) - 1 ); diff --git a/src/lifters/vmexit.cpp b/src/lifters/vmexit.cpp index ee1bb99..f7265e5 100644 --- a/src/lifters/vmexit.cpp +++ b/src/lifters/vmexit.cpp @@ -4,5 +4,6 @@ namespace vm { lifters_t::lifter_callback_t lifters_t::vmexit = [ & ]( vm::vmp_rtn_t *rtn, const vm::instrs::code_block_t &vm_code_block, - const vm::instrs::virt_instr_t &vinstr, llvm::IRBuilder<> *ir_builder ) { ir_builder->CreateRetVoid(); }; + const vm::instrs::virt_instr_t &vinstr, + llvm::IRBuilder<> *ir_builder ) { ir_builder->CreateRet( ir_builder->CreateLoad( rtn->stack ) ); }; } diff --git a/src/main.cpp b/src/main.cpp index 77043ad..faf54a4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -77,5 +77,48 @@ int main( int argc, const char *argv[] ) vm::vmp_rtn_t vmp_rtn( &llvm_ctx, &llvm_module, &vm_ctx, first_block->vip_begin, vmp_code_blocks ); auto func = vmp_rtn.lift(); - func->print( llvm::outs(), nullptr ); + llvm_module.print( llvm::outs(), nullptr ); + + llvm::LLVMInitializeX86TargetInfo(); + llvm::LLVMInitializeX86Target(); + llvm::LLVMInitializeX86TargetMC(); + llvm::LLVMInitializeX86AsmParser(); + llvm::LLVMInitializeX86AsmPrinter(); + + auto TargetTriple = sys::getDefaultTargetTriple(); + llvm_module.setTargetTriple( TargetTriple ); + + std::string Error; + auto Target = TargetRegistry::lookupTarget( TargetTriple, Error ); + + auto CPU = "generic"; + auto Features = ""; + + TargetOptions opt; + auto RM = Optional< Reloc::Model >(); + auto TheTargetMachine = Target->createTargetMachine( TargetTriple, CPU, Features, opt, RM ); + + llvm_module.setDataLayout( TheTargetMachine->createDataLayout() ); + + auto Filename = "output.o"; + std::error_code EC; + raw_fd_ostream dest( Filename, EC, sys::fs::OF_None ); + + if ( EC ) + { + errs() << "Could not open file: " << EC.message(); + return 1; + } + + legacy::PassManager pass; + auto FileType = CGFT_ObjectFile; + + if ( TheTargetMachine->addPassesToEmitFile( pass, dest, nullptr, FileType ) ) + { + errs() << "TheTargetMachine can't emit a file of this type"; + return 1; + } + + pass.run( llvm_module ); + dest.flush(); } \ No newline at end of file diff --git a/src/vmp_rtn.cpp b/src/vmp_rtn.cpp index a14f127..874218d 100644 --- a/src/vmp_rtn.cpp +++ b/src/vmp_rtn.cpp @@ -3,14 +3,14 @@ namespace vm { - vmp_rtn_t::vmp_rtn_t( llvm_context_t *llvm_ctx, llvm_module_t *llvm_module, vm::ctx_t *vm_ctx, + vmp_rtn_t::vmp_rtn_t( llvm::LLVMContext *llvm_ctx, llvm::Module *llvm_module, vm::ctx_t *vm_ctx, std::uintptr_t rtn_begin, std::vector< vm::instrs::code_block_t > vmp2_code_blocks ) : llvm_ctx( llvm_ctx ), llvm_module( llvm_module ), vm_ctx( vm_ctx ), rtn_begin( rtn_begin ), vmp2_code_blocks( vmp2_code_blocks ) { // do not change the ordering of these function calls... create_routine(); - ir_builder = std::make_shared< llvm_irbuilder_t >( *llvm_ctx ); + ir_builder = std::make_shared< llvm::IRBuilder<> >( *llvm_ctx ); ir_builder->SetInsertPoint( llvm_code_blocks[ 0 ].second ); flags = ir_builder->CreateAlloca( ir_builder->getInt64Ty(), nullptr, "flags" ); stack = ir_builder->CreateAlloca( llvm::PointerType::get( ir_builder->getInt8Ty(), 0ull ), nullptr, "sp" ); @@ -32,7 +32,7 @@ namespace vm { // function has no arguments and returns void... maybe change this in the future as i learn // more and more LLVM... - auto func_ty = llvm::FunctionType::get( llvm::Type::getVoidTy( *llvm_ctx ), + auto func_ty = llvm::FunctionType::get( llvm::PointerType::getInt8PtrTy( *llvm_ctx ), { llvm::PointerType::getInt8PtrTy( *llvm_ctx ) }, false ); // convert the rtn_begin address to a hex string and prepend "rtn_" to it... @@ -51,7 +51,7 @@ namespace vm } } - void vmp_rtn_t::push( std::uint8_t num_bytes, llvm_value_t *val ) + void vmp_rtn_t::push( std::uint8_t num_bytes, llvm::Value *val ) { // sub rsp, num_bytes auto rsp_addr = ir_builder->CreateLoad( stack ); @@ -67,7 +67,7 @@ namespace vm ir_builder->CreateStore( val, rsp_cast_ptr ); } - llvm_value_t *vmp_rtn_t::pop( std::uint8_t num_bytes ) + llvm::Value *vmp_rtn_t::pop( std::uint8_t num_bytes ) { // mov rax, [rsp] auto rsp_addr = ir_builder->CreateLoad( stack ); @@ -85,7 +85,7 @@ namespace vm return pop_val; } - llvm_value_t *vmp_rtn_t::load_value( std::uint8_t byte_size, llvm_global_value_t *var ) + llvm::Value *vmp_rtn_t::load_value( std::uint8_t byte_size, llvm::GlobalValue *var ) { if ( byte_size * 8 != var->getType()->getPrimitiveSizeInBits() ) { @@ -97,7 +97,7 @@ namespace vm return ir_builder->CreateLoad( llvm::IntegerType::get( *llvm_ctx, byte_size * 8 ), var ); } - llvm_value_t *vmp_rtn_t::load_value( std::uint8_t byte_size, llvm_alloca_inst_t *var ) + llvm::Value *vmp_rtn_t::load_value( std::uint8_t byte_size, llvm::AllocaInst *var ) { if ( byte_size * 8 != var->getType()->getPrimitiveSizeInBits() ) { @@ -109,7 +109,7 @@ namespace vm return ir_builder->CreateLoad( llvm::IntegerType::get( *llvm_ctx, byte_size * 8 ), var ); } - llvm_function_t *vmp_rtn_t::lift( void ) + llvm::Function *vmp_rtn_t::lift( void ) { auto &code_blocks = llvm_fptr->getBasicBlockList(); auto lifters = vm::lifters_t::get_instance(); @@ -117,35 +117,46 @@ namespace vm for ( auto idx = 0u; idx < code_blocks.size(); ++idx ) { ir_builder->SetInsertPoint( llvm_code_blocks[ idx ].second ); + for ( auto &vinstr : vmp2_code_blocks[ idx ].vinstrs ) { if ( !lifters->lift( this, vmp2_code_blocks[ idx ], vinstr, ir_builder.get() ) ) { - std::printf( "> failed to devirtualize virtual instruction with opcode = %d\n", vinstr.opcode ); - llvm_module->print( llvm::outs(), nullptr ); + std::printf( + "> failed to devirtualize virtual instruction with opcode = %d, handler table rva = 0x%x\n", + vinstr.opcode, vinstr.trace_data.regs.r12 - vinstr.trace_data.regs.r13 ); + return nullptr; } } } + // TODO: update this list of optimizations to add more... + llvm::legacy::FunctionPassManager fpm( llvm_module ); + fpm.add( llvm::createInstructionCombiningPass() ); + fpm.add( llvm::createReassociatePass() ); + fpm.add( llvm::createGVNPass() ); + fpm.add( llvm::createCFGSimplificationPass() ); + fpm.doInitialization(); + fpm.run( *llvm_fptr ); return llvm_fptr; } - llvm_value_t *vmp_rtn_t::compute_sf( std::uint8_t byte_size, llvm_value_t *val ) + llvm::Value *vmp_rtn_t::compute_sf( std::uint8_t byte_size, llvm::Value *val ) { auto op_size = llvm::IntegerType::get( *llvm_ctx, byte_size * 8 ); auto msb = ir_builder->CreateLShr( val, ( byte_size * 8 ) - 1 ); return ir_builder->CreateZExt( msb, llvm::IntegerType::get( *llvm_ctx, 64 ) ); } - llvm_value_t *vmp_rtn_t::compute_zf( std::uint8_t byte_size, llvm_value_t *val ) + llvm::Value *vmp_rtn_t::compute_zf( std::uint8_t byte_size, llvm::Value *val ) { auto op_size = llvm::IntegerType::get( *llvm_ctx, byte_size * 8 ); auto is_zero = ir_builder->CreateICmpEQ( val, llvm::ConstantInt::get( op_size, 0 ) ); return ir_builder->CreateZExt( is_zero, llvm::IntegerType::get( *llvm_ctx, 64 ) ); } - llvm_value_t *vmp_rtn_t::compute_pf( std::uint8_t byte_size, llvm_value_t *val ) + llvm::Value *vmp_rtn_t::compute_pf( std::uint8_t byte_size, llvm::Value *val ) { auto operand_size = llvm::IntegerType::get( *llvm_ctx, byte_size * 8 ); auto popcount_intrinsic = llvm::Intrinsic::getDeclaration( llvm_module, llvm::Intrinsic::ctpop, @@ -156,8 +167,8 @@ namespace vm return ir_builder->CreateCall( popcount_intrinsic, { extended_bits } ); } - llvm_value_t *vmp_rtn_t::combine_flags( llvm_value_t *cf, llvm_value_t *pf, llvm_value_t *af, llvm_value_t *zf, - llvm_value_t *sf, llvm_value_t *of ) + llvm::Value *vmp_rtn_t::combine_flags( llvm::Value *cf, llvm::Value *pf, llvm::Value *af, llvm::Value *zf, + llvm::Value *sf, llvm::Value *of ) { auto shifted_pf = ir_builder->CreateShl( pf, 2, "shifted_pf", true, true ); auto shifted_af = ir_builder->CreateShl( llvm::ConstantInt::get( llvm::IntegerType::get( *llvm_ctx, 64 ), 0 ),