diff --git a/CMakeLists.txt b/CMakeLists.txt
index 373e5f2..60acd20 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,6 +41,7 @@ set(vmdevirt_SOURCES "")
 
 list(APPEND vmdevirt_SOURCES
     "src/devirt_t.cpp"
+    "src/devirt_utils.cpp"
     "src/lifters/add.cpp"
     "src/lifters/div.cpp"
     "src/lifters/jmp.cpp"
@@ -59,6 +60,7 @@ list(APPEND vmdevirt_SOURCES
     "src/main.cpp"
     "src/vmp_rtn_t.cpp"
     "include/devirt_t.hpp"
+    "include/devirt_utils.hpp"
     "include/vm_lifters.hpp"
     "include/vmp_rtn_t.hpp"
 )
@@ -106,6 +108,7 @@ target_link_libraries(vmdevirt PRIVATE
     LLVMX86Disassembler
     LLVMX86Info
     LLVMAsmParser
+    LLVMPasses
 )
 
 unset(CMKR_TARGET)
diff --git a/cmake.toml b/cmake.toml
index 41fdb54..d4afe36 100644
--- a/cmake.toml
+++ b/cmake.toml
@@ -28,7 +28,8 @@ link-libraries = [
     "LLVMX86Desc",
     "LLVMX86Disassembler",
     "LLVMX86Info",
-    "LLVMAsmParser"
+    "LLVMAsmParser",
+    "LLVMPasses"
 ]
 compile-definitions = [
     "NOMINMAX"
diff --git a/include/devirt_t.hpp b/include/devirt_t.hpp
index 63bce8f..42549c2 100644
--- a/include/devirt_t.hpp
+++ b/include/devirt_t.hpp
@@ -18,10 +18,12 @@
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils.h"
 
 #include "X86TargetMachine.h"
 #include "llvm/Pass.h"
 #include "llvm/Passes/PassBuilder.h"
+#include "llvm/Passes/OptimizationLevel.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/include/devirt_utils.hpp b/include/devirt_utils.hpp
new file mode 100644
index 0000000..90b122a
--- /dev/null
+++ b/include/devirt_utils.hpp
@@ -0,0 +1,24 @@
+#pragma once
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include <devirt_t.hpp> // NOTE(review): original include target was lost in extraction - confirm
+
+namespace devirt
+{
+    namespace util
+    {
+        /// <summary>
+        /// helper function to serialize vmp2 file data to vm::instr::code_block's...
+        /// </summary>
+        /// <param name="virt_rtns">vector of pairs {vm enter offset, vector of code blocks} which gets filled up with
+        /// serialized data</param>
+        /// <param name="vmp2file">a vector of bytes containing the vmp2 file...</param>
+        /// <returns>returns true if serialization was successful, false when the buffer is too small to hold a
+        /// file header or the file is not a v4 file</returns>
+        bool serialize_vmp2(
+            std::vector< std::pair< std::uint32_t, std::vector< vm::instrs::code_block_t > > > &virt_rtns,
+            std::vector< std::uint8_t > &vmp2file );
+    } // namespace util
+} // namespace devirt
diff --git a/src/devirt_t.cpp b/src/devirt_t.cpp
index 8b2c5d2..7f7143f 100644
--- a/src/devirt_t.cpp
+++ b/src/devirt_t.cpp
@@ -67,7 +67,68 @@ namespace vm
 
     bool devirt_t::compile( std::vector< std::uint8_t > &obj )
     {
-        return true;
+        // run the function-level optimization pipeline over every lifted routine...
+        llvm::legacy::FunctionPassManager pass_mgr( llvm_module );
+        pass_mgr.add( llvm::createPromoteMemoryToRegisterPass() );
+        pass_mgr.add( llvm::createCFGSimplificationPass() );
+        pass_mgr.add( llvm::createSROAPass() );
+        pass_mgr.add( llvm::createLoopSimplifyCFGPass() );
+        pass_mgr.add( llvm::createNewGVNPass() );
+        pass_mgr.add( llvm::createReassociatePass() );
+        pass_mgr.add( llvm::createPartiallyInlineLibCallsPass() );
+        pass_mgr.add( llvm::createDeadCodeEliminationPass() );
+        pass_mgr.add( llvm::createCFGSimplificationPass() );
+        pass_mgr.add( llvm::createInstructionCombiningPass() );
+        pass_mgr.add( llvm::createFlattenCFGPass() );
+
+        // legacy function pass managers expect doInitialization/doFinalization around the runs...
+        pass_mgr.doInitialization();
+        for ( const auto &vmp_rtn : vmp_rtns )
+            pass_mgr.run( *vmp_rtn->llvm_fptr );
+        pass_mgr.doFinalization();
+
+        // compile to native x86_64....
+        std::string error;
+        const auto target_triple = llvm::sys::getDefaultTargetTriple();
+        llvm_module->setTargetTriple( target_triple );
+
+        // lookupTarget yields nullptr and fills `error` when no registered target matches the triple...
+        const auto target = llvm::TargetRegistry::lookupTarget( target_triple, error );
+        if ( !target )
+        {
+            llvm::errs() << "[!] failed to lookup target " << target_triple << ", reason = " << error << "\n";
+            return false;
+        }
+
+        // own the target machine so it is released on every return path...
+        llvm::TargetOptions opt;
+        auto reloc_model = llvm::Optional< llvm::Reloc::Model >();
+        std::unique_ptr< llvm::TargetMachine > target_machine(
+            target->createTargetMachine( target_triple, "generic", "", opt, reloc_model ) );
+
+        if ( !target_machine )
+        {
+            llvm::errs() << "[!] failed to create a target machine for " << target_triple << "\n";
+            return false;
+        }
+
+        llvm_module->setDataLayout( target_machine->createDataLayout() );
+
+        llvm::SmallVector< char, 128 > buff;
+        llvm::raw_svector_ostream dest( buff );
+        llvm::legacy::PassManager pass;
+
+        // addPassesToEmitFile returns true when the target cannot emit the requested file type...
+        if ( target_machine->addPassesToEmitFile( pass, dest, nullptr, llvm::CGFT_ObjectFile ) )
+        {
+            llvm::errs() << "[!] target machine cannot emit an object file\n";
+            return false;
+        }
+
+        // PassManager::run reports whether the module was modified, not whether emission succeeded...
+        pass.run( *llvm_module );
+        obj.assign( buff.begin(), buff.end() );
+        return true;
     }
 
     llvm::Function *devirt_t::lift( std::uintptr_t rtn_begin, std::vector< vm::instrs::code_block_t > code_blocks )
diff --git a/src/devirt_utils.cpp b/src/devirt_utils.cpp
new file mode 100644
index 0000000..a810ec2
--- /dev/null
+++ b/src/devirt_utils.cpp
@@ -0,0 +1,70 @@
+#include <devirt_utils.hpp> // NOTE(review): original include target was lost in extraction - confirm
+
+#include <cstdio>
+
+namespace devirt
+{
+    namespace util
+    {
+        bool serialize_vmp2(
+            std::vector< std::pair< std::uint32_t, std::vector< vm::instrs::code_block_t > > > &virt_rtns,
+            std::vector< std::uint8_t > &vmp2file )
+        {
+            // a buffer smaller than the header cannot be reinterpreted safely...
+            if ( vmp2file.size() < sizeof( vmp2::v4::file_header ) )
+            {
+                std::printf( "[!] vmp2 file is too small to contain a file header...\n" );
+                return false;
+            }
+
+            const auto file_header = reinterpret_cast< vmp2::v4::file_header * >( vmp2file.data() );
+
+            if ( file_header->version != vmp2::version_t::v4 )
+            {
+                std::printf( "[!] invalid vmp2 file version... this build uses v4...\n" );
+                return false;
+            }
+
+            // routines start at rtn_offset from the header and each one is rtn_t::size bytes long...
+            auto first_rtn = reinterpret_cast< vmp2::v4::rtn_t * >( reinterpret_cast< std::uintptr_t >( file_header ) +
+                                                                     file_header->rtn_offset );
+
+            for ( auto [ rtn_block, rtn_idx ] = std::pair{ first_rtn, 0ull }; rtn_idx < file_header->rtn_count;
+                  ++rtn_idx, rtn_block = reinterpret_cast< vmp2::v4::rtn_t * >(
+                                 reinterpret_cast< std::uintptr_t >( rtn_block ) + rtn_block->size ) )
+            {
+                virt_rtns.push_back( { rtn_block->vm_enter_offset, {} } );
+                auto &rtn_code_blocks = virt_rtns.back().second;
+
+                for ( auto [ code_block, block_idx ] = std::pair{ &rtn_block->code_blocks[ 0 ], 0ull };
+                      block_idx < rtn_block->code_block_count;
+                      ++block_idx, code_block = reinterpret_cast< vmp2::v4::code_block_t * >(
+                                       reinterpret_cast< std::uintptr_t >( code_block ) +
+                                       code_block->next_block_offset ) )
+                {
+                    // virtual instructions sit after the fixed part of the block plus its branch addresses...
+                    // NOTE(review): 8 is presumably the size of one branch_addr entry - confirm against
+                    // vmp2::v4::code_block_t
+                    auto block_vinstrs = reinterpret_cast< vm::instrs::virt_instr_t * >(
+                        reinterpret_cast< std::uintptr_t >( code_block ) + sizeof( vmp2::v4::code_block_t ) +
+                        ( code_block->num_block_addrs * 8 ) );
+
+                    vm::instrs::code_block_t _code_block{ code_block->vip_begin };
+                    _code_block.jcc.has_jcc = code_block->has_jcc;
+                    _code_block.jcc.type = code_block->jcc_type;
+
+                    for ( auto idx = 0u; idx < code_block->num_block_addrs; ++idx )
+                        _code_block.jcc.block_addr.push_back( code_block->branch_addr[ idx ] );
+
+                    for ( auto idx = 0u; idx < code_block->vinstr_count; ++idx )
+                        _code_block.vinstrs.push_back( block_vinstrs[ idx ] );
+
+                    // the block is not used again, so move it instead of copying its vectors...
+                    rtn_code_blocks.push_back( std::move( _code_block ) );
+                }
+            }
+
+            return true;
+        }
+    } // namespace util
+} // namespace devirt
diff --git a/src/main.cpp b/src/main.cpp
index 0e866be..2c25662 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,60 +1,9 @@
 #include
 #include
 #include
+#include <devirt_utils.hpp> // NOTE(review): original include target was lost in extraction - confirm
 #include
 
-///
-/// helper function to serialize vmp2 file data to vm::instr::code_block's...
-///
-/// vector of pairs {vm enter offset, vector of code blocks} which gets filled up with
-/// serialized data
-/// a vector of bytes containing the vmp2 file...
-/// returns true if serialization was successful
-bool serialize_vmp2( std::vector< std::pair< std::uint32_t, std::vector< vm::instrs::code_block_t > > > &virt_rtns,
-                     std::vector< std::uint8_t > &vmp2file )
-{
-    const auto file_header = reinterpret_cast< vmp2::v4::file_header * >( vmp2file.data() );
-
-    if ( file_header->version != vmp2::version_t::v4 )
-    {
-        std::printf( "[!] invalid vmp2 file version... this build uses v3...\n" );
-        return false;
-    }
-
-    auto first_rtn = reinterpret_cast< vmp2::v4::rtn_t * >( reinterpret_cast< std::uintptr_t >( file_header ) +
-                                                             file_header->rtn_offset );
-
-    for ( auto [ rtn_block, rtn_idx ] = std::pair{ first_rtn, 0ull }; rtn_idx < file_header->rtn_count;
-          ++rtn_idx, rtn_block = reinterpret_cast< vmp2::v4::rtn_t * >(
-                         reinterpret_cast< std::uintptr_t >( rtn_block ) + rtn_block->size ) )
-    {
-        virt_rtns.push_back( { rtn_block->vm_enter_offset, {} } );
-        for ( auto [ code_block, block_idx ] = std::pair{ &rtn_block->code_blocks[ 0 ], 0ull };
-              block_idx < rtn_block->code_block_count;
-              ++block_idx, code_block = reinterpret_cast< vmp2::v4::code_block_t * >(
-                               reinterpret_cast< std::uintptr_t >( code_block ) + code_block->next_block_offset ) )
-        {
-            auto block_vinstrs = reinterpret_cast< vm::instrs::virt_instr_t * >(
-                reinterpret_cast< std::uintptr_t >( code_block ) + sizeof vmp2::v4::code_block_t +
-                ( code_block->num_block_addrs * 8 ) );
-
-            vm::instrs::code_block_t _code_block{ code_block->vip_begin };
-            _code_block.jcc.has_jcc = code_block->has_jcc;
-            _code_block.jcc.type = code_block->jcc_type;
-
-            for ( auto idx = 0u; idx < code_block->num_block_addrs; ++idx )
-                _code_block.jcc.block_addr.push_back( code_block->branch_addr[ idx ] );
-
-            for ( auto idx = 0u; idx < code_block->vinstr_count; ++idx )
-                _code_block.vinstrs.push_back( block_vinstrs[ idx ] );
-
-            virt_rtns.back().second.push_back( _code_block );
-        }
-    }
-
-    return true;
-}
-
 int main( int argc, const char *argv[] )
 {
     argparse::argument_parser_t parser( "vmdevirt", "virtual instruction pseudo code generator" );
@@ -87,8 +36,8 @@ int main( int argc, const char *argv[] )
     const auto file_header = reinterpret_cast< vmp2::v4::file_header * >( vmp2file.data() );
     std::vector< std::pair< std::uint32_t, std::vector< vm::instrs::code_block_t > > > virt_rtns;
 
-    if ( !serialize_vmp2( virt_rtns, vmp2file ) )
+    if ( !devirt::util::serialize_vmp2( virt_rtns, vmp2file ) )
     {
         std::printf( "> failed to serialize vmp2 file...\n" );
-        return false;
+        return -1;
     }
@@ -97,18 +46,41 @@ int main( int argc, const char *argv[] )
     llvm::Module llvm_module( "VMProtect 2 Devirtualization", llvm_ctx );
     std::vector< std::uint8_t > compiled_obj;
 
-    vm::devirt_t vmp_rtn( &llvm_ctx, &llvm_module );
+    vm::devirt_t vmp_devirt( &llvm_ctx, &llvm_module );
 
     for ( auto &[ vm_enter_offset, vmp2_code_blocks ] : virt_rtns )
     {
-        if ( !vmp_rtn.lift( vm_enter_offset + file_header->image_base, vmp2_code_blocks ) )
+        if ( !vmp_devirt.lift( vm_enter_offset + file_header->image_base, vmp2_code_blocks ) )
         {
             std::printf( "[!] failed to lift rtn_0x%p, please review the console...\n",
                          vm_enter_offset + file_header->image_base );
             return -1;
         }
+
+        std::printf( "> lifted rtn_0x%p, number of blocks = %zu\n", vm_enter_offset + file_header->image_base,
+                     vmp2_code_blocks.size() );
     }
 
-    vmp_rtn.compile( compiled_obj );
-    std::printf( "> compiled obj size = %d\n", compiled_obj.size() );
+    llvm::LLVMInitializeX86TargetInfo();
+    llvm::LLVMInitializeX86Target();
+    llvm::LLVMInitializeX86TargetMC();
+    llvm::LLVMInitializeX86AsmParser();
+    llvm::LLVMInitializeX86AsmPrinter();
+
+    if ( !vmp_devirt.compile( compiled_obj ) )
+    {
+        std::printf( "[!] failed to compile lifted routines, please review the console...\n" );
+        return -1;
+    }
+
+    std::printf( "> compiled all routines... compiled obj size = %zu\n", compiled_obj.size() );
+
+    std::ofstream obj_file( "devirt.o", std::ios::binary );
+    obj_file.write( reinterpret_cast< const char * >( compiled_obj.data() ),
+                    static_cast< std::streamsize >( compiled_obj.size() ) );
+    if ( !obj_file )
+    {
+        std::printf( "[!] failed to write devirt.o...\n" );
+        return -1;
+    }
 }
\ No newline at end of file