cleaned all this shit, also added a demo to generate object files...

merge-requests/6/head
_xeroxz 3 years ago
parent 5223011f4b
commit 4b0e53fbe1

@ -86,6 +86,8 @@ target_compile_definitions(vmdevirt PRIVATE
target_include_directories(vmdevirt PRIVATE
include
"dependencies/llvm/llvm/include/"
"dependencies/llvm/llvm/lib/Target/X86/"
"build/dependencies/llvm/llvm/lib/Target/X86"
"build/dependencies/llvm/llvm/include/"
)
@ -96,6 +98,12 @@ target_link_libraries(vmdevirt PRIVATE
LLVMCodeGen
LLVMSupport
LLVMLinker
LLVMX86CodeGen
LLVMX86AsmParser
LLVMX86Desc
LLVMX86Disassembler
LLVMX86Info
LLVMAsmParser
)
unset(CMKR_TARGET)

@ -12,6 +12,8 @@ sources = [
include-directories = [
"include",
"dependencies/llvm/llvm/include/",
"dependencies/llvm/llvm/lib/Target/X86/",
"build/dependencies/llvm/llvm/lib/Target/X86",
"build/dependencies/llvm/llvm/include/",
]
link-libraries = [
@ -20,7 +22,13 @@ link-libraries = [
"LLVMCore",
"LLVMCodeGen",
"LLVMSupport",
"LLVMLinker"
"LLVMLinker",
"LLVMX86CodeGen",
"LLVMX86AsmParser",
"LLVMX86Desc",
"LLVMX86Disassembler",
"LLVMX86Info",
"LLVMAsmParser"
]
compile-definitions = [
"NOMINMAX"

@ -70,11 +70,10 @@ namespace vm
{ vm::handler::JMP, &jmp },
{ vm::handler::VMEXIT, &vmexit } };
static vm::llvm_value_t *and_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *result );
static vm::llvm_value_t *add_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *lhs,
vm::llvm_value_t *rhs );
static vm::llvm_value_t *shr_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *lhs,
vm::llvm_value_t *rhs, vm::llvm_value_t *result );
static llvm::Value *and_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *result );
static llvm::Value *add_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *lhs, llvm::Value *rhs );
static llvm::Value *shr_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *lhs, llvm::Value *rhs,
llvm::Value *result );
public:
static lifters_t *get_instance( void )

@ -3,67 +3,78 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "X86TargetMachine.h"
#include "llvm/Pass.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
namespace vm
namespace llvm
{
// Obsessive-compulsive disorder is characterized by unreasonable
// thoughts and fears (obsessions) that lead to compulsive behaviors.
using llvm_type_t = llvm::Type;
using llvm_value_t = llvm::Value;
using llvm_module_t = llvm::Module;
using llvm_context_t = llvm::LLVMContext;
using llvm_function_t = llvm::Function;
using llvm_irbuilder_t = llvm::IRBuilder<>;
using llvm_alloca_inst_t = llvm::AllocaInst;
using llvm_basic_block_t = llvm::BasicBlock;
using llvm_global_value_t = llvm::GlobalValue;
extern "C" void LLVMInitializeX86TargetInfo();
extern "C" void LLVMInitializeX86Target();
extern "C" void LLVMInitializeX86TargetMC();
extern "C" void LLVMInitializeX86AsmParser();
extern "C" void LLVMInitializeX86AsmPrinter();
} // namespace llvm
namespace vm
{
class vmp_rtn_t
{
friend class lifters_t;
public:
explicit vmp_rtn_t( llvm_context_t *llvm_ctx, llvm_module_t *llvm_module, vm::ctx_t *vm_ctx,
explicit vmp_rtn_t( llvm::LLVMContext *llvm_ctx, llvm::Module *llvm_module, vm::ctx_t *vm_ctx,
std::uintptr_t rtn_begin, std::vector< vm::instrs::code_block_t > vmp2_code_blocks );
llvm_function_t *lift( void );
llvm::Function *lift( void );
private:
llvm_context_t *llvm_ctx;
llvm_module_t *llvm_module;
llvm_function_t *llvm_fptr;
llvm_alloca_inst_t *flags, *stack;
llvm::LLVMContext *llvm_ctx;
llvm::Module *llvm_module;
llvm::Function *llvm_fptr;
llvm::AllocaInst *flags, *stack;
vm::ctx_t *vm_ctx;
std::uintptr_t rtn_begin;
std::shared_ptr< llvm_irbuilder_t > ir_builder;
std::shared_ptr< llvm::IRBuilder<> > ir_builder;
std::vector< llvm_alloca_inst_t * > virtual_registers;
std::vector< std::pair< std::uintptr_t, llvm_basic_block_t * > > llvm_code_blocks;
std::vector< llvm::AllocaInst * > virtual_registers;
std::vector< std::pair< std::uintptr_t, llvm::BasicBlock * > > llvm_code_blocks;
std::vector< vm::instrs::code_block_t > vmp2_code_blocks;
void push( std::uint8_t byte_size, llvm_value_t *input_val );
void push( std::uint8_t byte_size, llvm::Value *input_val );
llvm::Value *pop( std::uint8_t byte_size );
llvm::Value *load_value( std::uint8_t byte_size, llvm_global_value_t *global );
llvm::Value *load_value( std::uint8_t byte_size, llvm_alloca_inst_t *var );
llvm::Value *load_value( std::uint8_t byte_size, llvm::GlobalValue *global );
llvm::Value *load_value( std::uint8_t byte_size, llvm::AllocaInst *var );
llvm_value_t *compute_sf( std::uint8_t byte_size, llvm_value_t *val );
llvm_value_t *compute_zf( std::uint8_t byte_size, llvm_value_t *val );
llvm_value_t *compute_pf( std::uint8_t byte_size, llvm_value_t *val );
llvm_value_t *combine_flags( llvm_value_t *cf, llvm_value_t *pf, llvm_value_t *af, llvm_value_t *zf,
llvm_value_t *sf, llvm_value_t *of );
llvm::Value *compute_sf( std::uint8_t byte_size, llvm::Value *val );
llvm::Value *compute_zf( std::uint8_t byte_size, llvm::Value *val );
llvm::Value *compute_pf( std::uint8_t byte_size, llvm::Value *val );
llvm::Value *combine_flags( llvm::Value *cf, llvm::Value *pf, llvm::Value *af, llvm::Value *zf, llvm::Value *sf,
llvm::Value *of );
void create_virtual_registers( void );
void create_routine( void );

@ -2,8 +2,7 @@
namespace vm
{
vm::llvm_value_t *lifters_t::add_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *lhs,
vm::llvm_value_t *rhs )
llvm::Value *lifters_t::add_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *lhs, llvm::Value *rhs )
{
auto op_size = llvm::IntegerType::get( *rtn->llvm_ctx, byte_size * 8 );
std::vector< llvm::Type * > intrinsic_arg_types;

@ -20,7 +20,7 @@ namespace vm
// find the first branch basic block...
auto bb1 =
std::find_if( rtn->llvm_code_blocks.begin(), rtn->llvm_code_blocks.end(),
[ & ]( const std::pair< std::uintptr_t, llvm_basic_block_t * > &block_data ) -> bool {
[ & ]( const std::pair< std::uintptr_t, llvm::BasicBlock * > &block_data ) -> bool {
return block_data.first == vm_code_block.jcc.block_addr[ 0 ];
} );
@ -30,7 +30,7 @@ namespace vm
// find the second branch basic block...
auto bb2 =
std::find_if( rtn->llvm_code_blocks.begin(), rtn->llvm_code_blocks.end(),
[ & ]( const std::pair< std::uintptr_t, llvm_basic_block_t * > &block_data ) -> bool {
[ & ]( const std::pair< std::uintptr_t, llvm::BasicBlock * > &block_data ) -> bool {
return block_data.first == vm_code_block.jcc.block_addr[ 1 ];
} );
@ -44,7 +44,7 @@ namespace vm
auto rva = rtn->pop( 8 );
auto bb_data =
std::find_if( rtn->llvm_code_blocks.begin(), rtn->llvm_code_blocks.end(),
[ & ]( const std::pair< std::uintptr_t, llvm_basic_block_t * > &block_data ) -> bool {
[ & ]( const std::pair< std::uintptr_t, llvm::BasicBlock * > &block_data ) -> bool {
return block_data.first == vm_code_block.jcc.block_addr[ 0 ];
} );

@ -2,7 +2,7 @@
namespace vm
{
vm::llvm_value_t *lifters_t::and_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *result )
llvm::Value *lifters_t::and_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *result )
{
auto cf = llvm::ConstantInt::get( llvm::IntegerType::get( *rtn->llvm_ctx, 64 ), 0 );
auto of = llvm::ConstantInt::get( llvm::IntegerType::get( *rtn->llvm_ctx, 64 ), 0 );

@ -3,8 +3,8 @@
namespace vm
{
// known limitation: cases where the shift count is zero are not modeled here...
vm::llvm_value_t *lifters_t::shr_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, vm::llvm_value_t *lhs,
vm::llvm_value_t *rhs, vm::llvm_value_t *result )
llvm::Value *lifters_t::shr_flags( vm::vmp_rtn_t *rtn, std::uint8_t byte_size, llvm::Value *lhs, llvm::Value *rhs,
llvm::Value *result )
{
auto op_size = llvm::IntegerType::get( *rtn->llvm_ctx, byte_size * 8 );
auto msb = rtn->ir_builder->CreateLShr( lhs, ( byte_size * 8 ) - 1 );

@ -4,5 +4,6 @@ namespace vm
{
lifters_t::lifter_callback_t lifters_t::vmexit =
[ & ]( vm::vmp_rtn_t *rtn, const vm::instrs::code_block_t &vm_code_block,
const vm::instrs::virt_instr_t &vinstr, llvm::IRBuilder<> *ir_builder ) { ir_builder->CreateRetVoid(); };
const vm::instrs::virt_instr_t &vinstr,
llvm::IRBuilder<> *ir_builder ) { ir_builder->CreateRet( ir_builder->CreateLoad( rtn->stack ) ); };
}

@ -77,5 +77,48 @@ int main( int argc, const char *argv[] )
vm::vmp_rtn_t vmp_rtn( &llvm_ctx, &llvm_module, &vm_ctx, first_block->vip_begin, vmp_code_blocks );
auto func = vmp_rtn.lift();
func->print( llvm::outs(), nullptr );
llvm_module.print( llvm::outs(), nullptr );
llvm::LLVMInitializeX86TargetInfo();
llvm::LLVMInitializeX86Target();
llvm::LLVMInitializeX86TargetMC();
llvm::LLVMInitializeX86AsmParser();
llvm::LLVMInitializeX86AsmPrinter();
auto TargetTriple = sys::getDefaultTargetTriple();
llvm_module.setTargetTriple( TargetTriple );
std::string Error;
auto Target = TargetRegistry::lookupTarget( TargetTriple, Error );
auto CPU = "generic";
auto Features = "";
TargetOptions opt;
auto RM = Optional< Reloc::Model >();
auto TheTargetMachine = Target->createTargetMachine( TargetTriple, CPU, Features, opt, RM );
llvm_module.setDataLayout( TheTargetMachine->createDataLayout() );
auto Filename = "output.o";
std::error_code EC;
raw_fd_ostream dest( Filename, EC, sys::fs::OF_None );
if ( EC )
{
errs() << "Could not open file: " << EC.message();
return 1;
}
legacy::PassManager pass;
auto FileType = CGFT_ObjectFile;
if ( TheTargetMachine->addPassesToEmitFile( pass, dest, nullptr, FileType ) )
{
errs() << "TheTargetMachine can't emit a file of this type";
return 1;
}
pass.run( llvm_module );
dest.flush();
}

@ -3,14 +3,14 @@
namespace vm
{
vmp_rtn_t::vmp_rtn_t( llvm_context_t *llvm_ctx, llvm_module_t *llvm_module, vm::ctx_t *vm_ctx,
vmp_rtn_t::vmp_rtn_t( llvm::LLVMContext *llvm_ctx, llvm::Module *llvm_module, vm::ctx_t *vm_ctx,
std::uintptr_t rtn_begin, std::vector< vm::instrs::code_block_t > vmp2_code_blocks )
: llvm_ctx( llvm_ctx ), llvm_module( llvm_module ), vm_ctx( vm_ctx ), rtn_begin( rtn_begin ),
vmp2_code_blocks( vmp2_code_blocks )
{
// do not change the ordering of these function calls...
create_routine();
ir_builder = std::make_shared< llvm_irbuilder_t >( *llvm_ctx );
ir_builder = std::make_shared< llvm::IRBuilder<> >( *llvm_ctx );
ir_builder->SetInsertPoint( llvm_code_blocks[ 0 ].second );
flags = ir_builder->CreateAlloca( ir_builder->getInt64Ty(), nullptr, "flags" );
stack = ir_builder->CreateAlloca( llvm::PointerType::get( ir_builder->getInt8Ty(), 0ull ), nullptr, "sp" );
@ -32,7 +32,7 @@ namespace vm
{
// function takes a single i8* argument and returns an i8*... maybe change this in the future as i learn
// more and more LLVM...
auto func_ty = llvm::FunctionType::get( llvm::Type::getVoidTy( *llvm_ctx ),
auto func_ty = llvm::FunctionType::get( llvm::PointerType::getInt8PtrTy( *llvm_ctx ),
{ llvm::PointerType::getInt8PtrTy( *llvm_ctx ) }, false );
// convert the rtn_begin address to a hex string and prepend "rtn_" to it...
@ -51,7 +51,7 @@ namespace vm
}
}
void vmp_rtn_t::push( std::uint8_t num_bytes, llvm_value_t *val )
void vmp_rtn_t::push( std::uint8_t num_bytes, llvm::Value *val )
{
// sub rsp, num_bytes
auto rsp_addr = ir_builder->CreateLoad( stack );
@ -67,7 +67,7 @@ namespace vm
ir_builder->CreateStore( val, rsp_cast_ptr );
}
llvm_value_t *vmp_rtn_t::pop( std::uint8_t num_bytes )
llvm::Value *vmp_rtn_t::pop( std::uint8_t num_bytes )
{
// mov rax, [rsp]
auto rsp_addr = ir_builder->CreateLoad( stack );
@ -85,7 +85,7 @@ namespace vm
return pop_val;
}
llvm_value_t *vmp_rtn_t::load_value( std::uint8_t byte_size, llvm_global_value_t *var )
llvm::Value *vmp_rtn_t::load_value( std::uint8_t byte_size, llvm::GlobalValue *var )
{
if ( byte_size * 8 != var->getType()->getPrimitiveSizeInBits() )
{
@ -97,7 +97,7 @@ namespace vm
return ir_builder->CreateLoad( llvm::IntegerType::get( *llvm_ctx, byte_size * 8 ), var );
}
llvm_value_t *vmp_rtn_t::load_value( std::uint8_t byte_size, llvm_alloca_inst_t *var )
llvm::Value *vmp_rtn_t::load_value( std::uint8_t byte_size, llvm::AllocaInst *var )
{
if ( byte_size * 8 != var->getType()->getPrimitiveSizeInBits() )
{
@ -109,7 +109,7 @@ namespace vm
return ir_builder->CreateLoad( llvm::IntegerType::get( *llvm_ctx, byte_size * 8 ), var );
}
llvm_function_t *vmp_rtn_t::lift( void )
llvm::Function *vmp_rtn_t::lift( void )
{
auto &code_blocks = llvm_fptr->getBasicBlockList();
auto lifters = vm::lifters_t::get_instance();
@ -117,35 +117,46 @@ namespace vm
for ( auto idx = 0u; idx < code_blocks.size(); ++idx )
{
ir_builder->SetInsertPoint( llvm_code_blocks[ idx ].second );
for ( auto &vinstr : vmp2_code_blocks[ idx ].vinstrs )
{
if ( !lifters->lift( this, vmp2_code_blocks[ idx ], vinstr, ir_builder.get() ) )
{
std::printf( "> failed to devirtualize virtual instruction with opcode = %d\n", vinstr.opcode );
llvm_module->print( llvm::outs(), nullptr );
std::printf(
"> failed to devirtualize virtual instruction with opcode = %d, handler table rva = 0x%x\n",
vinstr.opcode, vinstr.trace_data.regs.r12 - vinstr.trace_data.regs.r13 );
return nullptr;
}
}
}
// TODO: update this list of optimizations to add more...
llvm::legacy::FunctionPassManager fpm( llvm_module );
fpm.add( llvm::createInstructionCombiningPass() );
fpm.add( llvm::createReassociatePass() );
fpm.add( llvm::createGVNPass() );
fpm.add( llvm::createCFGSimplificationPass() );
fpm.doInitialization();
fpm.run( *llvm_fptr );
return llvm_fptr;
}
llvm_value_t *vmp_rtn_t::compute_sf( std::uint8_t byte_size, llvm_value_t *val )
// emits IR that extracts the most significant bit of `val` (the sign bit, used as the SF value)
// and zero-extends it to a 64-bit integer...
//
// byte_size - operand width in bytes; the shift amount is ( byte_size * 8 ) - 1...
// val       - value whose top bit is extracted (assumed to be byte_size * 8 bits wide - TODO confirm
//             against callers)...
//
// returns the 64-bit value produced by the zero extension...
llvm::Value *vmp_rtn_t::compute_sf( std::uint8_t byte_size, llvm::Value *val )
{
    // logical shift right by (bit width - 1) leaves only the top bit, in bit position 0...
    auto msb = ir_builder->CreateLShr( val, ( byte_size * 8 ) - 1 );

    // widen the single bit to 64 bits...
    return ir_builder->CreateZExt( msb, llvm::IntegerType::get( *llvm_ctx, 64 ) );
}
llvm_value_t *vmp_rtn_t::compute_zf( std::uint8_t byte_size, llvm_value_t *val )
// emits IR that compares `val` against zero and zero-extends the comparison result to a
// 64-bit integer (the ZF value: 1 when `val` is zero, 0 otherwise)...
//
// byte_size - operand width in bytes; the zero constant is built as a byte_size * 8 bit integer...
// val       - value compared against zero...
llvm::Value *vmp_rtn_t::compute_zf( std::uint8_t byte_size, llvm::Value *val )
{
    // zero constant of the operand's width...
    auto zero = llvm::ConstantInt::get( llvm::IntegerType::get( *llvm_ctx, byte_size * 8 ), 0 );

    // i1 comparison result, widened to 64 bits...
    auto equals_zero = ir_builder->CreateICmpEQ( val, zero );
    auto flag_type = llvm::IntegerType::get( *llvm_ctx, 64 );
    return ir_builder->CreateZExt( equals_zero, flag_type );
}
llvm_value_t *vmp_rtn_t::compute_pf( std::uint8_t byte_size, llvm_value_t *val )
llvm::Value *vmp_rtn_t::compute_pf( std::uint8_t byte_size, llvm::Value *val )
{
auto operand_size = llvm::IntegerType::get( *llvm_ctx, byte_size * 8 );
auto popcount_intrinsic = llvm::Intrinsic::getDeclaration( llvm_module, llvm::Intrinsic::ctpop,
@ -156,8 +167,8 @@ namespace vm
return ir_builder->CreateCall( popcount_intrinsic, { extended_bits } );
}
llvm_value_t *vmp_rtn_t::combine_flags( llvm_value_t *cf, llvm_value_t *pf, llvm_value_t *af, llvm_value_t *zf,
llvm_value_t *sf, llvm_value_t *of )
llvm::Value *vmp_rtn_t::combine_flags( llvm::Value *cf, llvm::Value *pf, llvm::Value *af, llvm::Value *zf,
llvm::Value *sf, llvm::Value *of )
{
auto shifted_pf = ir_builder->CreateShl( pf, 2, "shifted_pf", true, true );
auto shifted_af = ir_builder->CreateShl( llvm::ConstantInt::get( llvm::IntegerType::get( *llvm_ctx, 64 ), 0 ),

Loading…
Cancel
Save