diff --git a/CMakeLists.txt b/CMakeLists.txt index 9968024..0089bda 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ set(vmprofiler_SOURCES "") list(APPEND vmprofiler_SOURCES "src/calc_jmp.cpp" + "src/scn.cpp" "src/vmctx.cpp" "src/vmhandler.cpp" "src/vminstrs.cpp" @@ -65,6 +66,7 @@ list(APPEND vmprofiler_SOURCES "src/vmprofiles/write.cpp" "src/vmutils.cpp" "include/calc_jmp.hpp" + "include/scn.hpp" "include/transform.hpp" "include/vmctx.hpp" "include/vmhandlers.hpp" diff --git a/include/scn.hpp b/include/scn.hpp new file mode 100644 index 0000000..713e2e9 --- /dev/null +++ b/include/scn.hpp @@ -0,0 +1,27 @@ +#include + +/// +/// small namespace of function wrappers that classify a linear virtual address by the characteristics of the module section it lands in... +/// +namespace scn +{ + /// + /// determines if a pointer lands inside of a section that is readonly... + /// + /// this also checks to make sure the section is not discardable... + /// + /// module_base: linear virtual address of the module, its headers are read from this address to walk the section table... + /// ptr: linear virtual address to look up... + /// returns true if ptr lands inside of a section of the module that is neither writable nor discardable, false otherwise (including when ptr is not inside of any section) + bool read_only( std::uint64_t module_base, std::uint64_t ptr ); + + /// + /// determines if a pointer lands inside of a section that is executable... + /// + /// this also checks to make sure the section is not discardable... + /// + /// module_base: linear virtual address of the module, its headers are read from this address to walk the section table... + /// ptr: linear virtual address to look up... + /// returns true if ptr lands inside of a section of the module that is executable and not discardable, false otherwise (including when ptr is not inside of any section) + bool executable( std::uint64_t module_base, std::uint64_t ptr ); +} // namespace scn \ No newline at end of file diff --git a/include/vminstrs.hpp b/include/vminstrs.hpp index ad5465b..84ed251 100644 --- a/include/vminstrs.hpp +++ b/include/vminstrs.hpp @@ -3,6 +3,7 @@ #include #include #include +#include /// /// contains all functions related to virtual instructions... @@ -72,6 +73,15 @@ namespace vm::instrs /// if last lconstdw is found, return filled in jcc_data structure... 
std::optional< jcc_data > get_jcc_data( vm::ctx_t &ctx, code_block_t &code_block ); + /// + /// this algo tries to find a jmp table's address and all of its entries for a given code block... + /// + /// ctx: vm context + /// code_block: code block that has an absolute jmp... this routine checks whether this code block + /// is actually a jmp table stub. returns: if a jmp table is found, all of its entries are decrypted and returned as a + /// vector of code block addresses, otherwise an empty optional is returned.. + std::optional< std::vector< std::uint64_t > > get_switch_cases( vm::ctx_t &ctx, code_block_t &code_block ); + /// /// the top of the stack will contain the lower 32bits of the RVA to the virtual instructions /// that will be jumping too... the RVA is image based (not module based, but optional header image diff --git a/include/vmp2.hpp b/include/vmp2.hpp index 02520d2..6705e1d 100644 --- a/include/vmp2.hpp +++ b/include/vmp2.hpp @@ -16,7 +16,8 @@ namespace vmp2 invalid, v1 = 0x101, v2 = 0x102, - v3 = 0x103 + v3 = 0x103, + v4 = 0x104 }; namespace v1 @@ -173,14 +174,15 @@ namespace vm { none, branching, - absolute + absolute, + switch_case }; struct jcc_data { bool has_jcc; jcc_type type; - std::uintptr_t block_addr[ 2 ]; + std::vector< std::uintptr_t > block_addr; }; struct code_block_t @@ -225,3 +227,52 @@ namespace vmp2 }; } // namespace v3 } // namespace vmp2 + +#pragma pack( push, 1 ) +namespace vmp2 +{ + namespace v4 + { + struct file_header + { + u32 magic; // VMP2 + u64 epoch_time; + version_t version; + + u64 module_base; + u64 image_base; + u64 vm_entry_rva; + + u32 module_offset; + u32 module_size; + + u32 rtn_count; + u32 rtn_offset; + }; + + struct jcc_data + { + bool has_jcc; + vm::instrs::jcc_type type; + std::uint32_t num_block_addrs; + std::uintptr_t block_addr[ 1 ]; // NOTE(review): presumably the first of num_block_addrs entries stored back to back... confirm against the code that writes this file format + }; + + struct code_block_t + { + std::uintptr_t vip_begin; + std::uintptr_t next_block_offset; + jcc_data jcc; + + std::uint32_t vinstr_count; + vm::instrs::virt_instr_t vinstr[ 1 ]; + }; + + struct rtn_t + { + u32 code_block_count; + 
vmp2::v4::code_block_t code_blocks[ 1 ]; + }; + } // namespace v4 +#pragma pack( pop ) +} // namespace vmp2 \ No newline at end of file diff --git a/src/scn.cpp b/src/scn.cpp new file mode 100644 index 0000000..e3c3250 --- /dev/null +++ b/src/scn.cpp @@ -0,0 +1,34 @@ +#include + +namespace scn +{ + bool read_only( std::uint64_t module_base, std::uint64_t ptr ) + { + auto win_image = reinterpret_cast< win::image_t<> * >( module_base ); + auto section_count = win_image->get_file_header()->num_sections; + auto sections = win_image->get_nt_headers()->get_sections(); + + for ( auto idx = 0u; idx < section_count; ++idx ) + if ( ptr >= sections[ idx ].virtual_address + module_base && + ptr < sections[ idx ].virtual_address + sections[ idx ].virtual_size + module_base ) + return !( sections[ idx ].characteristics.mem_discardable ) && + !( sections[ idx ].characteristics.mem_write ); + + return false; // ptr is not inside of any section... + } + + bool executable( std::uint64_t module_base, std::uint64_t ptr ) + { + auto win_image = reinterpret_cast< win::image_t<> * >( module_base ); + auto section_count = win_image->get_file_header()->num_sections; + auto sections = win_image->get_nt_headers()->get_sections(); + + for ( auto idx = 0u; idx < section_count; ++idx ) + if ( ptr >= sections[ idx ].virtual_address + module_base && + ptr < sections[ idx ].virtual_address + sections[ idx ].virtual_size + module_base ) + return !( sections[ idx ].characteristics.mem_discardable ) && + sections[ idx ].characteristics.mem_execute; + + return false; // ptr is not inside of any section... + } +} // namespace scn \ No newline at end of file diff --git a/src/vminstrs.cpp b/src/vminstrs.cpp index 040ed52..6dcc60d 100644 --- a/src/vminstrs.cpp +++ b/src/vminstrs.cpp @@ -207,6 +207,81 @@ namespace vm::instrs return result; } + std::optional< std::vector< std::uint64_t > > get_switch_cases( vm::ctx_t &ctx, code_block_t &code_block ) + { + // find the last LCONSTDW in this code block... it is the XOR decryption key... 
+ auto lconstdw_xor_key = std::find_if( code_block.vinstrs.rbegin(), code_block.vinstrs.rend(), + []( const vm::instrs::virt_instr_t &vinstr ) -> bool { + auto profile = vm::handler::get_profile( vinstr.mnemonic_t ); + return profile && profile->mnemonic == vm::handler::LCONSTDW; + } ); + + if ( lconstdw_xor_key == code_block.vinstrs.rend() ) + return {}; + + // extract the address which we are jmp'ing to, this gets compared lower in the algo... + auto jmp_addr = code_block.vinstrs.back().trace_data.vsp.qword[ 0 ]; + + // find the SREGDW that sets a virtual register to the encrypted rva we are jmping to... + auto sregdw_jmp_addr = std::find_if( lconstdw_xor_key, code_block.vinstrs.rend(), + [ & ]( const vm::instrs::virt_instr_t &vinstr ) -> bool { + if ( vinstr.mnemonic_t == vm::handler::SREGDW ) + { + if ( ( ( ( std::uint32_t )vinstr.trace_data.vsp.qword[ 0 ] ) ^ + lconstdw_xor_key->operand.imm.u ) == jmp_addr ) + { + return true; + } + } + return false; + } ); + + if ( sregdw_jmp_addr == code_block.vinstrs.rend() ) + return {}; + + // find the last READDW (the one above SREGDW...) + auto readdw_jmp_tbl = std::find_if( sregdw_jmp_addr, code_block.vinstrs.rend(), + [ & ]( const vm::instrs::virt_instr_t &vinstr ) -> bool { + return vinstr.mnemonic_t == vm::handler::READDW; + } ); + + if ( readdw_jmp_tbl == code_block.vinstrs.rend() ) + return {}; + + // find the last ADDQ which when computed results in the READDW address found above... + auto addq_jmp_tbl_addr = std::find_if( + readdw_jmp_tbl, code_block.vinstrs.rend(), [ & ]( const vm::instrs::virt_instr_t &vinstr ) -> bool { + return vinstr.mnemonic_t == vm::handler::ADDQ && + vinstr.trace_data.vsp.qword[ 0 ] + vinstr.trace_data.vsp.qword[ 1 ] == + readdw_jmp_tbl->trace_data.vsp.qword[ 0 ]; + } ); + + if ( addq_jmp_tbl_addr == code_block.vinstrs.rend() ) + return {}; + + // sanity check... the jmp table itself must land inside of an executable section of the module... 
+ if ( !scn::executable( ctx.module_base, ctx.module_base + addq_jmp_tbl_addr->trace_data.vsp.qword[ 1 ] ) ) + return {}; + + auto jmp_table = + reinterpret_cast< std::uint32_t * >( ctx.module_base + addq_jmp_tbl_addr->trace_data.vsp.qword[ 1 ] ); + + std::vector< std::uint64_t > result; + for ( auto idx = 0u;; ++idx ) + { + auto code_addr = code_block_addr( ctx, jmp_table[ idx ] ^ lconstdw_xor_key->operand.imm.u ); + + // keep decrypting entries until we decrypt a value that doesn't land inside of an executable section... if + // this allows too many cases we will need to check to see if the first virtual instruction of this block is + // an SREGQ... NOTE(review): idx has no upper bound, the walk only stops at the first entry that fails the check... + if ( scn::executable( ctx.module_base, code_addr ) ) + result.push_back( code_addr ); + else // we finished decrypting the table... + break; + } + return result; + } + std::optional< jcc_data > get_jcc_data( vm::ctx_t &vmctx, code_block_t &code_block ) { // there is no branch for this as this is a vmexit... @@ -224,15 +299,9 @@ namespace vm::instrs if ( result == code_block.vinstrs.rend() ) return jcc_data{ false, jcc_type::none }; - jcc_data jcc; const auto xor_key = static_cast< std::uint32_t >( result->operand.imm.u ); const auto &last_trace = code_block.vinstrs.back().trace_data; - // since result is already a variable and is a reverse itr - // im going to be using rbegin and rend here again... - // - // look for PUSHVSP virtual instructions with two encrypted virtual - // instruction rva's ontop of the virtual stack... 
result = std::find_if( code_block.vinstrs.rbegin(), code_block.vinstrs.rend(), [ & ]( const vm::instrs::virt_instr_t &vinstr ) -> bool { @@ -242,13 +311,8 @@ namespace vm::instrs const auto possible_block_1 = code_block_addr( vmctx, vinstr.trace_data.vsp.qword[ 0 ] ^ xor_key ), possible_block_2 = code_block_addr( vmctx, vinstr.trace_data.vsp.qword[ 1 ] ^ xor_key ); - // if this returns too many false positives we might have to get - // our hands dirty and look into trying to emulate each branch - // to see if the first instruction is an SREGQ... - return possible_block_1 > vmctx.module_base && - possible_block_1 < vmctx.module_base + vmctx.image_size && - possible_block_2 > vmctx.module_base && - possible_block_2 < vmctx.module_base + vmctx.image_size; + return scn::executable( vmctx.module_base, possible_block_1 ) && + scn::executable( vmctx.module_base, possible_block_2 ); } return false; } ); @@ -256,20 +320,32 @@ namespace vm::instrs // if there is not two branches... if ( result == code_block.vinstrs.rend() ) { - jcc.block_addr[ 0 ] = code_block_addr( vmctx, last_trace ); - jcc.has_jcc = true; - jcc.type = jcc_type::absolute; - } - // else there are two branches... - else - { - jcc.block_addr[ 0 ] = code_block_addr( vmctx, result->trace_data.vsp.qword[ 0 ] ^ xor_key ); - jcc.block_addr[ 1 ] = code_block_addr( vmctx, result->trace_data.vsp.qword[ 1 ] ^ xor_key ); - - jcc.has_jcc = true; - jcc.type = jcc_type::branching; + // see if this code block is actually a jmp table for a switch case... note: the local named result below shadows the outer reverse iterator of the same name... 
+ auto result = get_switch_cases( vmctx, code_block ); + if ( result.has_value() ) + { + auto vec = result.value(); + jcc_data jcc; + jcc.has_jcc = true; + jcc.type = jcc_type::switch_case; + jcc.block_addr = vec; + return jcc; + } + else + { + jcc_data jcc; + jcc.block_addr.push_back( code_block_addr( vmctx, last_trace ) ); + jcc.has_jcc = true; + jcc.type = jcc_type::absolute; + return jcc; + } } + jcc_data jcc; + jcc.block_addr.push_back( code_block_addr( vmctx, result->trace_data.vsp.qword[ 0 ] ^ xor_key ) ); + jcc.block_addr.push_back( code_block_addr( vmctx, result->trace_data.vsp.qword[ 1 ] ^ xor_key ) ); + jcc.has_jcc = true; + jcc.type = jcc_type::branching; return jcc; }