diff --git a/.gitmodules b/.gitmodules index dedad5f..bf57d31 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "tests/deps/cli-parser"] path = tests/deps/cli-parser url = https://githacks.org/_xeroxz/cli-parser +[submodule "deps/unicorn"] + path = deps/unicorn + url = https://github.com/unicorn-engine/unicorn.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 23308ed..06de3e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,6 +90,7 @@ target_include_directories(vmprofiler PUBLIC target_link_libraries(vmprofiler PUBLIC Zydis linux-pe + unicorn ) unset(CMKR_TARGET) diff --git a/cmake.toml b/cmake.toml index 49a8d5a..0ef0925 100644 --- a/cmake.toml +++ b/cmake.toml @@ -17,6 +17,7 @@ include-directories = [ link-libraries = [ "Zydis", "linux-pe", + "unicorn" ] compile-definitions = [ diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt index afc8594..13987e9 100644 --- a/deps/CMakeLists.txt +++ b/deps/CMakeLists.txt @@ -1,6 +1,23 @@ # This file is automatically generated from cmake.toml - DO NOT EDIT # See https://github.com/build-cpp/cmkr for more information +cmake_minimum_required(VERSION 3.15) + +# Regenerate CMakeLists.txt automatically in the root project +set(CMKR_ROOT_PROJECT OFF) +if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + set(CMKR_ROOT_PROJECT ON) + + # Bootstrap cmkr + include(cmkr.cmake OPTIONAL RESULT_VARIABLE CMKR_INCLUDE_RESULT) + if(CMKR_INCLUDE_RESULT) + cmkr() + endif() + + # Enable folder support + set_property(GLOBAL PROPERTY USE_FOLDERS ON) +endif() + # Create a configure-time dependency on cmake.toml to improve IDE support if(CMKR_ROOT_PROJECT) configure_file(cmake.toml cmake.toml COPYONLY) @@ -23,6 +40,16 @@ endif() add_subdirectory(zydis) set(CMAKE_FOLDER ${CMKR_CMAKE_FOLDER}) +# unicorn +set(CMKR_CMAKE_FOLDER ${CMAKE_FOLDER}) +if(CMAKE_FOLDER) + set(CMAKE_FOLDER "${CMAKE_FOLDER}/unicorn") +else() + set(CMAKE_FOLDER unicorn) +endif() +add_subdirectory(unicorn) +set(CMAKE_FOLDER ${CMKR_CMAKE_FOLDER}) + # Target linux-pe set(CMKR_TARGET linux-pe) set(linux-pe_SOURCES "") diff --git a/deps/cmake.toml b/deps/cmake.toml index df2cdfb..812f316 100644 --- a/deps/cmake.toml +++ b/deps/cmake.toml @@ -6,6 +6,7 @@ ZYDIS_FUZZ_AFL_FAST = false ZYDIS_LIBFUZZER = false [subdir.zydis] +[subdir.unicorn] [target.linux-pe] type = "interface" diff --git a/deps/unicorn b/deps/unicorn new file mode 160000 index 0000000..63a445c --- /dev/null +++ b/deps/unicorn @@ -0,0 +1 @@ +Subproject commit 63a445cbba18bf1313ac3699b5d25462b5d529f4 diff --git a/include/vmctx.hpp b/include/vmctx.hpp index b908aeb..0941aaf 100644 --- a/include/vmctx.hpp +++ b/include/vmctx.hpp @@ -1,4 +1,5 @@ #pragma once +#include #include namespace vm { diff --git a/include/vminstrs.hpp b/include/vminstrs.hpp index d55851a..5a1a116 100644 --- a/include/vminstrs.hpp +++ b/include/vminstrs.hpp @@ -1,4 +1,5 @@ #pragma once +#include #include namespace vm::instrs { @@ -68,6 +69,24 @@ struct vinstr_t { } imm; }; +/// +/// emu instruction containing current cpu register values and such... +/// +struct emu_instr_t { + zydis_decoded_instr_t m_instr; + uc_context* m_cpu; +}; + +/// +/// handler trace containing information about a stream of instructions... also +/// contains some information about the virtual machine such as vip and vsp... +/// +struct hndlr_trace_t { + std::uintptr_t m_hndlr_addr; + zydis_reg_t m_vip, m_vsp; + std::vector m_instrs; +}; + /// /// matcher function which returns true if an instruction matches a desired /// one... @@ -80,9 +99,10 @@ using matcher_t = std::function -using vinstr_gen_t = std::function(zydis_reg_t& vip, - zydis_reg_t& vsp, - zydis_rtn_t& hndlr)>; +using vinstr_gen_t = + std::function(zydis_reg_t& vip, + zydis_reg_t& vsp, + hndlr_trace_t& hndlr)>; /// /// each virtual instruction has its own profiler_t structure which can generate @@ -111,21 +131,52 @@ struct profiler_t { vinstr_gen_t generate; }; +/// +/// list of all profiles here... +/// extern profiler_t jmp; +extern profiler_t sreg; + +/// +/// unsorted vector of profiles... they get sorted once at runtime... +/// inline std::vector profiles = {&jmp}; +/// +/// sorts the profiles by descending order of matchers... this will prevent a +/// smaller profiler with less matchers from being used when it should not be... +/// +/// this function can be called multiple times... +/// +inline void init() { + if (static std::atomic_bool once = true; once.exchange(false)) + std::sort(profiles.begin(), profiles.end(), + [&](profiler_t* a, profiler_t* b) -> bool { + return a->matchers.size() > b->matchers.size(); + }); +} + +/// +/// determines the virtual instruction for the vm handler given vsp and vip... +/// +/// vip native register... +/// vsp native register... +/// +/// returns vinstr_t structure... inline vinstr_t determine(zydis_reg_t& vip, zydis_reg_t& vsp, - zydis_rtn_t& hndlr) { + hndlr_trace_t& hndlr) { + const auto& instrs = hndlr.m_instrs; const auto profile = std::find_if( profiles.begin(), profiles.end(), [&](profiler_t* profile) -> bool { for (auto& matcher : profile->matchers) { - const auto matched = std::find_if(hndlr.begin(), hndlr.end(), - [&](zydis_instr_t& instr) -> bool { - const auto& i = instr.instr; - return matcher(vip, vsp, i); - }); - if (matched == hndlr.end()) + const auto matched = + std::find_if(instrs.begin(), instrs.end(), + [&](const emu_instr_t& instr) -> bool { + const auto& i = instr.m_instr; + return matcher(vip, vsp, i); + }); + if (matched == instrs.end()) return false; } return true; @@ -137,4 +188,21 @@ inline vinstr_t determine(zydis_reg_t& vip, auto result = (*profile)->generate(vip, vsp, hndlr); return result.has_value() ? result.value() : vinstr_t{mnemonic_t::unknown}; } + +/// +/// get profile from mnemonic... +/// +/// mnemonic of the profile to get... +/// pointer to the profile... +inline profiler_t* get_profile(mnemonic_t mnemonic) { + if (mnemonic == mnemonic_t::unknown) + return nullptr; + + const auto res = std::find_if(profiles.begin(), profiles.end(), + [&](profiler_t* profile) -> bool { + return profile->mnemonic == mnemonic; + }); + + return res == profiles.end() ? nullptr : *res; +} } // namespace vm::instrs \ No newline at end of file diff --git a/src/vmctx.cpp b/src/vmctx.cpp index 4a07d92..fd5eaaa 100644 --- a/src/vmctx.cpp +++ b/src/vmctx.cpp @@ -11,6 +11,9 @@ vmctx_t::vmctx_t(std::uintptr_t module_base, m_image_size(image_size) {} bool vmctx_t::init() { + vm::utils::init(); + vm::instrs::init(); + // flatten and deobfuscate the vm entry... if (!vm::utils::flatten(m_vm_entry, m_module_base + m_vm_entry_rva)) return false; diff --git a/src/vmprofiles/jmp.cpp b/src/vmprofiles/jmp.cpp index a6ef5cc..ef92654 100644 --- a/src/vmprofiles/jmp.cpp +++ b/src/vmprofiles/jmp.cpp @@ -4,8 +4,8 @@ namespace vm::instrs { profiler_t jmp = { "JMP", mnemonic_t::jmp, - // MOV REG, [VSP] - {{[&](const zydis_reg_t vip, + {{// MOV REG, [VSP] + [&](const zydis_reg_t vip, const zydis_reg_t vsp, const zydis_decoded_instr_t& instr) -> bool { return instr.mnemonic == ZYDIS_MNEMONIC_MOV && @@ -34,10 +34,11 @@ profiler_t jmp = { }}}, [&](zydis_reg_t& vip, zydis_reg_t& vsp, - zydis_rtn_t& hndlr) -> std::optional { + hndlr_trace_t& hndlr) -> std::optional { + const auto& instrs = hndlr.m_instrs; const auto xchg = std::find_if( - hndlr.begin(), hndlr.end(), [&](const zydis_instr_t& instr) -> bool { - const auto& i = instr.instr; + instrs.begin(), instrs.end(), [&](const emu_instr_t& instr) -> bool { + const auto& i = instr.m_instr; return i.mnemonic == ZYDIS_MNEMONIC_XCHG && i.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && i.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && @@ -50,77 +51,77 @@ profiler_t jmp = { }); // this JMP virtual instruction changes VSP as well as VIP... - if (xchg != hndlr.end()) { + if (xchg != instrs.end()) { // grab the register that isnt VSP in the XCHG... // xchg reg, vsp or xchg vsp, reg... - zydis_reg_t write_dep = xchg->instr.operands[0].reg.value != vsp - ? xchg->instr.operands[0].reg.value - : xchg->instr.operands[1].reg.value; + zydis_reg_t write_dep = xchg->m_instr.operands[0].reg.value != vsp + ? xchg->m_instr.operands[0].reg.value + : xchg->m_instr.operands[1].reg.value; // update VIP... VSP becomes VIP... with the XCHG... - vip = xchg->instr.operands[0].reg.value != vsp - ? xchg->instr.operands[1].reg.value - : xchg->instr.operands[0].reg.value; + vip = xchg->m_instr.operands[0].reg.value != vsp + ? xchg->m_instr.operands[1].reg.value + : xchg->m_instr.operands[0].reg.value; // find the next MOV REG, write_dep... this REG will be VSP... const auto mov_reg_write_dep = std::find_if( - xchg, hndlr.end(), [&](const zydis_instr_t& instr) -> bool { - const auto& i = instr.instr; + xchg, instrs.end(), [&](const emu_instr_t& instr) -> bool { + const auto& i = instr.m_instr; return i.mnemonic == ZYDIS_MNEMONIC_MOV && i.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && i.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && i.operands[1].reg.value == write_dep; }); - if (mov_reg_write_dep == hndlr.end()) + if (mov_reg_write_dep == instrs.end()) return {}; - vsp = mov_reg_write_dep->instr.operands[0].reg.value; + vsp = mov_reg_write_dep->m_instr.operands[0].reg.value; } else { // find the MOV REG, [VSP] instruction... const auto mov_reg_deref_vsp = std::find_if( - hndlr.begin(), hndlr.end(), - [&](const zydis_instr_t& instr) -> bool { - const auto& i = instr.instr; + instrs.begin(), instrs.end(), + [&](const emu_instr_t& instr) -> bool { + const auto& i = instr.m_instr; return i.mnemonic == ZYDIS_MNEMONIC_MOV && i.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && i.operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY && i.operands[1].mem.base == vsp; }); - if (mov_reg_deref_vsp == hndlr.end()) + if (mov_reg_deref_vsp == instrs.end()) return {}; // find the MOV REG, mov_reg_deref_vsp->operands[0].reg.value const auto mov_vip_reg = std::find_if( - mov_reg_deref_vsp, hndlr.end(), - [&](const zydis_instr_t& instr) -> bool { - const auto& i = instr.instr; + mov_reg_deref_vsp, instrs.end(), + [&](const emu_instr_t& instr) -> bool { + const auto& i = instr.m_instr; return i.mnemonic == ZYDIS_MNEMONIC_MOV && i.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && i.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && i.operands[1].reg.value == - mov_reg_deref_vsp->instr.operands[0].reg.value; + mov_reg_deref_vsp->m_instr.operands[0].reg.value; }); - if (mov_vip_reg == hndlr.end()) + if (mov_vip_reg == instrs.end()) return {}; - vip = mov_vip_reg->instr.operands[0].reg.value; + vip = mov_vip_reg->m_instr.operands[0].reg.value; // see if VSP gets updated as well... const auto mov_reg_vsp = std::find_if( - mov_reg_deref_vsp, hndlr.end(), - [&](const zydis_instr_t& instr) -> bool { - const auto& i = instr.instr; + mov_reg_deref_vsp, instrs.end(), + [&](const emu_instr_t& instr) -> bool { + const auto& i = instr.m_instr; return i.mnemonic == ZYDIS_MNEMONIC_MOV && i.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && i.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && i.operands[1].reg.value == vsp; }); - if (mov_reg_vsp != hndlr.end()) - vsp = mov_reg_vsp->instr.operands[0].reg.value; + if (mov_reg_vsp != instrs.end()) + vsp = mov_reg_vsp->m_instr.operands[0].reg.value; } return vinstr_t{mnemonic_t::jmp}; }}; diff --git a/src/vmprofiles/sreg.cpp b/src/vmprofiles/sreg.cpp index e69de29..47c85a2 100644 --- a/src/vmprofiles/sreg.cpp +++ b/src/vmprofiles/sreg.cpp @@ -0,0 +1,48 @@ +#include + +namespace vm::instrs { +profiler_t sreg = { + "SREG", + mnemonic_t::sreg, + {{// MOV REG, [VSP] + [&](const zydis_reg_t vip, + const zydis_reg_t vsp, + const zydis_decoded_instr_t& instr) -> bool { + return instr.mnemonic == ZYDIS_MNEMONIC_MOV && + instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && + instr.operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY && + instr.operands[1].mem.base == vsp; + }, + // ADD VSP, 8 + [&](const zydis_reg_t vip, + const zydis_reg_t vsp, + const zydis_decoded_instr_t& instr) -> bool { + return instr.mnemonic == ZYDIS_MNEMONIC_ADD && + instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && + instr.operands[0].reg.value == vsp && + instr.operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && + instr.operands[1].imm.value.u == 8; + }, + // MOV REG, [VIP] + [&](const zydis_reg_t vip, + const zydis_reg_t vsp, + const zydis_decoded_instr_t& instr) -> bool { + return instr.mnemonic == ZYDIS_MNEMONIC_MOV && + instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && + instr.operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY && + instr.operands[1].mem.base == vip; + }, + // MOV [RSP+REG], REG + [&](const zydis_reg_t vip, + const zydis_reg_t vsp, + const zydis_decoded_instr_t& instr) -> bool { + return instr.mnemonic == ZYDIS_MNEMONIC_MOV && + instr.operands[0].type == ZYDIS_OPERAND_TYPE_MEMORY && + instr.operands[0].mem.base == ZYDIS_REGISTER_RSP && + instr.operands[0].mem.index != ZYDIS_REGISTER_NONE && + instr.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER; + }}}, + [&](zydis_reg_t& vip, + zydis_reg_t& vsp, + hndlr_trace_t& hndlr) -> std::optional { return {}; }}; +} \ No newline at end of file diff --git a/src/vmutils.cpp b/src/vmutils.cpp index a103728..c014666 100644 --- a/src/vmutils.cpp +++ b/src/vmutils.cpp @@ -119,8 +119,7 @@ void deobfuscate(zydis_rtn_t& routine) { std::uint32_t last_size = 0u; static const std::vector blacklist = { ZYDIS_MNEMONIC_CLC, ZYDIS_MNEMONIC_BT, ZYDIS_MNEMONIC_TEST, - ZYDIS_MNEMONIC_CMP, ZYDIS_MNEMONIC_CMC, ZYDIS_MNEMONIC_STC, - ZYDIS_MNEMONIC_JMP}; + ZYDIS_MNEMONIC_CMP, ZYDIS_MNEMONIC_CMC, ZYDIS_MNEMONIC_STC}; static const std::vector whitelist = { ZYDIS_MNEMONIC_PUSH, ZYDIS_MNEMONIC_POP, ZYDIS_MNEMONIC_CALL, @@ -139,6 +138,11 @@ void deobfuscate(zydis_rtn_t& routine) { break; } + if (is_jmp(itr->instr)) { + routine.erase(itr); + break; + } + zydis_reg_t reg = ZYDIS_REGISTER_NONE; // look for operands with writes to a register... for (auto op_idx = 0u; op_idx < itr->instr.operand_count; ++op_idx)