diff --git a/CMakeLists.txt b/CMakeLists.txt
index 06de3e1..f06b06f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,6 +51,7 @@ set(vmprofiler_SOURCES "")
 
 list(APPEND vmprofiler_SOURCES
 	"src/vmctx.cpp"
+	"src/vminstrs.cpp"
 	"src/vmlocate.cpp"
 	"src/vmprofiles/jmp.cpp"
 	"src/vmprofiles/sreg.cpp"
diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt
index 13987e9..d000f21 100644
--- a/deps/CMakeLists.txt
+++ b/deps/CMakeLists.txt
@@ -1,23 +1,6 @@
 # This file is automatically generated from cmake.toml - DO NOT EDIT
 # See https://github.com/build-cpp/cmkr for more information
 
-cmake_minimum_required(VERSION 3.15)
-
-# Regenerate CMakeLists.txt automatically in the root project
-set(CMKR_ROOT_PROJECT OFF)
-if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
-	set(CMKR_ROOT_PROJECT ON)
-
-	# Bootstrap cmkr
-	include(cmkr.cmake OPTIONAL RESULT_VARIABLE CMKR_INCLUDE_RESULT)
-	if(CMKR_INCLUDE_RESULT)
-		cmkr()
-	endif()
-
-	# Enable folder support
-	set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-endif()
-
 # Create a configure-time dependency on cmake.toml to improve IDE support
 if(CMKR_ROOT_PROJECT)
 	configure_file(cmake.toml cmake.toml COPYONLY)
diff --git a/include/vminstrs.hpp b/include/vminstrs.hpp
index 5a1a116..31dc534 100644
--- a/include/vminstrs.hpp
+++ b/include/vminstrs.hpp
@@ -47,7 +47,8 @@ struct vinstr_t {
 
   ///
   /// size varient of the virtual instruction... I.E SREGQ would have a value of
-  /// "64" here...where the SREGDW varient would have a "32" here...
+  /// "64" here...where the SREGDW variant would have a "32" here... this is the
+  /// stack disposition essentially, or the value on the stack...
   ///
   u8 size;
 
@@ -140,7 +141,14 @@ extern profiler_t sreg;
 ///
 /// unsorted vector of profiles... they get sorted once at runtime...
 ///
-inline std::vector profiles = {&jmp};
+inline std::vector profiles = {&jmp, &sreg};
+
+///
+/// deadstore and opaque branch removal from unicorn engine trace... this is the
+/// same algorithm as the one in vm::utils::deobfuscate...
+///
+/// handler trace whose instruction vector gets pruned in place...
+void deobfuscate(hndlr_trace_t& trace);
 
 ///
 /// sorts the profiles by descending order of matchers... this will prevent a
@@ -148,13 +156,7 @@ inline std::vector profiles = {&jmp};
 ///
 /// this function can be called multiple times...
 ///
-inline void init() {
-  if (static std::atomic_bool once = true; once.exchange(false))
-    std::sort(profiles.begin(), profiles.end(),
-              [&](profiler_t* a, profiler_t* b) -> bool {
-                return a->matchers.size() > b->matchers.size();
-              });
-}
+void init();
 
 ///
 /// determines the virtual instruction for the vm handler given vsp and vip...
@@ -163,46 +165,12 @@ inline void init() {
 /// vsp native register...
 ///
 /// returns vinstr_t structure...
-inline vinstr_t determine(zydis_reg_t& vip,
-                          zydis_reg_t& vsp,
-                          hndlr_trace_t& hndlr) {
-  const auto& instrs = hndlr.m_instrs;
-  const auto profile = std::find_if(
-      profiles.begin(), profiles.end(), [&](profiler_t* profile) -> bool {
-        for (auto& matcher : profile->matchers) {
-          const auto matched =
-              std::find_if(instrs.begin(), instrs.end(),
-                           [&](const emu_instr_t& instr) -> bool {
-                             const auto& i = instr.m_instr;
-                             return matcher(vip, vsp, i);
-                           });
-          if (matched == instrs.end())
-            return false;
-        }
-        return true;
-      });
-
-  if (profile == profiles.end())
-    return vinstr_t{mnemonic_t::unknown};
-
-  auto result = (*profile)->generate(vip, vsp, hndlr);
-  return result.has_value() ? result.value() : vinstr_t{mnemonic_t::unknown};
-}
+vinstr_t determine(zydis_reg_t& vip, zydis_reg_t& vsp, hndlr_trace_t& hndlr);
 
 ///
 /// get profile from mnemonic...
 ///
 /// mnemonic of the profile to get...
 /// pointer to the profile...
-inline profiler_t* get_profile(mnemonic_t mnemonic) {
-  if (mnemonic == mnemonic_t::unknown)
-    return nullptr;
-
-  const auto res = std::find_if(profiles.begin(), profiles.end(),
-                                [&](profiler_t* profile) -> bool {
-                                  return profile->mnemonic == mnemonic;
-                                });
-
-  return res == profiles.end() ? nullptr : *res;
-}
+profiler_t* get_profile(mnemonic_t mnemonic);
 }  // namespace vm::instrs
\ No newline at end of file
diff --git a/src/vminstrs.cpp b/src/vminstrs.cpp
new file mode 100644
index 0000000..ad885af
--- /dev/null
+++ b/src/vminstrs.cpp
@@ -0,0 +1,181 @@
+#include <vminstrs.hpp>
+
+namespace vm::instrs {
+// removes blacklisted instructions, jumps and dead register writes from the
+// trace... the scan restarts after every erase and the pass ends once the
+// trace stops shrinking...
+void deobfuscate(hndlr_trace_t& trace) {
+  // true if the operand matches reg, either as a register operand or as the
+  // base/index of a memory operand...
+  static const auto _uses_reg = [](zydis_decoded_operand_t& op,
+                                   zydis_reg_t reg) -> bool {
+    switch (op.type) {
+      case ZYDIS_OPERAND_TYPE_MEMORY: {
+        return vm::utils::reg::compare(op.mem.base, reg) ||
+               vm::utils::reg::compare(op.mem.index, reg);
+      }
+      case ZYDIS_OPERAND_TYPE_REGISTER: {
+        return vm::utils::reg::compare(op.reg.value, reg);
+      }
+      default:
+        break;
+    }
+    return false;
+  };
+
+  // true if any operand with a read action uses reg... a memory operand in
+  // slot 0 whose base is reg also counts as a read...
+  static const auto _reads = [](zydis_decoded_instr_t& instr,
+                                zydis_reg_t reg) -> bool {
+    if (instr.operands[0].type == ZYDIS_OPERAND_TYPE_MEMORY &&
+        vm::utils::reg::compare(instr.operands[0].mem.base, reg))
+      return true;
+
+    for (auto op_idx = 0u; op_idx < instr.operand_count; ++op_idx)
+      if (instr.operands[op_idx].actions & ZYDIS_OPERAND_ACTION_READ &&
+          _uses_reg(instr.operands[op_idx], reg))
+        return true;
+    return false;
+  };
+
+  // true if reg is a register operand that is written without being read...
+  static const auto _writes = [](zydis_decoded_instr_t& instr,
+                                 zydis_reg_t reg) -> bool {
+    for (auto op_idx = 0u; op_idx < instr.operand_count; ++op_idx)
+      // if instruction writes to the specific register...
+      if (instr.operands[op_idx].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+          instr.operands[op_idx].actions & ZYDIS_OPERAND_ACTION_WRITE &&
+          !(instr.operands[op_idx].actions & ZYDIS_OPERAND_ACTION_READ) &&
+          vm::utils::reg::compare(instr.operands[op_idx].reg.value, reg))
+        return true;
+    return false;
+  };
+
+  std::uint32_t last_size = 0u;
+  // instructions with these mnemonics are always removed...
+  static const std::vector blacklist = {
+      ZYDIS_MNEMONIC_CLC, ZYDIS_MNEMONIC_BT, ZYDIS_MNEMONIC_TEST,
+      ZYDIS_MNEMONIC_CMP, ZYDIS_MNEMONIC_CMC, ZYDIS_MNEMONIC_STC};
+
+  // instructions with these mnemonics are never removed...
+  static const std::vector whitelist = {
+      ZYDIS_MNEMONIC_PUSH, ZYDIS_MNEMONIC_POP, ZYDIS_MNEMONIC_CALL,
+      ZYDIS_MNEMONIC_DIV};
+
+  do {
+    last_size = trace.m_instrs.size();
+    for (auto itr = trace.m_instrs.begin(); itr != trace.m_instrs.end();
+         ++itr) {
+      if (std::find(whitelist.begin(), whitelist.end(),
+                    itr->m_instr.mnemonic) != whitelist.end())
+        continue;
+
+      if (std::find(blacklist.begin(), blacklist.end(),
+                    itr->m_instr.mnemonic) != blacklist.end()) {
+        trace.m_instrs.erase(itr);
+        break;
+      }
+
+      if (vm::utils::is_jmp(itr->m_instr)) {
+        trace.m_instrs.erase(itr);
+        break;
+      }
+
+      zydis_reg_t reg = ZYDIS_REGISTER_NONE;
+      // look for operands with writes to a register...
+      // NOTE(review): the register is always taken from operands[0] rather
+      // than operands[op_idx]... presumably the first operand is assumed to
+      // be the destination - confirm before changing...
+      for (auto op_idx = 0u; op_idx < itr->m_instr.operand_count; ++op_idx)
+        if (itr->m_instr.operands[op_idx].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+            itr->m_instr.operands[op_idx].actions & ZYDIS_OPERAND_ACTION_WRITE)
+          reg = vm::utils::reg::to64(itr->m_instr.operands[0].reg.value);
+
+      // if this current instruction writes to a register, look ahead in the
+      // instruction stream to see if it gets written to before it gets read...
+      if (reg != ZYDIS_REGISTER_NONE) {
+        // find the next place that this register is written to...
+        auto write_result = std::find_if(itr + 1, trace.m_instrs.end(),
+                                         [&](emu_instr_t& instr) -> bool {
+                                           return _writes(instr.m_instr, reg);
+                                         });
+
+        auto read_result = std::find_if(itr + 1, write_result,
+                                        [&](emu_instr_t& instr) -> bool {
+                                          return _reads(instr.m_instr, reg);
+                                        });
+
+        // if there is neither a read nor a write to this register in the
+        // instruction stream then we are going to be safe and leave the
+        // instruction in the stream...
+        if (read_result == trace.m_instrs.end() &&
+            write_result == trace.m_instrs.end())
+          continue;
+
+        // if there is no read of the register before the next write... and
+        // there is a known next write, then remove the instruction from the
+        // stream...
+        if (read_result == write_result &&
+            write_result != trace.m_instrs.end()) {
+          // if the instruction reads and writes the same register then skip...
+          if (_reads(read_result->m_instr, reg) &&
+              _writes(read_result->m_instr, reg))
+            continue;
+
+          trace.m_instrs.erase(itr);
+          break;
+        }
+      }
+    }
+  } while (last_size != trace.m_instrs.size());
+}
+
+// sorts profiles by descending matcher count... only the first call sorts...
+void init() {
+  if (static std::atomic_bool once = true; once.exchange(false))
+    std::sort(profiles.begin(), profiles.end(),
+              [](profiler_t* a, profiler_t* b) -> bool {
+                return a->matchers.size() > b->matchers.size();
+              });
+}
+
+// runs generate() of the first profile whose matchers each match some
+// instruction in the trace... yields mnemonic_t::unknown when no profile
+// matches or when generate() returns no value...
+vinstr_t determine(zydis_reg_t& vip, zydis_reg_t& vsp, hndlr_trace_t& hndlr) {
+  const auto& instrs = hndlr.m_instrs;
+  const auto profile = std::find_if(
+      profiles.begin(), profiles.end(), [&](profiler_t* profile) -> bool {
+        for (auto& matcher : profile->matchers) {
+          const auto matched =
+              std::find_if(instrs.begin(), instrs.end(),
+                           [&](const emu_instr_t& instr) -> bool {
+                             const auto& i = instr.m_instr;
+                             return matcher(vip, vsp, i);
+                           });
+          if (matched == instrs.end())
+            return false;
+        }
+        return true;
+      });
+
+  if (profile == profiles.end())
+    return vinstr_t{mnemonic_t::unknown};
+
+  auto result = (*profile)->generate(vip, vsp, hndlr);
+  return result.has_value() ? result.value() : vinstr_t{mnemonic_t::unknown};
+}
+
+// linear lookup of a profile by mnemonic... nullptr if unknown or not found...
+profiler_t* get_profile(mnemonic_t mnemonic) {
+  if (mnemonic == mnemonic_t::unknown)
+    return nullptr;
+
+  const auto res = std::find_if(profiles.begin(), profiles.end(),
+                                [&](profiler_t* profile) -> bool {
+                                  return profile->mnemonic == mnemonic;
+                                });
+
+  return res == profiles.end() ? nullptr : *res;
+}
+}  // namespace vm::instrs
\ No newline at end of file
diff --git a/src/vmprofiles/sreg.cpp b/src/vmprofiles/sreg.cpp
index 47c85a2..0b05d02 100644
--- a/src/vmprofiles/sreg.cpp
+++ b/src/vmprofiles/sreg.cpp
@@ -13,21 +13,22 @@ profiler_t sreg = {
                 instr.operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY &&
                 instr.operands[1].mem.base == vsp;
        },
-       // ADD VSP, 8
+       // ADD VSP, OFFSET
        [&](const zydis_reg_t vip,
            const zydis_reg_t vsp,
            const zydis_decoded_instr_t& instr) -> bool {
          return instr.mnemonic == ZYDIS_MNEMONIC_ADD &&
                 instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
                 instr.operands[0].reg.value == vsp &&
-                instr.operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
-                instr.operands[1].imm.value.u == 8;
+                instr.operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
        },
        // MOV REG, [VIP]
        [&](const zydis_reg_t vip,
            const zydis_reg_t vsp,
            const zydis_decoded_instr_t& instr) -> bool {
-         return instr.mnemonic == ZYDIS_MNEMONIC_MOV &&
+         return (instr.mnemonic == ZYDIS_MNEMONIC_MOV ||
+                 instr.mnemonic == ZYDIS_MNEMONIC_MOVSX ||
+                 instr.mnemonic == ZYDIS_MNEMONIC_MOVZX) &&
                 instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
                 instr.operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY &&
                 instr.operands[1].mem.base == vip;
@@ -44,5 +45,42 @@ profiler_t sreg = {
        }}},
     [&](zydis_reg_t& vip,
         zydis_reg_t& vsp,
-        hndlr_trace_t& hndlr) -> std::optional<vinstr_t> { return {}; }};
+        hndlr_trace_t& hndlr) -> std::optional<vinstr_t> {
+      vinstr_t res{};
+      res.mnemonic = mnemonic_t::sreg;
+
+      // locates ADD VSP, VALUE...
+      const auto add_vsp = std::find_if(
+          hndlr.m_instrs.begin(), hndlr.m_instrs.end(),
+          [&](emu_instr_t& instr) -> bool {
+            const auto& i = instr.m_instr;
+            return i.mnemonic == ZYDIS_MNEMONIC_ADD &&
+                   i.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+                   i.operands[0].reg.value == vsp &&
+                   i.operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
+          });
+
+      // std::find_if returns end() when nothing matches... report "no value"
+      // instead of dereferencing it...
+      if (add_vsp == hndlr.m_instrs.end())
+        return {};
+
+      // immediate added to VSP times 8 (so ADD VSP, 8 gives a size of 64)...
+      res.size = add_vsp->m_instr.operands[1].imm.value.u * 8;
+
+      // MOV [RSP+REG], REG...
+      // NOTE(review): this search result is not consumed yet...
+      const auto mov_vreg_value = std::find_if(
+          hndlr.m_instrs.begin(), hndlr.m_instrs.end(),
+          [&](emu_instr_t& instr) -> bool {
+            const auto& i = instr.m_instr;
+            return i.mnemonic == ZYDIS_MNEMONIC_MOV &&
+                   i.operands[0].type == ZYDIS_OPERAND_TYPE_MEMORY &&
+                   i.operands[0].mem.base == ZYDIS_REGISTER_RSP &&
+                   i.operands[0].mem.index != ZYDIS_REGISTER_NONE &&
+                   i.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER;
+          });
+
+      return res;
+    }};
 }
\ No newline at end of file