diff --git a/CMakeLists.txt b/CMakeLists.txt
index 610b08d..23308ed 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,8 +52,11 @@ set(vmprofiler_SOURCES "")
 list(APPEND vmprofiler_SOURCES
 	"src/vmctx.cpp"
 	"src/vmlocate.cpp"
+	"src/vmprofiles/jmp.cpp"
+	"src/vmprofiles/sreg.cpp"
 	"src/vmutils.cpp"
 	"include/vmctx.hpp"
+	"include/vminstrs.hpp"
 	"include/vmlocate.hpp"
 	"include/vmprofiler.hpp"
 	"include/vmutils.hpp"
diff --git a/include/vmctx.hpp b/include/vmctx.hpp
index 59efc4a..b908aeb 100644
--- a/include/vmctx.hpp
+++ b/include/vmctx.hpp
@@ -12,9 +12,9 @@ class vmctx_t {
   const std::uintptr_t m_module_base, m_image_base, m_vm_entry_rva,
       m_image_size;
 
-  zydis_register_t get_vip() const { return m_vip; }
-  zydis_register_t get_vsp() const { return m_vsp; }
-  zydis_routine_t get_vm_enter() const { return m_vm_entry; }
+  zydis_reg_t get_vip() const { return m_vip; }
+  zydis_reg_t get_vsp() const { return m_vsp; }
+  zydis_rtn_t get_vm_enter() const { return m_vm_entry; }
 
  private:
   ///
@@ -22,11 +22,11 @@ class vmctx_t {
   /// enter... these will change during the execution inside of the vm but these
   /// values stay the same as the ones used by vm enter...
   ///
-  zydis_register_t m_vip, m_vsp;
+  zydis_reg_t m_vip, m_vsp;
 
   ///
   /// the virtual machine enter flattened and deobfuscated...
   ///
-  zydis_routine_t m_vm_entry;
+  zydis_rtn_t m_vm_entry;
 };
 }  // namespace vm
\ No newline at end of file
diff --git a/include/vminstrs.hpp b/include/vminstrs.hpp
new file mode 100644
index 0000000..d55851a
--- /dev/null
+++ b/include/vminstrs.hpp
@@ -0,0 +1,167 @@
+#pragma once
+#include <vmutils.hpp>
+
+#include <algorithm>
+#include <functional>
+#include <optional>
+#include <string>
+#include <vector>
+
+namespace vm::instrs {
+/// <summary>
+/// mnemonic representation of supported virtual instructions...
+/// </summary>
+enum class mnemonic_t {
+  unknown,
+  sreg,
+  lreg,
+  lconst,
+  add,
+  div,
+  idiv,
+  mul,
+  imul,
+  nand,
+  read,
+  write,
+  shl,
+  shld,
+  shr,
+  shrd,
+  lvsp,
+  svsp,
+  writecr3,
+  readcr3,
+  writecr8,
+  readcr8,
+  cpuid,
+  rdtsc,
+  call,
+  jmp,
+  vmexit
+};
+
+/// <summary>
+/// the main virtual instruction structure which is returned by profilers...
+/// </summary>
+struct vinstr_t {
+  /// <summary>
+  /// mnemonic of the virtual instruction...
+  /// </summary>
+  mnemonic_t mnemonic;
+
+  /// <summary>
+  /// size variant of the virtual instruction... I.E SREGQ would have a value of
+  /// "64" here...where the SREGDW variant would have a "32" here...
+  /// </summary>
+  u8 size;
+
+  struct {
+    /// <summary>
+    /// true if the virtual instruction has an imm false if not...
+    /// </summary>
+    bool has_imm;
+
+    /// <summary>
+    /// size in bits of the imm... 8, 16, 32, 64...
+    /// </summary>
+    u8 size;
+
+    /// <summary>
+    /// imm value...
+    /// </summary>
+    u64 val;
+  } imm;
+};
+
+/// <summary>
+/// matcher function which returns true if an instruction matches a desired
+/// one...
+/// </summary>
+using matcher_t = std::function<bool(const zydis_reg_t vip,
+                                     const zydis_reg_t vsp,
+                                     const zydis_decoded_instr_t& instr)>;
+
+/// <summary>
+/// virtual instruction structure generator... this can update the vip and vsp
+/// argument... it cannot update the instruction stream (hndlr)...
+/// </summary>
+using vinstr_gen_t = std::function<std::optional<vinstr_t>(zydis_reg_t& vip,
+                                                           zydis_reg_t& vsp,
+                                                           zydis_rtn_t& hndlr)>;
+
+/// <summary>
+/// each virtual instruction has its own profiler_t structure which can generate
+/// all variants of the virtual instruction for each size...
+/// </summary>
+struct profiler_t {
+  /// <summary>
+  /// string name of the virtual instruction that this profile generates for...
+  /// </summary>
+  std::string name;
+
+  /// <summary>
+  /// mnemonic representation of the virtual instruction...
+  /// </summary>
+  mnemonic_t mnemonic;
+
+  /// <summary>
+  /// vector of matcher lambdas which return true if a given instruction
+  /// matches...
+  /// </summary>
+  std::vector<matcher_t> matchers;
+
+  /// <summary>
+  /// generates a virtual instruction structure...
+  /// </summary>
+  vinstr_gen_t generate;
+};
+
+/// <summary>
+/// profile of the JMP virtual instruction... defined in
+/// src/vmprofiles/jmp.cpp...
+/// </summary>
+extern profiler_t jmp;
+
+/// <summary>
+/// the profiles that determine tries... they are tried in this order...
+/// </summary>
+inline std::vector<profiler_t*> profiles = {&jmp};
+
+/// <summary>
+/// picks the first profile whose matchers are all satisfied by the handler and
+/// lets that profile generate the virtual instruction...
+/// </summary>
+/// <param name="vip">native register holding VIP... passed on to the matchers
+/// and to the generator which is allowed to update it...</param>
+/// <param name="vsp">native register holding VSP... passed on to the matchers
+/// and to the generator which is allowed to update it...</param>
+/// <param name="hndlr">instructions of the virtual instruction
+/// handler...</param>
+/// <returns>the generated virtual instruction... the mnemonic is
+/// mnemonic_t::unknown if no profile matches or nothing is generated...</returns>
+inline vinstr_t determine(zydis_reg_t& vip,
+                          zydis_reg_t& vsp,
+                          zydis_rtn_t& hndlr) {
+  const vinstr_t unknown{mnemonic_t::unknown};
+
+  // a profile matches when every one of its matchers is satisfied by at least
+  // one instruction of the handler...
+  const auto profile = std::find_if(
+      profiles.begin(), profiles.end(), [&](const profiler_t* candidate) {
+        return std::all_of(
+            candidate->matchers.begin(), candidate->matchers.end(),
+            [&](const matcher_t& matcher) {
+              return std::any_of(hndlr.begin(), hndlr.end(),
+                                 [&](const zydis_instr_t& entry) {
+                                   return matcher(vip, vsp, entry.instr);
+                                 });
+            });
+      });
+
+  if (profile == profiles.end())
+    return unknown;
+
+  return (*profile)->generate(vip, vsp, hndlr).value_or(unknown);
+}
+}  // namespace vm::instrs
\ No newline at end of file
diff --git a/include/vmlocate.hpp b/include/vmlocate.hpp
index eb6013c..f07e8da 100644
--- a/include/vmlocate.hpp
+++ b/include/vmlocate.hpp
@@ -6,7 +6,7 @@
 #define PUSH_4B_MASK "x????"
namespace vm::locate { -inline bool find(const zydis_routine_t& rtn, +inline bool find(const zydis_rtn_t& rtn, std::function callback) { auto res = std::find_if(rtn.begin(), rtn.end(), callback); return res != rtn.end(); diff --git a/include/vmprofiler.hpp b/include/vmprofiler.hpp index 224ec8a..db78b39 100644 --- a/include/vmprofiler.hpp +++ b/include/vmprofiler.hpp @@ -1,4 +1,5 @@ #pragma once #include #include -#include \ No newline at end of file +#include +#include \ No newline at end of file diff --git a/include/vmutils.hpp b/include/vmutils.hpp index 1c22f93..32c31f8 100644 --- a/include/vmutils.hpp +++ b/include/vmutils.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include using u8 = unsigned char; @@ -13,7 +14,7 @@ using u32 = unsigned int; using u64 = unsigned long long; using zydis_decoded_instr_t = ZydisDecodedInstruction; -using zydis_register_t = ZydisRegister; +using zydis_reg_t = ZydisRegister; using zydis_mnemonic_t = ZydisMnemonic; using zydis_decoded_operand_t = ZydisDecodedOperand; @@ -23,7 +24,7 @@ struct zydis_instr_t { std::uintptr_t addr; }; -using zydis_routine_t = std::vector; +using zydis_rtn_t = std::vector; namespace vm::utils { inline thread_local std::shared_ptr g_decoder = nullptr; @@ -72,7 +73,7 @@ bool is_jmp(const zydis_decoded_instr_t& instr); /// /// reference to a zydis_routine_t to be /// printed... -void print(zydis_routine_t& routine); +void print(zydis_rtn_t& routine); /// /// prints a single disassembly view of an instruction... @@ -90,7 +91,7 @@ namespace reg { /// a zydis decoded register value... /// returns the largest width register of the given register... AL /// gives RAX... -zydis_register_t to64(zydis_register_t reg); +zydis_reg_t to64(zydis_reg_t reg); /// /// compares to registers with each other... calls to64 and compares... @@ -98,7 +99,7 @@ zydis_register_t to64(zydis_register_t reg); /// register a... /// register b... /// returns true if register to64(a) == to64(b)... 
-bool compare(zydis_register_t a, zydis_register_t b);
+bool compare(zydis_reg_t a, zydis_reg_t b);
 }  // namespace reg
 
 ///
@@ -109,7 +110,7 @@ bool compare(zydis_register_t a, zydis_register_t b);
 /// from... keep JCC's in the flattened
 /// instruction stream... returns true if flattened was
 /// successful...
-bool flatten(zydis_routine_t& routine,
+bool flatten(zydis_rtn_t& routine,
              std::uintptr_t routine_addr,
              bool keep_jmps = false,
              std::uint32_t max_instrs = 500,
@@ -119,7 +120,7 @@ bool flatten(zydis_routine_t& routine,
 /// deadstore deobfuscation of a flattened routine...
 ///
 /// reference to a flattened instruction vector...
-void deobfuscate(zydis_routine_t& routine);
+void deobfuscate(zydis_rtn_t& routine);
 
 ///
 /// small namespace that contains function wrappers to determine the validity of
diff --git a/src/vmlocate.cpp b/src/vmlocate.cpp
index a49f647..3b3df78 100644
--- a/src/vmlocate.cpp
+++ b/src/vmlocate.cpp
@@ -28,7 +28,7 @@ std::vector get_vm_entries(std::uintptr_t module_base,
   std::uintptr_t result = module_base;
   std::vector entries;
 
-  static const auto push_regs = [&](const zydis_routine_t& rtn) -> bool {
+  static const auto push_regs = [&](const zydis_rtn_t& rtn) -> bool {
     for (unsigned reg = ZYDIS_REGISTER_RAX; reg < ZYDIS_REGISTER_R15; ++reg) {
       auto res = std::find_if(
           rtn.begin(), rtn.end(), [&](const zydis_instr_t& instr) -> bool {
@@ -49,7 +49,7 @@ std::vector get_vm_entries(std::uintptr_t module_base,
     result = sigscan((void*)++result, module_size - (result - module_base),
                      PUSH_4B_IMM, PUSH_4B_MASK);
 
-    zydis_routine_t rtn;
+    zydis_rtn_t rtn;
     if (!vm::utils::scn::executable(module_base, result))
       continue;
 
diff --git a/src/vmprofiles/jmp.cpp b/src/vmprofiles/jmp.cpp
new file mode 100644
index 0000000..a6ef5cc
--- /dev/null
+++ b/src/vmprofiles/jmp.cpp
@@ -0,0 +1,140 @@
+#include <vminstrs.hpp>
+
+#include <algorithm>
+#include <optional>
+
+namespace vm::instrs {
+namespace {
+/// <summary>
+/// tests an instruction against the pattern MOV REG, src... both operands must
+/// be registers and the source operand must be the given register...
+/// </summary>
+/// <param name="instr">decoded instruction to test...</param>
+/// <param name="src">register expected as the source operand...</param>
+/// <returns>true if the instruction is MOV REG, src...</returns>
+bool is_mov_reg_from(const zydis_decoded_instr_t& instr,
+                     const zydis_reg_t src) {
+  return instr.mnemonic == ZYDIS_MNEMONIC_MOV &&
+         instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+         instr.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+         instr.operands[1].reg.value == src;
+}
+}  // namespace
+
+/// <summary>
+/// profile of the JMP virtual instruction... a handler matches when it contains
+/// MOV REG, [VSP], ADD VSP, 8 and MOV VIP, REG... generate then works out which
+/// native registers hold VIP and VSP after the handler and writes them back
+/// through its vip and vsp arguments...
+/// </summary>
+profiler_t jmp = {
+    "JMP",
+    mnemonic_t::jmp,
+    // the lambdas of this initializer live at namespace scope where a capture
+    // default is ill-formed... they need no captures so they use []...
+    //
+    // MOV REG, [VSP]
+    {{[](const zydis_reg_t vip,
+         const zydis_reg_t vsp,
+         const zydis_decoded_instr_t& instr) -> bool {
+        return instr.mnemonic == ZYDIS_MNEMONIC_MOV &&
+               instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+               instr.operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY &&
+               instr.operands[1].mem.base == vsp;
+      },
+      // ADD VSP, 8
+      [](const zydis_reg_t vip,
+         const zydis_reg_t vsp,
+         const zydis_decoded_instr_t& instr) -> bool {
+        return instr.mnemonic == ZYDIS_MNEMONIC_ADD &&
+               instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+               instr.operands[0].reg.value == vsp &&
+               instr.operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
+               instr.operands[1].imm.value.u == 8;
+      },
+      // MOV VIP, REG
+      [](const zydis_reg_t vip,
+         const zydis_reg_t vsp,
+         const zydis_decoded_instr_t& instr) -> bool {
+        return instr.mnemonic == ZYDIS_MNEMONIC_MOV &&
+               instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+               instr.operands[0].reg.value == vip &&
+               instr.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER;
+      }}},
+    [](zydis_reg_t& vip,
+       zydis_reg_t& vsp,
+       zydis_rtn_t& hndlr) -> std::optional<vinstr_t> {
+      const auto xchg = std::find_if(
+          hndlr.begin(), hndlr.end(), [&](const zydis_instr_t& instr) -> bool {
+            const auto& i = instr.instr;
+            // exactly one of the two register operands must be VSP...
+            return i.mnemonic == ZYDIS_MNEMONIC_XCHG &&
+                   i.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+                   i.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+                   (i.operands[0].reg.value == vsp) !=
+                       (i.operands[1].reg.value == vsp);
+          });
+
+      // this JMP virtual instruction changes VSP as well as VIP...
+      if (xchg != hndlr.end()) {
+        // grab the register that isnt VSP in the XCHG...
+        // xchg reg, vsp or xchg vsp, reg...
+        const auto& ops = xchg->instr.operands;
+        const zydis_reg_t write_dep =
+            ops[0].reg.value != vsp ? ops[0].reg.value : ops[1].reg.value;
+
+        // find the next MOV REG, write_dep... this REG will be VSP...
+        const auto mov_reg_write_dep = std::find_if(
+            xchg, hndlr.end(), [&](const zydis_instr_t& instr) -> bool {
+              return is_mov_reg_from(instr.instr, write_dep);
+            });
+
+        // bail out before touching vip/vsp so a failed profile leaves the
+        // callers registers untouched...
+        if (mov_reg_write_dep == hndlr.end())
+          return {};
+
+        // update VIP... VSP becomes VIP... with the XCHG...
+        vip = vsp;
+        vsp = mov_reg_write_dep->instr.operands[0].reg.value;
+      } else {
+        // find the MOV REG, [VSP] instruction...
+        const auto mov_reg_deref_vsp = std::find_if(
+            hndlr.begin(), hndlr.end(),
+            [&](const zydis_instr_t& instr) -> bool {
+              const auto& i = instr.instr;
+              return i.mnemonic == ZYDIS_MNEMONIC_MOV &&
+                     i.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+                     i.operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY &&
+                     i.operands[1].mem.base == vsp;
+            });
+
+        if (mov_reg_deref_vsp == hndlr.end())
+          return {};
+
+        // find the MOV REG, mov_reg_deref_vsp->operands[0].reg.value
+        const auto loaded = mov_reg_deref_vsp->instr.operands[0].reg.value;
+        const auto mov_vip_reg = std::find_if(
+            mov_reg_deref_vsp, hndlr.end(),
+            [&](const zydis_instr_t& instr) -> bool {
+              return is_mov_reg_from(instr.instr, loaded);
+            });
+
+        if (mov_vip_reg == hndlr.end())
+          return {};
+
+        vip = mov_vip_reg->instr.operands[0].reg.value;
+
+        // see if VSP gets updated as well...
+        const auto mov_reg_vsp = std::find_if(
+            mov_reg_deref_vsp, hndlr.end(),
+            [&](const zydis_instr_t& instr) -> bool {
+              return is_mov_reg_from(instr.instr, vsp);
+            });
+
+        if (mov_reg_vsp != hndlr.end())
+          vsp = mov_reg_vsp->instr.operands[0].reg.value;
+      }
+      return vinstr_t{mnemonic_t::jmp};
+    }};
+}  // namespace vm::instrs
\ No newline at end of file
diff --git a/src/vmprofiles/sreg.cpp b/src/vmprofiles/sreg.cpp
new file mode 100644
index 0000000..e69de29
diff --git a/src/vmutils.cpp b/src/vmutils.cpp
index 2dafbfd..a103728 100644
--- a/src/vmutils.cpp
+++ b/src/vmutils.cpp
@@ -8,7 +8,7 @@ void print(const zydis_decoded_instr_t& instr) {
   std::puts(buffer);
 }
 
-void print(zydis_routine_t& routine) {
+void print(zydis_rtn_t& routine) {
   char buffer[256];
   for (auto [instr, raw, addr] : routine) {
     ZydisFormatterFormatInstruction(vm::utils::g_formatter.get(), &instr,
@@ -22,7 +22,7 @@ bool is_jmp(const zydis_decoded_instr_t& instr) {
          instr.mnemonic <= ZYDIS_MNEMONIC_JZ;
 }
 
-bool flatten(zydis_routine_t& routine,
+bool flatten(zydis_rtn_t& routine,
              std::uintptr_t routine_addr,
              bool keep_jmps,
              std::uint32_t max_instrs,
@@ -74,9 +74,9 @@ bool flatten(zydis_routine_t& routine,
   return false;
 }
 
-void deobfuscate(zydis_routine_t& routine) {
+void deobfuscate(zydis_rtn_t& routine) {
   static const auto _uses_reg = [](zydis_decoded_operand_t& op,
-                                   zydis_register_t reg) -> bool {
+                                   zydis_reg_t reg) -> bool {
     switch (op.type) {
       case ZYDIS_OPERAND_TYPE_MEMORY: {
         return vm::utils::reg::compare(op.mem.base, reg) ||
@@ -92,7 +92,7 @@ void deobfuscate(zydis_routine_t& routine) {
   };
 
   static const auto _reads = [](zydis_decoded_instr_t& instr,
-                                zydis_register_t reg) -> bool {
+                                zydis_reg_t reg) -> bool {
     if (instr.operands[0].type == ZYDIS_OPERAND_TYPE_MEMORY &&
         vm::utils::reg::compare(instr.operands[0].mem.base, reg))
       return true;
@@ 
-105,7 +105,7 @@ void deobfuscate(zydis_routine_t& routine) { }; static const auto _writes = [](zydis_decoded_instr_t& instr, - zydis_register_t reg) -> bool { + zydis_reg_t reg) -> bool { for (auto op_idx = 0u; op_idx < instr.operand_count; ++op_idx) // if instruction writes to the specific register... if (instr.operands[op_idx].type == ZYDIS_OPERAND_TYPE_REGISTER && @@ -119,7 +119,8 @@ void deobfuscate(zydis_routine_t& routine) { std::uint32_t last_size = 0u; static const std::vector blacklist = { ZYDIS_MNEMONIC_CLC, ZYDIS_MNEMONIC_BT, ZYDIS_MNEMONIC_TEST, - ZYDIS_MNEMONIC_CMP, ZYDIS_MNEMONIC_CMC, ZYDIS_MNEMONIC_STC}; + ZYDIS_MNEMONIC_CMP, ZYDIS_MNEMONIC_CMC, ZYDIS_MNEMONIC_STC, + ZYDIS_MNEMONIC_JMP}; static const std::vector whitelist = { ZYDIS_MNEMONIC_PUSH, ZYDIS_MNEMONIC_POP, ZYDIS_MNEMONIC_CALL, @@ -138,7 +139,7 @@ void deobfuscate(zydis_routine_t& routine) { break; } - zydis_register_t reg = ZYDIS_REGISTER_NONE; + zydis_reg_t reg = ZYDIS_REGISTER_NONE; // look for operands with writes to a register... for (auto op_idx = 0u; op_idx < itr->instr.operand_count; ++op_idx) if (itr->instr.operands[op_idx].type == ZYDIS_OPERAND_TYPE_REGISTER && @@ -183,11 +184,11 @@ void deobfuscate(zydis_routine_t& routine) { } namespace reg { -zydis_register_t to64(zydis_register_t reg) { +zydis_reg_t to64(zydis_reg_t reg) { return ZydisRegisterGetLargestEnclosing(ZYDIS_MACHINE_MODE_LONG_64, reg); } -bool compare(zydis_register_t a, zydis_register_t b) { +bool compare(zydis_reg_t a, zydis_reg_t b) { return to64(a) == to64(b); } } // namespace reg