diff --git a/CMakeLists.txt b/CMakeLists.txt
index 06de3e1..f06b06f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,6 +51,7 @@ set(vmprofiler_SOURCES "")
list(APPEND vmprofiler_SOURCES
"src/vmctx.cpp"
+ "src/vminstrs.cpp"
"src/vmlocate.cpp"
"src/vmprofiles/jmp.cpp"
"src/vmprofiles/sreg.cpp"
diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt
index 13987e9..d000f21 100644
--- a/deps/CMakeLists.txt
+++ b/deps/CMakeLists.txt
@@ -1,23 +1,6 @@
# This file is automatically generated from cmake.toml - DO NOT EDIT
# See https://github.com/build-cpp/cmkr for more information
-cmake_minimum_required(VERSION 3.15)
-
-# Regenerate CMakeLists.txt automatically in the root project
-set(CMKR_ROOT_PROJECT OFF)
-if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
- set(CMKR_ROOT_PROJECT ON)
-
- # Bootstrap cmkr
- include(cmkr.cmake OPTIONAL RESULT_VARIABLE CMKR_INCLUDE_RESULT)
- if(CMKR_INCLUDE_RESULT)
- cmkr()
- endif()
-
- # Enable folder support
- set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-endif()
-
# Create a configure-time dependency on cmake.toml to improve IDE support
if(CMKR_ROOT_PROJECT)
configure_file(cmake.toml cmake.toml COPYONLY)
diff --git a/include/vminstrs.hpp b/include/vminstrs.hpp
index 5a1a116..31dc534 100644
--- a/include/vminstrs.hpp
+++ b/include/vminstrs.hpp
@@ -47,7 +47,8 @@ struct vinstr_t {
///
/// size varient of the virtual instruction... I.E SREGQ would have a value of
- /// "64" here...where the SREGDW varient would have a "32" here...
+ /// "64" here...where the SREGDW variant would have a "32" here... this is
+ /// essentially the stack disposition, i.e. the size in bits of the value on the stack...
///
u8 size;
@@ -140,7 +141,14 @@ extern profiler_t sreg;
///
/// unsorted vector of profiles... they get sorted once at runtime...
///
-inline std::vector profiles = {&jmp};
+inline std::vector profiles = {&jmp, &sreg};
+
+///
+/// deadstore and opaque branch removal from unicorn engine trace... this is the
+/// same algorithm as the one in vm::utils::deobfuscate...
+///
+///
+void deobfuscate(hndlr_trace_t& trace);
///
/// sorts the profiles by descending order of matchers... this will prevent a
@@ -148,13 +156,7 @@ inline std::vector profiles = {&jmp};
///
/// this function can be called multiple times...
///
-inline void init() {
- if (static std::atomic_bool once = true; once.exchange(false))
- std::sort(profiles.begin(), profiles.end(),
- [&](profiler_t* a, profiler_t* b) -> bool {
- return a->matchers.size() > b->matchers.size();
- });
-}
+void init();
///
/// determines the virtual instruction for the vm handler given vsp and vip...
@@ -163,46 +165,12 @@ inline void init() {
/// vsp native register...
///
/// returns vinstr_t structure...
-inline vinstr_t determine(zydis_reg_t& vip,
- zydis_reg_t& vsp,
- hndlr_trace_t& hndlr) {
- const auto& instrs = hndlr.m_instrs;
- const auto profile = std::find_if(
- profiles.begin(), profiles.end(), [&](profiler_t* profile) -> bool {
- for (auto& matcher : profile->matchers) {
- const auto matched =
- std::find_if(instrs.begin(), instrs.end(),
- [&](const emu_instr_t& instr) -> bool {
- const auto& i = instr.m_instr;
- return matcher(vip, vsp, i);
- });
- if (matched == instrs.end())
- return false;
- }
- return true;
- });
-
- if (profile == profiles.end())
- return vinstr_t{mnemonic_t::unknown};
-
- auto result = (*profile)->generate(vip, vsp, hndlr);
- return result.has_value() ? result.value() : vinstr_t{mnemonic_t::unknown};
-}
+vinstr_t determine(zydis_reg_t& vip, zydis_reg_t& vsp, hndlr_trace_t& hndlr);
///
/// get profile from mnemonic...
///
/// mnemonic of the profile to get...
/// pointer to the profile...
-inline profiler_t* get_profile(mnemonic_t mnemonic) {
- if (mnemonic == mnemonic_t::unknown)
- return nullptr;
-
- const auto res = std::find_if(profiles.begin(), profiles.end(),
- [&](profiler_t* profile) -> bool {
- return profile->mnemonic == mnemonic;
- });
-
- return res == profiles.end() ? nullptr : *res;
-}
+profiler_t* get_profile(mnemonic_t mnemonic);
} // namespace vm::instrs
\ No newline at end of file
diff --git a/src/vminstrs.cpp b/src/vminstrs.cpp
new file mode 100644
index 0000000..ad885af
--- /dev/null
+++ b/src/vminstrs.cpp
@@ -0,0 +1,163 @@
+#include
+
+namespace vm::instrs {
+void deobfuscate(hndlr_trace_t& trace) { // prunes trace.m_instrs in place; repeats until a full pass erases nothing...
+ static const auto _uses_reg = [](zydis_decoded_operand_t& op, // does operand "op" reference "reg"?
+ zydis_reg_t reg) -> bool {
+ switch (op.type) {
+ case ZYDIS_OPERAND_TYPE_MEMORY: { // memory operand: check both base and index...
+ return vm::utils::reg::compare(op.mem.base, reg) ||
+ vm::utils::reg::compare(op.mem.index, reg);
+ }
+ case ZYDIS_OPERAND_TYPE_REGISTER: {
+ return vm::utils::reg::compare(op.reg.value, reg);
+ }
+ default: // any other operand type never counts as a use...
+ break;
+ }
+ return false;
+ };
+
+ static const auto _reads = [](zydis_decoded_instr_t& instr, // does "instr" read "reg"?
+ zydis_reg_t reg) -> bool {
+ if (instr.operands[0].type == ZYDIS_OPERAND_TYPE_MEMORY && // operand 0 addressing memory through reg counts as a read, whatever its action flags...
+ vm::utils::reg::compare(instr.operands[0].mem.base, reg))
+ return true;
+
+ for (auto op_idx = 0u; op_idx < instr.operand_count; ++op_idx) // otherwise: any operand flagged READ that references reg...
+ if (instr.operands[op_idx].actions & ZYDIS_OPERAND_ACTION_READ &&
+ _uses_reg(instr.operands[op_idx], reg))
+ return true;
+ return false;
+ };
+
+ static const auto _writes = [](zydis_decoded_instr_t& instr, // does "instr" have a write-only register operand matching "reg"?
+ zydis_reg_t reg) -> bool {
+ for (auto op_idx = 0u; op_idx < instr.operand_count; ++op_idx)
+ // a register operand with WRITE set and READ clear that matches reg...
+ if (instr.operands[op_idx].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+ instr.operands[op_idx].actions & ZYDIS_OPERAND_ACTION_WRITE &&
+ !(instr.operands[op_idx].actions & ZYDIS_OPERAND_ACTION_READ) &&
+ vm::utils::reg::compare(instr.operands[op_idx].reg.value, reg))
+ return true;
+ return false;
+ };
+
+ std::uint32_t last_size = 0u; // instruction count taken before each pass; used to detect that nothing was erased...
+ static const std::vector blacklist = { // mnemonics that are erased unconditionally...
+ ZYDIS_MNEMONIC_CLC, ZYDIS_MNEMONIC_BT, ZYDIS_MNEMONIC_TEST,
+ ZYDIS_MNEMONIC_CMP, ZYDIS_MNEMONIC_CMC, ZYDIS_MNEMONIC_STC};
+
+ static const std::vector whitelist = { // mnemonics that this function never erases...
+ ZYDIS_MNEMONIC_PUSH, ZYDIS_MNEMONIC_POP, ZYDIS_MNEMONIC_CALL,
+ ZYDIS_MNEMONIC_DIV};
+
+ do {
+ last_size = trace.m_instrs.size();
+ for (auto itr = trace.m_instrs.begin(); itr != trace.m_instrs.end(); // a pass erases at most one instruction: every erase below is followed by break...
+ ++itr) {
+ if (std::find(whitelist.begin(), whitelist.end(),
+ itr->m_instr.mnemonic) != whitelist.end())
+ continue;
+
+ if (std::find(blacklist.begin(), blacklist.end(),
+ itr->m_instr.mnemonic) != blacklist.end()) {
+ trace.m_instrs.erase(itr);
+ break;
+ }
+
+ if (vm::utils::is_jmp(itr->m_instr)) { // jumps (as judged by vm::utils::is_jmp) are erased as well...
+ trace.m_instrs.erase(itr);
+ break;
+ }
+
+ zydis_reg_t reg = ZYDIS_REGISTER_NONE;
+ // look for a register operand that this instruction writes...
+ for (auto op_idx = 0u; op_idx < itr->m_instr.operand_count; ++op_idx)
+ if (itr->m_instr.operands[op_idx].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+ itr->m_instr.operands[op_idx].actions & ZYDIS_OPERAND_ACTION_WRITE)
+ reg = vm::utils::reg::to64(itr->m_instr.operands[0].reg.value); // NOTE(review): reads operands[0], not operands[op_idx] - confirm this is intended
+
+ // if this current instruction writes to a register, look ahead in the
+ // instruction stream to see if it gets written to before it gets read...
+ if (reg != ZYDIS_REGISTER_NONE) {
+ // find the next place that this register is written to...
+ auto write_result = std::find_if(itr + 1, trace.m_instrs.end(),
+ [&](emu_instr_t& instr) -> bool {
+ return _writes(instr.m_instr, reg);
+ });
+
+ auto read_result = std::find_if(itr + 1, write_result, // first read of reg before that write; equals write_result when there is none...
+ [&](emu_instr_t& instr) -> bool {
+ return _reads(instr.m_instr, reg);
+ });
+
+ // if there is neither a read nor a write to this register in the
+ // rest of the instruction stream then we are going to be safe and
+ // leave the instruction in the stream...
+ if (read_result == trace.m_instrs.end() &&
+ write_result == trace.m_instrs.end())
+ continue;
+
+ // if there is no read of the register before the next write... and
+ // there is a known next write, then remove the instruction from the
+ // stream...
+ if (read_result == write_result &&
+ write_result != trace.m_instrs.end()) {
+ // if the overwriting instruction also reads the register then skip...
+ if (_reads(read_result->m_instr, reg) &&
+ _writes(read_result->m_instr, reg))
+ continue;
+
+ trace.m_instrs.erase(itr);
+ break;
+ }
+ }
+ }
+ } while (last_size != trace.m_instrs.size());
+}
+
+void init() {
+  static std::atomic_bool pending{true};  // true only until the first call...
+  if (!pending.exchange(false)) return;   // ...so the sort below runs once
+  std::sort(profiles.begin(), profiles.end(), [](auto* lhs, auto* rhs) {
+    return lhs->matchers.size() > rhs->matchers.size();  // most matchers first
+  });
+}
+
+vinstr_t determine(zydis_reg_t& vip, zydis_reg_t& vsp, hndlr_trace_t& hndlr) {
+  // a profile fits when each of its matchers accepts at least one instruction
+  // of the handler trace...
+  const auto fits = [&](profiler_t* candidate) -> bool {
+    const auto& trace = hndlr.m_instrs;
+    return std::all_of(
+        candidate->matchers.begin(), candidate->matchers.end(),
+        [&](auto& matcher) -> bool {
+          return std::any_of(trace.begin(), trace.end(),
+                             [&](const emu_instr_t& entry) -> bool {
+                               return matcher(vip, vsp, entry.m_instr);
+                             });
+        });
+  };
+
+  // profiles are tried in their current order; the first fit wins...
+  const auto hit = std::find_if(profiles.begin(), profiles.end(), fits);
+  if (hit == profiles.end()) return vinstr_t{mnemonic_t::unknown};
+
+  // a profile that fits but generates nothing also yields "unknown"...
+  auto generated = (*hit)->generate(vip, vsp, hndlr);
+  return generated ? *generated : vinstr_t{mnemonic_t::unknown};
+}
+
+profiler_t* get_profile(mnemonic_t mnemonic) {
+  // "unknown" never has a profile; answer without scanning...
+  if (mnemonic == mnemonic_t::unknown)
+    return nullptr;
+
+  // linear scan; the first profile carrying the requested mnemonic wins...
+  for (profiler_t* candidate : profiles)
+    if (candidate->mnemonic == mnemonic)
+      return candidate;
+  return nullptr;
+}
+} // namespace vm::instrs
\ No newline at end of file
diff --git a/src/vmprofiles/sreg.cpp b/src/vmprofiles/sreg.cpp
index 47c85a2..0b05d02 100644
--- a/src/vmprofiles/sreg.cpp
+++ b/src/vmprofiles/sreg.cpp
@@ -13,21 +13,22 @@ profiler_t sreg = {
instr.operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY &&
instr.operands[1].mem.base == vsp;
},
- // ADD VSP, 8
+ // ADD VSP, OFFSET (any immediate is accepted; its value is not checked here)
[&](const zydis_reg_t vip,
const zydis_reg_t vsp,
const zydis_decoded_instr_t& instr) -> bool {
return instr.mnemonic == ZYDIS_MNEMONIC_ADD &&
instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
instr.operands[0].reg.value == vsp &&
- instr.operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
- instr.operands[1].imm.value.u == 8;
+ instr.operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
},
// MOV REG, [VIP]
[&](const zydis_reg_t vip,
const zydis_reg_t vsp,
const zydis_decoded_instr_t& instr) -> bool {
- return instr.mnemonic == ZYDIS_MNEMONIC_MOV &&
+ return (instr.mnemonic == ZYDIS_MNEMONIC_MOV ||
+ instr.mnemonic == ZYDIS_MNEMONIC_MOVSX ||
+ instr.mnemonic == ZYDIS_MNEMONIC_MOVZX) &&
instr.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
instr.operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY &&
instr.operands[1].mem.base == vip;
@@ -44,5 +45,35 @@ profiler_t sreg = {
}}},
[&](zydis_reg_t& vip,
zydis_reg_t& vsp,
- hndlr_trace_t& hndlr) -> std::optional { return {}; }};
+        hndlr_trace_t& hndlr) -> std::optional {
+      // builds the SREG vinstr_t; its size comes from the ADD VSP immediate...
+      vinstr_t res{};  // value-initialized: members not assigned below stay defined
+      res.mnemonic = mnemonic_t::sreg;
+      // locates ADD VSP, VALUE...
+      const auto add_vsp = std::find_if(
+          hndlr.m_instrs.begin(), hndlr.m_instrs.end(),
+          [&](emu_instr_t& instr) -> bool {
+            const auto& i = instr.m_instr;
+            return i.mnemonic == ZYDIS_MNEMONIC_ADD &&
+                   i.operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
+                   i.operands[0].reg.value == vsp &&
+                   i.operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
+          });
+      // find_if returns end() on a miss; bail out rather than dereference it...
+      if (add_vsp == hndlr.m_instrs.end()) return {};
+      // the immediate is scaled by 8 (bytes to bits) to get the size...
+      res.size = add_vsp->m_instr.operands[1].imm.value.u * 8;
+      // MOV [RSP+REG], REG... NOTE(review): located but not consumed yet
+      const auto mov_vreg_value = std::find_if(
+          hndlr.m_instrs.begin(), hndlr.m_instrs.end(),
+          [&](emu_instr_t& instr) -> bool {
+            const auto& i = instr.m_instr;
+            return i.mnemonic == ZYDIS_MNEMONIC_MOV &&
+                   i.operands[0].type == ZYDIS_OPERAND_TYPE_MEMORY &&
+                   i.operands[0].mem.base == ZYDIS_REGISTER_RSP &&
+                   i.operands[0].mem.index != ZYDIS_REGISTER_NONE &&
+                   i.operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER;
+          });
+      return res;
+    }};
}
\ No newline at end of file