diff --git a/CMakeLists.txt b/CMakeLists.txt index 87eabc5..839b40f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,7 @@ list(APPEND Theodosius_SOURCES "include/decomp/symbol.hpp" "include/obf/engine.hpp" "include/obf/pass.hpp" + "include/obf/passes/func_split_pass.hpp" "include/obf/passes/jcc_rewrite_pass.hpp" "include/obf/passes/next_inst_pass.hpp" "include/obf/passes/reloc_transform_pass.hpp" @@ -78,6 +79,7 @@ list(APPEND Theodosius_SOURCES "src/decomp/routine.cpp" "src/decomp/symbol.cpp" "src/obf/engine.cpp" + "src/obf/passes/func_split_pass.cpp" "src/obf/passes/jcc_rewrite_pass.cpp" "src/obf/passes/next_inst_pass.cpp" "src/obf/passes/reloc_transform_pass.cpp" diff --git a/examples/demo/hello_world_pass.hpp b/examples/demo/hello_world_pass.hpp index 90012a5..1be948e 100644 --- a/examples/demo/hello_world_pass.hpp +++ b/examples/demo/hello_world_pass.hpp @@ -47,7 +47,7 @@ class hello_world_pass_t : public generic_pass_t { return &obj; } - void generic_pass(decomp::symbol_t* sym) override { + void generic_pass(decomp::symbol_t* sym, sym_map_t& sym_tbl) override { spdlog::info("[hello_world_pass_t] symbol name: {}, symbol hash: {}", sym->name(), sym->hash()); } diff --git a/examples/demo/main.cpp b/examples/demo/main.cpp index b146f41..ffb3561 100644 --- a/examples/demo/main.cpp +++ b/examples/demo/main.cpp @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -114,16 +115,17 @@ int main(int argc, char* argv[]) { // auto engine = theo::obf::engine_t::get(); - // add in our hello world pass here - // - engine->add_pass(theo::obf::hello_world_pass_t::get()); - // add the rest of the passes in this order. this order is important. 
// + engine->add_pass(theo::obf::func_split_pass_t::get()); engine->add_pass(theo::obf::reloc_transform_pass_t::get()); engine->add_pass(theo::obf::next_inst_pass_t::get()); engine->add_pass(theo::obf::jcc_rewrite_pass_t::get()); + // add in our hello world pass here + // + engine->add_pass(theo::obf::hello_world_pass_t::get()); + std::string entry_name; std::cout << "enter the name of the entry point: "; std::cin >> entry_name; @@ -147,5 +149,8 @@ int main(int argc, char* argv[]) { spdlog::info("decomposed {} symbols...", res.value()); auto entry_pnt = t.compose(); spdlog::info("entry point address: {:X}", entry_pnt); + spdlog::info("press enter to execute {}", entry_name.c_str()); + + std::getchar(); reinterpret_cast(entry_pnt)(); } \ No newline at end of file diff --git a/include/decomp/symbol.hpp b/include/decomp/symbol.hpp index fc09ad4..0b568cb 100644 --- a/include/decomp/symbol.hpp +++ b/include/decomp/symbol.hpp @@ -139,6 +139,12 @@ class symbol_t { /// the type of the symbol. sym_type_t type() const; + /// + /// setter for the type value. + /// + /// type of symbol. + void type(sym_type_t type); + /// /// returns a vector of relocations. /// diff --git a/include/obf/pass.hpp b/include/obf/pass.hpp index f0bf1fc..a3c609e 100644 --- a/include/obf/pass.hpp +++ b/include/obf/pass.hpp @@ -59,6 +59,10 @@ using allocator_t = std::function; +// symbol table map associated with the symbol table data structure. +// +using sym_map_t = std::map; + /// /// the pass_t class is a base clase for all passes made. you must override the /// pass_t::run virtual function and declare the logic of your pass there. @@ -82,7 +86,7 @@ class pass_t { /// allows you to manipulate symbols in a generic manner. /// /// a symbol of the same type of m_sym_type. - virtual void generic_pass(decomp::symbol_t* sym) = 0; + virtual void generic_pass(decomp::symbol_t* sym, sym_map_t& sym_tbl) = 0; /// /// This virtual method is invoked prior to calling the "copier". 
This allows
diff --git a/include/obf/passes/func_split_pass.hpp b/include/obf/passes/func_split_pass.hpp
new file mode 100644
index 0000000..ad7f955
--- /dev/null
+++ b/include/obf/passes/func_split_pass.hpp
@@ -0,0 +1,59 @@
+// Copyright (c) 2022, _xeroxz
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+//    this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#pragma once
+#include <obf/pass.hpp>
+
+namespace theo::obf {
+/// <summary>
+/// this pass splits a function symbol into instruction symbols, one symbol for
+/// every instruction that can be decoded from the function.
+/// </summary>
+class func_split_pass_t : public generic_pass_t {
+  // the constructor is private, get() is the only way to obtain the pass...
+  //
+  explicit func_split_pass_t() : generic_pass_t(decomp::sym_type_t::function) {}
+
+ public:
+  /// <summary>
+  /// getter for the instance of this pass.
+  /// </summary>
+  /// <returns>pointer to the func_split_pass_t instance.</returns>
+  static func_split_pass_t* get();
+
+  /// <summary>
+  /// splits the function symbol into instruction symbols and stores them in
+  /// the symbol table map.
+  /// </summary>
+  /// <param name="sym">function symbol to split.</param>
+  /// <param name="sym_tbl">map that receives the instruction symbols.</param>
+  void generic_pass(decomp::symbol_t* sym, sym_map_t& sym_tbl) override;
+};
+}  // namespace theo::obf
\ No newline at end of file
diff --git a/include/obf/passes/jcc_rewrite_pass.hpp b/include/obf/passes/jcc_rewrite_pass.hpp index 630ece1..2272660 100644 --- a/include/obf/passes/jcc_rewrite_pass.hpp +++ b/include/obf/passes/jcc_rewrite_pass.hpp @@ -64,6 +64,6 @@ class jcc_rewrite_pass_t : public generic_pass_t { public: static jcc_rewrite_pass_t* get(); - void generic_pass(decomp::symbol_t* sym) override; + void generic_pass(decomp::symbol_t* sym, sym_map_t& sym_tbl) override; }; } // namespace theo::obf \ No newline at end of file diff --git a/include/obf/passes/next_inst_pass.hpp b/include/obf/passes/next_inst_pass.hpp index 674c352..f872936 100644 --- a/include/obf/passes/next_inst_pass.hpp +++ b/include/obf/passes/next_inst_pass.hpp @@ -92,7 +92,7 @@ class next_inst_pass_t : public generic_pass_t { public: static next_inst_pass_t* get(); - void generic_pass(decomp::symbol_t* sym) override; + void generic_pass(decomp::symbol_t* sym, sym_map_t& sym_tbl) override; private: std::optional has_next_inst_reloc(decomp::symbol_t*); diff --git a/include/obf/passes/reloc_transform_pass.hpp b/include/obf/passes/reloc_transform_pass.hpp index 89e51ce..598d608 100644 --- a/include/obf/passes/reloc_transform_pass.hpp +++ b/include/obf/passes/reloc_transform_pass.hpp @@ -59,7 +59,7 @@ class reloc_transform_pass_t : public generic_pass_t { public: static reloc_transform_pass_t* get(); - void generic_pass(decomp::symbol_t* sym) override; + void generic_pass(decomp::symbol_t* sym, sym_map_t& sym_tbl) override; private: std::optional has_legit_reloc(decomp::symbol_t* sym); diff --git a/include/recomp/reloc.hpp b/include/recomp/reloc.hpp index 
ecd6b75..94b7be6 100644 --- a/include/recomp/reloc.hpp +++ b/include/recomp/reloc.hpp @@ -81,30 +81,7 @@ class reloc_t { /// too. void offset(std::uint32_t offset) { m_offset = offset; } - /// - /// adds a transformation to be applied to the relocation prior to writing it - /// into the symbol. - /// - /// a pair containing a lambda function that when executed - /// transforms a relocation. the second value in the pair is a random value - /// which is passed to the lambda. - void add_transform( - std::pair entry) { - m_transforms.push_back(entry); - } - - /// - /// gets the vector of transformation. - /// - /// returns the vector of transformations. - std::vector>& - get_transforms() { - return m_transforms; - } - private: - std::vector> - m_transforms; std::string m_sym_name; std::size_t m_hash; std::uint32_t m_offset; diff --git a/include/recomp/symbol_table.hpp b/include/recomp/symbol_table.hpp index 75d77c2..3f375a8 100644 --- a/include/recomp/symbol_table.hpp +++ b/include/recomp/symbol_table.hpp @@ -99,6 +99,12 @@ class symbol_table_t { /// returns the size of the symbol table. std::uint32_t size(); + /// + /// getter for underlying symbol hash map. + /// + /// returns the symbol hashmap. + std::map& get(); + private: std::map m_table; }; diff --git a/src/decomp/symbol.cpp b/src/decomp/symbol.cpp index 134811a..d9eadeb 100644 --- a/src/decomp/symbol.cpp +++ b/src/decomp/symbol.cpp @@ -81,6 +81,10 @@ sym_type_t symbol_t::type() const { return m_sym_type; } +void symbol_t::type(sym_type_t type) { + m_sym_type = type; +} + void symbol_t::allocated_at(std::uintptr_t allocated_at) { m_allocated_at = allocated_at; } 
diff --git a/src/obf/passes/func_split_pass.cpp b/src/obf/passes/func_split_pass.cpp
new file mode 100644
index 0000000..e07af4a
--- /dev/null
+++ b/src/obf/passes/func_split_pass.cpp
@@ -0,0 +1,154 @@
+// Copyright (c) 2022, _xeroxz
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+//    this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include <obf/passes/func_split_pass.hpp>
+
+namespace theo::obf {
+// returns the instance of this pass. the object is a function-local static,
+// so every call hands out a pointer to the same object...
+//
+func_split_pass_t* func_split_pass_t::get() {
+  static func_split_pass_t obj;
+  return &obj;
+}
+
+// splits the function symbol "sym" into one symbol per decoded instruction and
+// stores those symbols in "sym_tbl". a symbol that already exists under the
+// same hash is overwritten. every instruction symbol except the last one
+// carries a relocation with offset ZERO that names the instruction after it...
+//
+void func_split_pass_t::generic_pass(decomp::symbol_t* sym,
+                                     sym_map_t& sym_tbl) {
+  std::uint32_t offset = {};
+  xed_error_enum_t err;
+  xed_decoded_inst_t instr;
+  std::vector<decomp::symbol_t> result;
+  xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b};
+  xed_decoded_inst_zero_set_mode(&instr, &istate);
+
+  // the name of the function and the relocation table of its section do not
+  // change while splitting, so look them up once...
+  //
+  const auto func_name = decomp::symbol_t::name(sym->img(), sym->sym());
+  const auto scn_relocs = reinterpret_cast<coff::reloc_t*>(
+      sym->scn()->ptr_relocs + reinterpret_cast<std::uint8_t*>(sym->img()));
+  const auto scn_relocs_end = scn_relocs + sym->scn()->num_relocs;
+
+  // keep looping over the function, lower the number of bytes each time...
+  //
+  while ((err = xed_decode(&instr, sym->data().data() + offset,
+                           sym->data().size() - offset)) == XED_ERROR_NONE) {
+    const auto inst_len = xed_decoded_inst_get_length(&instr);
+
+    // symbol name is of the format: symbol@instroffset, I.E: main@11...
+    // first instruction doesnt need the @offset...
+    //
+    auto new_sym_name = func_name;
+    if (offset)
+      new_sym_name.append("@").append(std::to_string(offset));
+
+    // find if this instruction has a relocation or not...
+    // if so, "reloc" points at it...
+    //
+    std::vector<recomp::reloc_t> relocs;
+    const auto inst_begin = sym->sym()->value + offset;
+    const auto reloc = std::find_if(
+        scn_relocs, scn_relocs_end, [&](const coff::reloc_t& entry) {
+          return entry.virtual_address >= inst_begin &&
+                 entry.virtual_address < inst_begin + inst_len;
+        });
+
+    // if there is indeed a reloc for this instruction...
+    //
+    if (reloc != scn_relocs_end) {
+      auto sym_reloc = sym->img()->get_symbol(reloc->symbol_index);
+      auto sym_name = decomp::symbol_t::name(sym->img(), sym_reloc);
+      auto sym_hash = decomp::symbol_t::hash(sym_name.data());
+      auto reloc_offset = reloc->virtual_address - sym->sym()->value - offset;
+      relocs.push_back(
+          recomp::reloc_t(reloc_offset, sym_hash, sym_name.data()));
+    }
+
+    // add a reloc to the next instruction...
+    // note that the offset is ZERO... comp_t will understand that
+    // relocs with offset ZERO means the next instructions...
+    //
+    auto next_inst_sym = func_name;
+    next_inst_sym.append("@").append(std::to_string(offset + inst_len));
+    relocs.push_back(recomp::reloc_t(0, decomp::symbol_t::hash(next_inst_sym),
+                                     next_inst_sym.data()));
+
+    // get the instructions bytes
+    //
+    std::vector<std::uint8_t> inst_bytes(
+        sym->data().data() + offset, sym->data().data() + offset + inst_len);
+
+    result.push_back(decomp::symbol_t(sym->img(), new_sym_name, offset,
+                                      inst_bytes, sym->scn(), sym->sym(),
+                                      relocs, decomp::sym_type_t::instruction));
+
+    // after creating the symbol and dealing with relocs then print the
+    // information we have concluded... the buffer is zeroed so that it holds
+    // an empty string if the instruction can not be formatted.
+    //
+    char buff[255] = {};
+    offset += inst_len;
+    xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, 0, nullptr,
+                       nullptr);
+    spdlog::info("[func_split_pass_t] {}: {}", new_sym_name, buff);
+
+    // need to set this so that instr can be used to decode again...
+    xed_decoded_inst_zero_set_mode(&instr, &istate);
+  }
+
+  // the decoder gave up before the end of the function... the bytes that are
+  // left are not part of any instruction symbol, so do not hide that.
+  //
+  if (offset < sym->data().size())
+    spdlog::warn("[func_split_pass_t] {}: decode stopped at offset {}: {}",
+                 func_name, offset, xed_error_enum_t2str(err));
+
+  // nothing was decoded, so there is no last instruction to fix up and
+  // nothing to put into the symbol table...
+  //
+  if (result.empty())
+    return;
+
+  // remove the relocation to the next symbol from the last instruction
+  //
+  result.back().relocs().pop_back();
+
+  // insert the split instructions into the symbol table, a symbol that is
+  // already stored under the same hash gets overwritten...
+  //
+  for (auto& symbol : result)
+    sym_tbl.insert_or_assign(symbol.hash(), symbol);
+}
+}  // namespace theo::obf
\ No newline at end of file
diff --git a/src/obf/passes/jcc_rewrite_pass.cpp b/src/obf/passes/jcc_rewrite_pass.cpp index 65f83b3..97e33aa 100644 --- a/src/obf/passes/jcc_rewrite_pass.cpp +++ b/src/obf/passes/jcc_rewrite_pass.cpp @@ -37,7 +37,8 @@ jcc_rewrite_pass_t* jcc_rewrite_pass_t::get() { return &obj; } -void jcc_rewrite_pass_t::generic_pass(decomp::symbol_t* sym) { +void jcc_rewrite_pass_t::generic_pass(decomp::symbol_t* sym, + sym_map_t& sym_tbl) { std::int32_t disp = {}; xed_decoded_inst_t inst; xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b}; @@ -77,7 +78,7 @@ void jcc_rewrite_pass_t::generic_pass(decomp::symbol_t* sym) { // run next_inst_pass on this symbol to generate the transformations for the // relocation to the jcc branch dest instruction... 
- next_inst_pass_t::get()->generic_pass(sym); + next_inst_pass_t::get()->generic_pass(sym, sym_tbl); } }; } // namespace theo::obf \ No newline at end of file diff --git a/src/obf/passes/next_inst_pass.cpp b/src/obf/passes/next_inst_pass.cpp index 6915400..eabd92a 100644 --- a/src/obf/passes/next_inst_pass.cpp +++ b/src/obf/passes/next_inst_pass.cpp @@ -36,7 +36,7 @@ next_inst_pass_t* next_inst_pass_t::get() { return &obj; } -void next_inst_pass_t::generic_pass(decomp::symbol_t* sym) { +void next_inst_pass_t::generic_pass(decomp::symbol_t* sym, sym_map_t& sym_tbl) { std::optional reloc; if (!(reloc = has_next_inst_reloc(sym)).has_value()) return; diff --git a/src/obf/passes/reloc_transform_pass.cpp b/src/obf/passes/reloc_transform_pass.cpp index 3e01b6f..dd7f4ea 100644 --- a/src/obf/passes/reloc_transform_pass.cpp +++ b/src/obf/passes/reloc_transform_pass.cpp @@ -36,7 +36,8 @@ reloc_transform_pass_t* reloc_transform_pass_t::get() { return &obj; } -void reloc_transform_pass_t::generic_pass(decomp::symbol_t* sym) { +void reloc_transform_pass_t::generic_pass(decomp::symbol_t* sym, + sym_map_t& sym_tbl) { std::optional reloc; if (!(reloc = has_legit_reloc(sym)).has_value()) return; diff --git a/src/recomp/symbol_table.cpp b/src/recomp/symbol_table.cpp index e81007d..449bcdd 100644 --- a/src/recomp/symbol_table.cpp +++ b/src/recomp/symbol_table.cpp @@ -69,6 +69,10 @@ std::optional symbol_table_t::sym_from_alloc( : std::optional{}; } +std::map& symbol_table_t::get() { + return m_table; +} + std::uint32_t symbol_table_t::size() { return m_table.size(); } diff --git a/src/theo.cpp b/src/theo.cpp index c750667..015f9ab 100644 --- a/src/theo.cpp +++ b/src/theo.cpp @@ -54,12 +54,34 @@ std::optional theo_t::decompose() { } std::uintptr_t theo_t::compose() { - // run obfuscation engine on all symbols... - // auto engine = obf::engine_t::get(); + auto& sym_tbl = m_sym_tbl.get(); + + // run obfuscation engine on function symbols... 
+ // + m_sym_tbl.for_each([&](decomp::symbol_t& sym) { + engine->for_each(&sym, [&](decomp::symbol_t* sym, obf::pass_t* pass) { + if (sym->type() == decomp::sym_type_t::function) + pass->generic_pass(sym, sym_tbl); + }); + }); + + // run obfuscation engine on instruction symbols... + // + m_sym_tbl.for_each([&](decomp::symbol_t& sym) { + engine->for_each(&sym, [&](decomp::symbol_t* sym, obf::pass_t* pass) { + if (sym->type() == decomp::sym_type_t::instruction) + pass->generic_pass(sym, sym_tbl); + }); + }); + + // run obfuscation engine on all other symbols... + // m_sym_tbl.for_each([&](decomp::symbol_t& sym) { engine->for_each(&sym, [&](decomp::symbol_t* sym, obf::pass_t* pass) { - pass->generic_pass(sym); + if (sym->type() != decomp::sym_type_t::instruction && + sym->type() != decomp::sym_type_t::function) + pass->generic_pass(sym, sym_tbl); }); });