diff --git a/include/decomp/decomp.hpp b/include/decomp/decomp.hpp index 1330684..ae52c11 100644 --- a/include/decomp/decomp.hpp +++ b/include/decomp/decomp.hpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -12,6 +13,7 @@ #include namespace theo::decomp { +using sym_data_t = std::pair; class decomp_t { public: explicit decomp_t(std::vector& lib, @@ -22,13 +24,20 @@ class decomp_t { std::vector objs(); recomp::symbol_table_t* syms(); std::map& scn_hash_tbl(); - std::optional decompose(); + std::optional decompose( + const std::string&& entry_sym); private: + std::uint32_t ext_used_syms(const std::string&& entry_sym); + std::optional get_symbol(const std::string_view& name); + const std::vector m_lib; std::vector m_objs; std::vector m_rtns; + std::set m_used_syms; + std::set m_processed_objs; std::map m_scn_hash_tbl; + std::map> m_lookup_tbl; recomp::symbol_table_t* m_syms; }; } // namespace theo::decomp \ No newline at end of file diff --git a/include/recomp/symbol_table.hpp b/include/recomp/symbol_table.hpp index 1449652..162007f 100644 --- a/include/recomp/symbol_table.hpp +++ b/include/recomp/symbol_table.hpp @@ -16,8 +16,8 @@ class symbol_table_t { void add_symbol(decomp::symbol_t& sym); void add_symbols(std::vector& syms); - std::optional sym_from_hash(std::size_t hash); - std::optional sym_from_alloc(std::uintptr_t allocated_at); + std::optional sym_from_hash(std::size_t hash); + std::optional sym_from_alloc(std::uintptr_t allocated_at); void update(std::size_t hash, decomp::symbol_t& sym); void update(std::size_t hash, std::uintptr_t allocated_at); diff --git a/include/theo.hpp b/include/theo.hpp index d243c66..7b3a976 100644 --- a/include/theo.hpp +++ b/include/theo.hpp @@ -25,13 +25,16 @@ using lnk_fns_t = class theo_t { public: - explicit theo_t(std::vector& lib, lnk_fns_t lnkr_fns); + explicit theo_t(std::vector& lib, + lnk_fns_t lnkr_fns, + const std::string&& entry_sym); std::optional decompose(); - std::uintptr_t 
compose(const std::string&& entry_sym); + std::uintptr_t compose(); std::uintptr_t resolve(const std::string&& sym); private: + std::string m_entry_sym; decomp::decomp_t m_dcmp; recomp::recomp_t m_recmp; recomp::symbol_table_t m_sym_tbl; diff --git a/src/tests/demo/main.cpp b/src/tests/demo/main.cpp index 544c1be..66f13a8 100644 --- a/src/tests/demo/main.cpp +++ b/src/tests/demo/main.cpp @@ -1,4 +1,6 @@ #include +#include + #include #include @@ -18,6 +20,9 @@ int main(int argc, char* argv[]) { fdata.resize(fsize); f.read((char*)fdata.data(), fsize); + LoadLibraryA("user32.dll"); + LoadLibraryA("win32u.dll"); + theo::recomp::allocator_t allocator = [&](std::uint32_t size, coff::section_characteristics_t section_type) -> std::uintptr_t { @@ -32,11 +37,26 @@ int main(int argc, char* argv[]) { }; theo::recomp::resolver_t resolver = [&](std::string sym) -> std::uintptr_t { - return reinterpret_cast( - GetProcAddress(LoadLibraryA("user32.dll"), sym.data())); + auto loaded_modules = std::make_unique(64); + std::uintptr_t result = 0u, loaded_module_sz = 0u; + if (!EnumProcessModules(GetCurrentProcess(), loaded_modules.get(), 512, + (PDWORD)&loaded_module_sz)) + return {}; + + for (auto i = 0u; i < loaded_module_sz / 8u && i < 64u; i++) { // the reported size is the bytes needed and may exceed the 64-entry buffer, so never index past it + wchar_t file_name[MAX_PATH] = L""; + if (!GetModuleFileNameExW(GetCurrentProcess(), loaded_modules.get()[i], + file_name, _countof(file_name))) + continue; + + if ((result = reinterpret_cast( + GetProcAddress(LoadLibraryW(file_name), sym.c_str())))) + break; + } + return result; }; - theo::theo_t t(fdata, {allocator, copier, resolver}); + theo::theo_t t(fdata, {allocator, copier, resolver}, "main"); auto res = t.decompose(); if (!res.has_value()) { @@ -45,7 +65,7 @@ int main(int argc, char* argv[]) { } spdlog::info("decomposed {} symbols...", res.value()); - auto entry_pnt = t.compose("EntryPoint"); + auto entry_pnt = t.compose(); spdlog::info("entry point address: {:X}", entry_pnt); std::getchar(); reinterpret_cast(entry_pnt)(); diff --git 
a/src/theo/decomp/decomp.cpp b/src/theo/decomp/decomp.cpp index 106180f..4da9d60 100644 --- a/src/theo/decomp/decomp.cpp +++ b/src/theo/decomp/decomp.cpp @@ -4,7 +4,8 @@ namespace theo::decomp { decomp_t::decomp_t(std::vector& lib, recomp::symbol_table_t* syms) : m_lib(lib), m_syms(syms) {} -std::optional decomp_t::decompose() { +std::optional decomp_t::decompose( + const std::string&& entry_sym) { // extract obj files from the archive file... // ar::view lib(m_lib.data(), m_lib.size()); @@ -20,115 +21,114 @@ std::optional decomp_t::decompose() { } }); + std::for_each(m_objs.begin(), m_objs.end(), [&](coff::image_t* img) { + for (auto idx = 0u; idx < img->file_header.num_symbols; ++idx) { + auto sym = img->get_symbol(idx); + auto sym_name = sym->name.to_string(img->get_strings()); + if (sym_name.length()) { + auto sym_hash = symbol_t::hash(sym_name.data()); + m_lookup_tbl[sym_hash].push_back({img, sym}); + } + } + }); + + // extract used symbols from objs and create a nice little set of them so that + // we can easily decompose them... no need deal with every single symbol... + spdlog::info("extracted {} symbols being used...", + ext_used_syms(entry_sym.data())); + // generate symbols, populate section hash table, for each object file // extracted from the archive file... // - std::for_each(m_objs.begin(), m_objs.end(), [&](coff::image_t* img) { - // populate section hash table... + std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) { + auto [img, sym] = data; + // populate section hash table with sections for the img of this + // symbol... only populate the hash table if its not been populated for + // this obj before... 
// - for (auto idx = 0u; idx < img->file_header.num_sections; ++idx) { - auto scn = img->get_section(idx); - auto scn_sym_name = - std::string(scn->name.to_string(img->get_strings())) - .append("#") - .append(std::to_string(idx)) - .append("!") - .append(std::to_string(img->file_header.timedate_stamp)); - - // hash the name of the section + the index + the timestamp of the obj - // file it is in... - // - m_scn_hash_tbl.insert({scn, decomp::symbol_t::hash(scn_sym_name)}); + if (m_processed_objs.emplace(img).second) { + for (auto idx = 0u; idx < img->file_header.num_sections; ++idx) { + auto scn = img->get_section(idx); + auto scn_sym_name = + std::string(scn->name.to_string(img->get_strings())) + .append("#") + .append(std::to_string(idx)) + .append("!") + .append(std::to_string(img->file_header.timedate_stamp)); + + // hash the name of the section + the index + the timestamp of the + // obj file it is in... + // + m_scn_hash_tbl.insert({scn, decomp::symbol_t::hash(scn_sym_name)}); + } } - auto syms_cnt = img->file_header.num_symbols; - for (auto idx = 0u; idx < syms_cnt; ++idx) { - auto sym = img->get_symbol(idx); - - // if the symbol is a function then we are going to decompose it... - // data symbols are handled after this... - // - if (sym->has_section()) { - if (sym->derived_type == coff::derived_type_id::function) { - auto scn = img->get_section(sym->section_index - 1); - auto dcmp_type = scn->name.to_string(img->get_strings()) == - INSTR_SPLIT_SECTION_NAME - ? 
decomp::sym_type_t::instruction - : decomp::sym_type_t::function; - auto fn_size = scn->size_raw_data; - auto fn_bgn = - scn->ptr_raw_data + reinterpret_cast(img); - - spdlog::info("decomposing function: {} size: {}", - sym->name.to_string(img->get_strings()), fn_size); - - std::vector fn(fn_bgn, fn_bgn + fn_size); - decomp::routine_t rtn(sym, img, scn, fn, dcmp_type); - - auto syms = rtn.decompose(); - spdlog::info("decomposed routine into {} symbols...", syms.size()); - m_syms->add_symbols(syms); - } else if (sym->storage_class == - coff::storage_class_id::public_symbol) { - auto scn = img->get_section(sym->section_index - 1); - auto scn_sym = m_syms->sym_from_hash(m_scn_hash_tbl[scn]); - - // if the section doesnt have a symbol then make one and put it into - // the symbol table... - // - if (!scn_sym.has_value()) { - auto scn_sym_name = - std::string(scn->name.to_string(img->get_strings())) - .append("#") - .append(std::to_string(sym->section_index - 1)) - .append("!") - .append(std::to_string(img->file_header.timedate_stamp)); - - std::vector scn_data( - reinterpret_cast(img) + scn->ptr_raw_data, - reinterpret_cast(img) + scn->ptr_raw_data + - scn->size_raw_data); - - decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, - {}, {}, sym_type_t::section); - - spdlog::info( - "generating symbol for section: {} sym name: {} hash: {:X} " - "section size: {}", - scn->name.to_string(img->get_strings()), new_scn_sym.name(), - new_scn_sym.hash(), scn->size_raw_data); - - m_syms->add_symbol(new_scn_sym); - } - - // create a symbol for the data... - // - decomp::symbol_t new_sym( - img, sym->name.to_string(img->get_strings()).data(), sym->value, - {}, scn, sym, {}, sym_type_t::data); + // if the symbol is a function then we are going to decompose it... + // data symbols are handled after this... 
+ // + if (sym->has_section()) { + if (sym->derived_type == coff::derived_type_id::function) { + auto scn = img->get_section(sym->section_index - 1); + auto dcmp_type = + scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME + ? decomp::sym_type_t::instruction + : decomp::sym_type_t::function; + auto fn_size = scn->size_raw_data; + auto fn_bgn = scn->ptr_raw_data + reinterpret_cast(img); + + std::vector fn(fn_bgn, fn_bgn + fn_size); + decomp::routine_t rtn(sym, img, scn, fn, dcmp_type); + + auto syms = rtn.decompose(); + m_syms->add_symbols(syms); + } else if (sym->storage_class == coff::storage_class_id::public_symbol || + sym->storage_class == coff::storage_class_id::private_symbol) { + auto scn = img->get_section(sym->section_index - 1); + auto scn_sym = m_syms->sym_from_hash(m_scn_hash_tbl[scn]); + + // if the section doesnt have a symbol then make one and put it into + // the symbol table... + // + if (!scn_sym.has_value()) { + auto scn_sym_name = + std::string(scn->name.to_string(img->get_strings())) + .append("#") + .append(std::to_string(sym->section_index - 1)) + .append("!") + .append(std::to_string(img->file_header.timedate_stamp)); + + std::vector scn_data( + reinterpret_cast(img) + scn->ptr_raw_data, + reinterpret_cast(img) + scn->ptr_raw_data + + scn->size_raw_data); + + decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {}, + {}, sym_type_t::section); + + m_syms->add_symbol(new_scn_sym); + } - spdlog::info("adding data symbol: {} located inside of section: {}", - new_sym.name(), - m_syms->sym_from_hash(m_scn_hash_tbl[new_sym.scn()]) - .value() - .name()); + // create a symbol for the data... + // + decomp::symbol_t new_sym( + img, sym->name.to_string(img->get_strings()).data(), sym->value, {}, + scn, sym, {}, sym_type_t::data); - m_syms->add_symbol(new_sym); - } - } else if (sym->storage_class == - coff::storage_class_id:: - external_definition) { // else if the symbol has no - // section... 
these symbols require - // the linker to allocate space for - // them... - - std::vector data(sym->value, 0); - decomp::symbol_t bss_sym(img, - sym->name.to_string(img->get_strings()).data(), - {}, data, {}, sym, {}, sym_type_t::data); - - m_syms->add_symbol(bss_sym); + m_syms->add_symbol(new_sym); } + } else if (sym->storage_class == + coff::storage_class_id:: + external_definition) { // else if the symbol has no + // section... these symbols + // require the linker to allocate + // space for them... + + std::vector data(sym->value, 0); + decomp::symbol_t bss_sym(img, + sym->name.to_string(img->get_strings()).data(), + {}, data, {}, sym, {}, sym_type_t::data); + + m_syms->add_symbol(bss_sym); } }); @@ -137,6 +137,63 @@ std::optional decomp_t::decompose() { return m_syms; } +std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) { + std::optional> entry; + if (!(entry = get_symbol(entry_sym.data())).has_value()) + return 0u; + + std::set cache; + const auto finding_syms = [&]() -> bool { + for (auto itr = m_used_syms.begin(); itr != m_used_syms.end(); ++itr) { + auto [img, sym] = *itr; + if (sym->has_section() && !cache.count(sym)) { + auto scn = img->get_section(sym->section_index - 1); + auto num_relocs = scn->num_relocs; + auto relocs = reinterpret_cast( + scn->ptr_relocs + reinterpret_cast(img)); + + for (auto idx = 0u; idx < num_relocs; ++idx) { + auto reloc_sym = img->get_symbol(relocs[idx].symbol_index); + // if the symbol is defined in the current obj then we dont need to go + // looking for where its actually defined... 
+ if (img->get_symbol(relocs[idx].symbol_index)->has_section()) { + sym_data_t sym_data = {img, reloc_sym}; + if (m_used_syms.emplace(sym_data).second) + return true; + } else { + auto sym_name = reloc_sym->name.to_string(img->get_strings()); + entry = get_symbol(sym_name); + if (m_used_syms.emplace(entry.value()).second) + return true; + } + } + cache.emplace(sym); + } + } + return false; + }; + + m_used_syms.emplace(entry.value()); + for (m_used_syms.emplace(entry.value()); finding_syms();) + ; + + return m_used_syms.size(); +} + +std::optional decomp_t::get_symbol(const std::string_view& name) { + coff::image_t* img = {}; + coff::symbol_t* sym = {}; + + auto& syms = m_lookup_tbl[symbol_t::hash(name.data())]; + for (auto idx = 0u; idx < syms.size(); ++idx) { + img = syms[idx].first; + sym = syms[idx].second; + if (sym->has_section()) + return {{img, sym}}; + } + return {{img, sym}}; +} + std::vector decomp_t::rtns() { return m_rtns; } diff --git a/src/theo/decomp/routine.cpp b/src/theo/decomp/routine.cpp index 1c14160..b9773ea 100644 --- a/src/theo/decomp/routine.cpp +++ b/src/theo/decomp/routine.cpp @@ -23,10 +23,6 @@ std::vector routine_t::decompose() { auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); auto sym_hash = decomp::symbol_t::hash(sym_name.data()); - spdlog::info("{} reloc to: {} hash: {:X} at offset: {}", - m_sym->name.to_string(m_img->get_strings()), sym_name, - sym_hash, scn_reloc->virtual_address); - relocs.push_back(recomp::reloc_t(scn_reloc->virtual_address, sym_hash, sym_name.data())); } @@ -81,9 +77,6 @@ std::vector routine_t::decompose() { auto sym_hash = decomp::symbol_t::hash(sym_name.data()); auto reloc_offset = reloc->virtual_address - offset; - spdlog::info("{} reloc to: {} at offset: {}", new_sym_name, sym_name, - reloc_offset); - relocs.push_back( recomp::reloc_t(reloc_offset, sym_hash, sym_name.data())); } diff --git a/src/theo/recomp/recomp.cpp b/src/theo/recomp/recomp.cpp index bfb9480..58a5648 100644 --- 
a/src/theo/recomp/recomp.cpp +++ b/src/theo/recomp/recomp.cpp @@ -42,7 +42,7 @@ void recomp_t::allocate() { assert(scn_sym.has_value()); } - sym.allocated_at(scn_sym.value().allocated_at() + sym.offset()); + sym.allocated_at(scn_sym.value()->allocated_at() + sym.offset()); } else { // else if there is no section then we allocate based upon the // size of the symbol... this is only done for symbols that are // bss... @@ -64,7 +64,8 @@ void recomp_t::resolve() { // resolve relocations in all symbols... // m_dcmp->syms()->for_each([&](theo::decomp::symbol_t& sym) { - std::for_each(sym.relocs().begin(), sym.relocs().end(), [&](reloc_t reloc) { + auto& relocs = sym.relocs(); + std::for_each(relocs.begin(), relocs.end(), [&](reloc_t& reloc) { if (reloc.offset() > sym.data().size()) { spdlog::error( "invalid relocation... writing outside of symbol length... offset: " @@ -79,7 +80,7 @@ void recomp_t::resolve() { // auto reloc_sym = m_dcmp->syms()->sym_from_hash(reloc.hash()); auto allocated_at = reloc_sym.has_value() - ? reloc_sym.value().allocated_at() + ? reloc_sym.value()->allocated_at() : m_resolver(reloc.name()); if (!allocated_at) { @@ -136,6 +137,6 @@ void recomp_t::resolver(resolver_t resolve) { std::uintptr_t recomp_t::resolve(const std::string&& sym) { auto res = m_dcmp->syms()->sym_from_hash(decomp::symbol_t::hash(sym)); - return res.has_value() ? res->allocated_at() : 0; + return res.has_value() ? res.value()->allocated_at() : 0; } } // namespace theo::recomp \ No newline at end of file diff --git a/src/theo/recomp/symbol_table.cpp b/src/theo/recomp/symbol_table.cpp index bf4fd53..137ed53 100644 --- a/src/theo/recomp/symbol_table.cpp +++ b/src/theo/recomp/symbol_table.cpp @@ -31,13 +31,13 @@ void symbol_table_t::for_each(std::function fn) { fn(itr->second); } -std::optional symbol_table_t::sym_from_hash( +std::optional symbol_table_t::sym_from_hash( std::size_t hash) { - return m_table.count(hash) ? 
m_table.at(hash) - : std::optional{}; + return m_table.count(hash) ? &m_table.at(hash) + : std::optional{}; } -std::optional symbol_table_t::sym_from_alloc( +std::optional symbol_table_t::sym_from_alloc( std::uintptr_t allocated_at) { auto res = std::find_if(m_table.begin(), m_table.end(), @@ -45,10 +45,11 @@ std::optional symbol_table_t::sym_from_alloc( return itr.second.allocated_at() == allocated_at; }); - return res != m_table.end() ? res->second : std::optional{}; + return res != m_table.end() ? &res->second + : std::optional{}; } std::uint32_t symbol_table_t::size() { return m_table.size(); } -} // namespace theo::comp \ No newline at end of file +} // namespace theo::recomp \ No newline at end of file diff --git a/src/theo/theo.cpp b/src/theo/theo.cpp index 3047cb4..97d7a30 100644 --- a/src/theo/theo.cpp +++ b/src/theo/theo.cpp @@ -1,8 +1,10 @@ #include namespace theo { -theo_t::theo_t(std::vector& lib, lnk_fns_t lnkr_fns) - : m_dcmp(lib, &m_sym_tbl), m_recmp(&m_dcmp) { +theo_t::theo_t(std::vector& lib, + lnk_fns_t lnkr_fns, + const std::string&& entry_sym) + : m_dcmp(lib, &m_sym_tbl), m_recmp(&m_dcmp), m_entry_sym(entry_sym) { // init enc/dec tables only once... add obfuscation passes to the engine... // if (static std::atomic_bool v = true; v.exchange(false)) { @@ -23,32 +25,25 @@ theo_t::theo_t(std::vector& lib, lnk_fns_t lnkr_fns) } std::optional theo_t::decompose() { - auto res = m_dcmp.decompose(); + auto res = m_dcmp.decompose(m_entry_sym.data()); if (!res.has_value()) { spdlog::error("failed to decompose...\n"); return {}; } spdlog::info("decompose successful... {} symbols", res.value()->size()); - res.value()->for_each([&](decomp::symbol_t& sym) { - spdlog::info("hash: {:X}, name: {}", sym.hash(), sym.name()); - }); return res.value()->size(); } -std::uintptr_t theo_t::compose(const std::string&& entry_sym) { +std::uintptr_t theo_t::compose() { // run obfuscation engine on all symbols... 
// auto engine = obf::engine_t::get(); m_sym_tbl.for_each([&](decomp::symbol_t& sym) { engine->run(&sym); }); m_recmp.allocate(); - m_sym_tbl.for_each([&](decomp::symbol_t& sym) { - spdlog::info("{} allocated at {:X}", sym.name(), sym.allocated_at()); - }); - m_recmp.resolve(); m_recmp.copy_syms(); - return m_recmp.resolve(entry_sym.data()); + return m_recmp.resolve(m_entry_sym.data()); } } // namespace theo \ No newline at end of file