From 0529fda2cce513f583aa2c5e3e46a5ec1810fa3d Mon Sep 17 00:00:00 2001 From: _xeroxz Date: Tue, 5 Apr 2022 17:08:28 -0700 Subject: [PATCH] demo is working with mapping entire functions... still need to make relocation transformations and such... --- include/comp/comp.hpp | 5 +- include/comp/reloc.hpp | 8 ++- include/comp/symbol_table.hpp | 5 +- include/decomp/decomp.hpp | 4 +- include/decomp/routine.hpp | 4 +- include/decomp/symbol.hpp | 18 +++--- src/tests/demo/main.cpp | 1 + src/tests/demolib/main.cpp | 8 +-- src/theo/comp/comp.cpp | 113 ++++++++++++++++++++++++++------- src/theo/comp/symbol_table.cpp | 21 +++--- src/theo/decomp/decomp.cpp | 78 +++++++++++++++++++++-- src/theo/decomp/routine.cpp | 24 +++---- src/theo/decomp/symbol.cpp | 20 ++---- src/theo/theo.cpp | 4 +- 14 files changed, 227 insertions(+), 86 deletions(-) diff --git a/include/comp/comp.hpp b/include/comp/comp.hpp index 4997eca..eb660bf 100644 --- a/include/comp/comp.hpp +++ b/include/comp/comp.hpp @@ -17,7 +17,10 @@ class comp_t { copier_t copy, resolver_t resolve); - void compose(); + void allocate(); + void resolve(); + void copy_syms(); + void allocator(allocator_t alloc); void copier(copier_t copy); void resolver(resolver_t resolve); diff --git a/include/comp/reloc.hpp b/include/comp/reloc.hpp index fb90cec..0d3949d 100644 --- a/include/comp/reloc.hpp +++ b/include/comp/reloc.hpp @@ -5,13 +5,17 @@ namespace theo::comp { class reloc_t { public: - explicit reloc_t(std::uint16_t offset, std::size_t hash) - : m_offset(offset), m_hash(hash) {} + explicit reloc_t(std::uint16_t offset, + std::size_t hash, + const std::string&& sym_name) + : m_offset(offset), m_hash(hash), m_sym_name(sym_name) {} std::size_t hash() { return m_hash; } + std::string name() { return m_sym_name; } std::uint16_t offset() { return m_offset; } private: + std::string m_sym_name; std::size_t m_hash; std::uint16_t m_offset; }; diff --git a/include/comp/symbol_table.hpp b/include/comp/symbol_table.hpp index 04b6ec6..e7f5cb7 100644 --- a/include/comp/symbol_table.hpp +++ b/include/comp/symbol_table.hpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -15,8 +16,8 @@ class symbol_table_t { void add_symbol(decomp::symbol_t& sym); void add_symbols(std::vector& syms); - decomp::symbol_t sym_from_hash(std::size_t hash); - decomp::symbol_t sym_from_alloc(std::uintptr_t allocated_at); + std::optional sym_from_hash(std::size_t hash); + std::optional sym_from_alloc(std::uintptr_t allocated_at); void update(std::size_t hash, decomp::symbol_t& sym); void update(std::size_t hash, std::uintptr_t allocated_at); diff --git a/include/decomp/decomp.hpp b/include/decomp/decomp.hpp index 72c83ee..79e29c1 100644 --- a/include/decomp/decomp.hpp +++ b/include/decomp/decomp.hpp @@ -1,9 +1,9 @@ #pragma once +#include #include #include #include #include -#include #include #include @@ -20,12 +20,14 @@ class decomp_t { std::vector lib(); std::vector objs(); comp::symbol_table_t* syms(); + std::map& scn_hash_tbl(); std::optional decompose(); private: const std::vector m_lib; std::vector m_objs; std::vector m_rtns; + std::map m_scn_hash_tbl; comp::symbol_table_t* m_syms; }; } // namespace theo::decomp \ No newline at end of file diff --git a/include/decomp/routine.hpp b/include/decomp/routine.hpp index 51ae8f3..e503a66 100644 --- a/include/decomp/routine.hpp +++ b/include/decomp/routine.hpp @@ -21,7 +21,7 @@ class routine_t { coff::image_t* img, coff::section_header_t* scn, std::vector& fn, - decomp_type_t dcmp_type); + sym_type_t dcmp_type); std::vector decompose(); coff::section_header_t* scn(); @@ -32,6 +32,6 @@ class routine_t { std::vector m_data; coff::image_t* m_img; coff::section_header_t* m_scn; - decomp_type_t m_dcmp_type; + sym_type_t m_dcmp_type; }; } // namespace theo::decomp \ No newline at end of file diff --git a/include/decomp/symbol.hpp b/include/decomp/symbol.hpp index d9da2d6..2280b30 100644 --- a/include/decomp/symbol.hpp +++ b/include/decomp/symbol.hpp @@ -6,33 +6,31 @@ #include namespace theo::decomp { -enum decomp_type_t { none, instr_split }; +enum sym_type_t { function, inst_split, data, section }; class symbol_t { public: explicit symbol_t(std::string name, std::uintptr_t offset, std::vector data, - coff::section_header_t* scn, - coff::symbol_t* sym, - std::vector relocs, - decomp_type_t dcmp_type); + coff::section_header_t* scn = {}, + coff::symbol_t* sym = {}, + std::vector relocs = {}, + sym_type_t dcmp_type = {}); std::string name() const; std::uintptr_t offset() const; std::uintptr_t allocated_at() const; std::uint32_t size() const; coff::section_header_t* scn() const; - std::vector data() const; + std::vector& data(); coff::symbol_t* sym() const; - decomp_type_t dcmp_type() const; + sym_type_t sym_type() const; std::vector& relocs(); void allocated_at(std::uintptr_t allocated_at); std::size_t hash(); static std::size_t hash(const std::string& sym); - static std::size_t scn_hash(coff::symbol_t* sym, coff::section_header_t* scn, - coff::image_t* img); private: std::string m_name; @@ -40,7 +38,7 @@ class symbol_t { std::vector m_data; coff::section_header_t* m_scn; std::vector m_relocs; - decomp_type_t m_dcmp_type; + sym_type_t m_sym_type; coff::symbol_t* m_sym; }; } // namespace theo::decomp \ No newline at end of file diff --git a/src/tests/demo/main.cpp b/src/tests/demo/main.cpp index 5362fd1..638de14 100644 --- a/src/tests/demo/main.cpp +++ b/src/tests/demo/main.cpp @@ -47,4 +47,5 @@ int main(int argc, char* argv[]) { spdlog::info("decomposed {} symbols...", res.value()); auto entry_pnt = t.compose("EntryPoint"); spdlog::info("entry point address: {:X}", entry_pnt); + reinterpret_cast(entry_pnt)(); } \ No newline at end of file diff --git a/src/tests/demolib/main.cpp b/src/tests/demolib/main.cpp index 01abc2e..29f60f8 100644 --- a/src/tests/demolib/main.cpp +++ b/src/tests/demolib/main.cpp @@ -1,9 +1,9 @@ -extern "C" int MessageBox(void* hWnd, +extern "C" int MessageBoxA(void* hWnd, char* lpText, char* lpCaption, void* uType); -__declspec(code_seg(".obf")) extern "C" void EntryPoint() { - MessageBox(nullptr, "Hello World", "Hello World", nullptr); - MessageBox(nullptr, "Hello World 1", "Hello World 1", nullptr); +extern "C" void EntryPoint() { + MessageBoxA(nullptr, "Hello World", "Hello World", nullptr); + MessageBoxA(nullptr, "Hello World 1", "Hello World 1", nullptr); } \ No newline at end of file diff --git a/src/theo/comp/comp.cpp b/src/theo/comp/comp.cpp index e013ed7..e903678 100644 --- a/src/theo/comp/comp.cpp +++ b/src/theo/comp/comp.cpp @@ -8,35 +8,103 @@ comp_t::comp_t(decomp::decomp_t* dcmp, resolver_t resolve) : m_dcmp(dcmp), m_allocator(alloc), m_copier(copy), m_resolver(resolve) {} -void comp_t::compose() { +void comp_t::allocate() { + // map code & data/rdata/bss sections first... + // m_dcmp->syms()->for_each([&](theo::decomp::symbol_t& sym) { - // if this symbol is a function.. - // - if (sym.scn()->characteristics.mem_execute) { - // if comp type is none (meaning entire function) then we just allocate - // it... - // - if (sym.dcmp_type() == decomp::decomp_type_t::none) { + switch (sym.sym_type()) { + case decomp::sym_type_t::section: + case decomp::sym_type_t::function: { sym.allocated_at(m_allocator(sym.size(), sym.scn()->characteristics)); - spdlog::info("allocated entire function: {} at address: {:X} size: {}", - sym.name(), sym.allocated_at(), sym.size()); - } else { // else the dcmp_type is split instrs we are going to apply - // transformations... + break; + } + case decomp::sym_type_t::inst_split: { + // TODO: call into reloc_t static methods to generate random code to + // obfuscate relocations... + // + break; + } + default: + break; + } + }); - sym.allocated_at(m_allocator(sym.size(), sym.scn()->characteristics)); + // then map data/rdata/bss symbols to the allocated sections... + // + m_dcmp->syms()->for_each([&](theo::decomp::symbol_t& sym) { + if (sym.sym_type() == decomp::sym_type_t::data) { + auto scn_sym = + m_dcmp->syms()->sym_from_hash(m_dcmp->scn_hash_tbl()[sym.scn()]); - spdlog::info( - "transformed relocations of symbol: {} size is now: {} allocated " - "at: {:X}", - sym.name(), sym.size(), sym.allocated_at()); + if (!scn_sym.has_value()) { + spdlog::error("failed to locate section: {} for symbol: {}", + sym.scn()->name.to_string(), sym.name()); + + assert(scn_sym.has_value()); } - } else { // else the allocation is data which means we dont have any - // relocs... - sym.allocated_at(m_allocator(sym.size(), sym.scn()->characteristics)); + + sym.allocated_at(scn_sym.value().allocated_at() + sym.offset()); } }); } +void comp_t::resolve() { + // resolve relocations in all symbols... + // + m_dcmp->syms()->for_each([&](theo::decomp::symbol_t& sym) { + std::for_each(sym.relocs().begin(), sym.relocs().end(), [&](reloc_t reloc) { + if (reloc.offset() > sym.data().size()) { + spdlog::error( + "invalid relocation... writing outside of symbol length... offset: " + "{} sym size: {}", + sym.offset(), sym.data().size()); + + assert(reloc.offset() > sym.data().size()); + } + + // try and resolve the symbol by refering to the internal symbol table + // first... if there is no symbol then refer to the resolver... + // + auto reloc_sym = m_dcmp->syms()->sym_from_hash(reloc.hash()); + auto allocated_at = reloc_sym.has_value() + ? reloc_sym.value().allocated_at() + : m_resolver(reloc.name()); + + if (!allocated_at) { + spdlog::error("failed to resolve reloc from symbol: {} to symbol: {}", + sym.name(), reloc.name()); + + assert(allocated_at); + } + + switch (sym.sym_type()) { + case decomp::sym_type_t::function: { + *reinterpret_cast(sym.data().data() + + reloc.offset()) = allocated_at; + break; + } + case decomp::sym_type_t::inst_split: { + // TODO: run the vector of transformation operations here if the + // symbol is of type inst_split... the transformations will be applied + // to allocate_at() result on the symbol... + // + break; + } + default: + break; + } + }); + }); +} + +void comp_t::copy_syms() { + // copy symbols into memory using the copier supplied... + // + m_dcmp->syms()->for_each([&](theo::decomp::symbol_t& sym) { + m_copier(sym.allocated_at(), sym.data().data(), sym.data().size()); + }); +} + void comp_t::allocator(allocator_t alloc) { m_allocator = alloc; } @@ -50,8 +118,7 @@ void comp_t::resolver(resolver_t resolve) { } std::uintptr_t comp_t::resolve(const std::string&& sym) { - return m_dcmp->syms() - ->sym_from_hash(decomp::symbol_t::hash(sym)) - .allocated_at(); + auto res = m_dcmp->syms()->sym_from_hash(decomp::symbol_t::hash(sym)); + return res.has_value() ? res->allocated_at() : 0; } } // namespace theo::comp \ No newline at end of file diff --git a/src/theo/comp/symbol_table.cpp b/src/theo/comp/symbol_table.cpp index 07a628f..828e53c 100644 --- a/src/theo/comp/symbol_table.cpp +++ b/src/theo/comp/symbol_table.cpp @@ -31,16 +31,21 @@ void symbol_table_t::for_each(std::function fn) { fn(itr->second); } -decomp::symbol_t symbol_table_t::sym_from_hash(std::size_t hash) { - return m_table.at(hash); +std::optional symbol_table_t::sym_from_hash( + std::size_t hash) { + return m_table.count(hash) ? m_table.at(hash) + : std::optional{}; } -decomp::symbol_t symbol_table_t::sym_from_alloc(std::uintptr_t allocated_at) { - return std::find_if(m_table.begin(), m_table.end(), - [&](std::pair itr) { - return itr.second.allocated_at() == allocated_at; - }) - ->second; +std::optional symbol_table_t::sym_from_alloc( + std::uintptr_t allocated_at) { + auto res = + std::find_if(m_table.begin(), m_table.end(), + [&](std::pair itr) { + return itr.second.allocated_at() == allocated_at; + }); + + return res != m_table.end() ? res->second : std::optional{}; } std::uint32_t symbol_table_t::size() { diff --git a/src/theo/decomp/decomp.cpp b/src/theo/decomp/decomp.cpp index a0d1c14..28ec36f 100644 --- a/src/theo/decomp/decomp.cpp +++ b/src/theo/decomp/decomp.cpp @@ -5,6 +5,8 @@ decomp_t::decomp_t(std::vector& lib, comp::symbol_table_t* syms) : m_lib(lib), m_syms(syms) {} std::optional decomp_t::decompose() { + // extract obj files from the archive file... + // ar::view lib(m_lib.data(), m_lib.size()); std::for_each( lib.begin(), lib.end(), @@ -18,7 +20,27 @@ std::optional decomp_t::decompose() { } }); + // generate symbols, populate section hash table, for each object file + // extracted from the archive file... + // std::for_each(m_objs.begin(), m_objs.end(), [&](coff::image_t* img) { + // populate section hash table... + // + for (auto idx = 0u; idx < img->file_header.num_sections; ++idx) { + auto scn = img->get_section(idx); + auto scn_sym_name = + std::string(scn->name.to_string(img->get_strings())) + .append("#") + .append(std::to_string(idx)) + .append("!") + .append(std::to_string(img->file_header.timedate_stamp)); + + // hash the name of the section + the index + thhe timestamp of the obj + // file it is in... + // + m_scn_hash_tbl.insert({scn, decomp::symbol_t::hash(scn_sym_name)}); + } + auto syms_cnt = img->file_header.num_symbols; for (auto idx = 0u; idx < syms_cnt; ++idx) { auto sym = img->get_symbol(idx); @@ -31,8 +53,8 @@ std::optional decomp_t::decompose() { auto scn = img->get_section(sym->section_index - 1); auto dcmp_type = scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME - ? decomp::decomp_type_t::instr_split - : decomp::decomp_type_t::none; + ? decomp::sym_type_t::inst_split + : decomp::sym_type_t::function; auto fn_size = scn->size_raw_data; auto fn_bgn = scn->ptr_raw_data + reinterpret_cast(img); @@ -48,14 +70,54 @@ std::optional decomp_t::decompose() { } else if (sym->has_section() && sym->storage_class == coff::storage_class_id::public_symbol) { auto scn = img->get_section(sym->section_index - 1); - spdlog::info("{} allocated in section: {} with size: {}", - sym->name.to_string(img->get_strings()), - scn->name.to_string(img->get_strings()), - scn->size_raw_data); + auto scn_sym = m_syms->sym_from_hash(m_scn_hash_tbl[scn]); + + // if the section doesnt have a symbol then make one and put it into the + // symbol table... + // + if (!scn_sym.has_value()) { + auto scn_sym_name = + std::string(scn->name.to_string(img->get_strings())) + .append("#") + .append(std::to_string(sym->section_index - 1)) + .append("!") + .append(std::to_string(img->file_header.timedate_stamp)); + + std::vector scn_data( + reinterpret_cast(img) + scn->ptr_raw_data, + reinterpret_cast(img) + scn->ptr_raw_data + + scn->size_raw_data); + + decomp::symbol_t new_scn_sym(scn_sym_name, 0, scn_data, scn, {}, {}, + sym_type_t::section); + + spdlog::info( + "generating symbol for section: {} sym name: {} hash: {:X}", + scn->name.to_string(img->get_strings()), new_scn_sym.name(), + new_scn_sym.hash()); + + m_syms->add_symbol(new_scn_sym); + } + + // create a symbol for the data... + // + decomp::symbol_t new_sym(sym->name.to_string(img->get_strings()).data(), + sym->value, {}, scn, sym, {}, + sym_type_t::data); + + spdlog::info("adding data symbol: {} located inside of section: {}", + new_sym.name(), + m_syms->sym_from_hash(m_scn_hash_tbl[new_sym.scn()]) + .value() + .name()); + + m_syms->add_symbol(new_sym); } } }); + // return the extract symbols to the caller... + // return m_syms; } @@ -74,4 +136,8 @@ std::vector decomp_t::objs() { comp::symbol_table_t* decomp_t::syms() { return m_syms; } + +std::map& decomp_t::scn_hash_tbl() { + return m_scn_hash_tbl; +} } // namespace theo::decomp \ No newline at end of file diff --git a/src/theo/decomp/routine.cpp b/src/theo/decomp/routine.cpp index 0b70ead..0831932 100644 --- a/src/theo/decomp/routine.cpp +++ b/src/theo/decomp/routine.cpp @@ -5,14 +5,14 @@ routine_t::routine_t(coff::symbol_t* sym, coff::image_t* img, coff::section_header_t* scn, std::vector& fn, - decomp_type_t dcmp_type) + sym_type_t dcmp_type) : m_img(img), m_scn(scn), m_data(fn), m_dcmp_type(dcmp_type), m_sym(sym) {} std::vector routine_t::decompose() { std::vector result; switch (m_dcmp_type) { - case none: { + case function: { std::vector relocs; auto scn_relocs = reinterpret_cast( m_scn->ptr_relocs + reinterpret_cast(m_img)); @@ -23,19 +23,20 @@ std::vector routine_t::decompose() { auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); auto sym_hash = decomp::symbol_t::hash(sym_name.data()); - spdlog::info("{} reloc to: {} at offset: {}", + spdlog::info("{} reloc to: {} hash: {:X} at offset: {}", m_sym->name.to_string(m_img->get_strings()), sym_name, - scn_reloc->virtual_address); + sym_hash, scn_reloc->virtual_address); - relocs.push_back(comp::reloc_t(scn_reloc->virtual_address, sym_hash)); + relocs.push_back(comp::reloc_t(scn_reloc->virtual_address, sym_hash, + sym_name.data())); } result.push_back(decomp::symbol_t( m_sym->name.to_string(m_img->get_strings()).data(), m_sym->value, - m_data, m_scn, m_sym, relocs, decomp_type_t::none)); + m_data, m_scn, m_sym, relocs, sym_type_t::function)); break; } - case instr_split: { + case inst_split: { std::uint32_t offset = 0u; xed_error_enum_t err; @@ -83,7 +84,8 @@ std::vector routine_t::decompose() { spdlog::info("{} reloc to: {} at offset: {}", new_sym_name, sym_name, reloc_offset); - relocs.push_back(comp::reloc_t(reloc_offset, sym_hash)); + relocs.push_back( + comp::reloc_t(reloc_offset, sym_hash, sym_name.data())); } // add a reloc to the next instruction... @@ -96,8 +98,8 @@ std::vector routine_t::decompose() { .append(std::to_string(offset + xed_decoded_inst_get_length(&instr))); - relocs.push_back( - comp::reloc_t(0, decomp::symbol_t::hash(next_inst_sym))); + relocs.push_back(comp::reloc_t(0, decomp::symbol_t::hash(next_inst_sym), + next_inst_sym.data())); // get the instructions bytes // @@ -107,7 +109,7 @@ std::vector routine_t::decompose() { result.push_back(decomp::symbol_t(new_sym_name, offset, inst_bytes, m_scn, m_sym, relocs, - decomp_type_t::instr_split)); + sym_type_t::inst_split)); // after creating the symbol and dealing with relocs then print the // information we have concluded... diff --git a/src/theo/decomp/symbol.cpp b/src/theo/decomp/symbol.cpp index 0619390..e2d5ed4 100644 --- a/src/theo/decomp/symbol.cpp +++ b/src/theo/decomp/symbol.cpp @@ -7,13 +7,13 @@ symbol_t::symbol_t(std::string name, coff::section_header_t* scn, coff::symbol_t* sym, std::vector relocs, - decomp_type_t dcmp_type) + sym_type_t dcmp_type) : m_name(name), m_offset(offset), m_data(data), m_scn(scn), m_relocs(relocs), - m_dcmp_type(dcmp_type), + m_sym_type(dcmp_type), m_sym(sym), m_allocated_at(0) {} @@ -37,12 +37,12 @@ std::uint32_t symbol_t::size() const { return m_data.size(); } -std::vector symbol_t::data() const { +std::vector& symbol_t::data() { return m_data; } -decomp_type_t symbol_t::dcmp_type() const { - return m_dcmp_type; +sym_type_t symbol_t::sym_type() const { + return m_sym_type; } void symbol_t::allocated_at(std::uintptr_t allocated_at) { @@ -64,14 +64,4 @@ std::vector& symbol_t::relocs() { std::size_t symbol_t::hash(const std::string& sym) { return std::hash{}(sym); } - -std::size_t symbol_t::scn_hash(coff::symbol_t* sym, - coff::section_header_t* scn, - coff::image_t* img) { - return hash(std::string(scn->name.to_string(img->get_strings())) - .append("#") - .append(std::to_string(sym->section_index - 1)) - .append("!") - .append(std::to_string(img->file_header.timedate_stamp))); -} } // namespace theo::decomp \ No newline at end of file diff --git a/src/theo/theo.cpp b/src/theo/theo.cpp index 70dd9e7..f308ffd 100644 --- a/src/theo/theo.cpp +++ b/src/theo/theo.cpp @@ -28,7 +28,9 @@ std::optional theo_t::decompose() { } std::uintptr_t theo_t::compose(const std::string&& entry_sym) { - m_cmp.compose(); + m_cmp.allocate(); + m_cmp.resolve(); + m_cmp.copy_syms(); return m_cmp.resolve(entry_sym.data()); } } // namespace theo \ No newline at end of file