diff --git a/include/comp/comp.hpp b/include/comp/comp.hpp index 0319dfc..4997eca 100644 --- a/include/comp/comp.hpp +++ b/include/comp/comp.hpp @@ -3,8 +3,8 @@ #include namespace theo::comp { -using resolver_t = std::function; -using copier_t = std::function; +using resolver_t = std::function; +using copier_t = std::function; using allocator_t = std::function; @@ -17,13 +17,14 @@ class comp_t { copier_t copy, resolver_t resolve); - std::optional compose(); + void compose(); void allocator(allocator_t alloc); void copier(copier_t copy); void resolver(resolver_t resolve); - std::uintptr_t resolve(std::string&& sym); + std::uintptr_t resolve(const std::string&& sym); private: + void gen_reloc_trans(decomp::symbol_t* sym); decomp::decomp_t* m_dcmp; resolver_t m_resolver; copier_t m_copier; diff --git a/include/decomp/routine.hpp b/include/decomp/routine.hpp index c9a9926..51ae8f3 100644 --- a/include/decomp/routine.hpp +++ b/include/decomp/routine.hpp @@ -15,8 +15,6 @@ extern "C" { #define INSTR_SPLIT_SECTION_NAME ".obf" namespace theo::decomp { -enum decomp_type_t { none, instr_split }; - class routine_t { public: explicit routine_t(coff::symbol_t* sym, diff --git a/include/decomp/symbol.hpp b/include/decomp/symbol.hpp index 83fee82..d9da2d6 100644 --- a/include/decomp/symbol.hpp +++ b/include/decomp/symbol.hpp @@ -6,24 +6,33 @@ #include namespace theo::decomp { +enum decomp_type_t { none, instr_split }; + class symbol_t { public: explicit symbol_t(std::string name, std::uintptr_t offset, std::vector data, coff::section_header_t* scn, - std::vector relocs); + coff::symbol_t* sym, + std::vector relocs, + decomp_type_t dcmp_type); std::string name() const; std::uintptr_t offset() const; std::uintptr_t allocated_at() const; std::uint32_t size() const; + coff::section_header_t* scn() const; std::vector data() const; - + coff::symbol_t* sym() const; + decomp_type_t dcmp_type() const; + std::vector& relocs(); void allocated_at(std::uintptr_t allocated_at); std::size_t 
hash(); static std::size_t hash(const std::string& sym); + static std::size_t scn_hash(coff::symbol_t* sym, coff::section_header_t* scn, + coff::image_t* img); private: std::string m_name; @@ -31,5 +40,7 @@ class symbol_t { std::vector m_data; coff::section_header_t* m_scn; std::vector m_relocs; + decomp_type_t m_dcmp_type; + coff::symbol_t* m_sym; }; } // namespace theo::decomp \ No newline at end of file diff --git a/src/tests/demo/main.cpp b/src/tests/demo/main.cpp index d149d7d..5362fd1 100644 --- a/src/tests/demo/main.cpp +++ b/src/tests/demo/main.cpp @@ -18,7 +18,25 @@ int main(int argc, char* argv[]) { fdata.resize(fsize); f.read((char*)fdata.data(), fsize); - theo::theo_t t(fdata, {}); + theo::comp::allocator_t allocator = + [&](std::uint32_t size, + coff::section_characteristics_t section_type) -> std::uintptr_t { + return reinterpret_cast(VirtualAlloc( + NULL, size, MEM_COMMIT | MEM_RESERVE, + section_type.mem_execute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE)); + }; + + theo::comp::copier_t copier = [&](std::uintptr_t ptr, void* buff, + std::uint32_t size) { + std::memcpy((void*)ptr, buff, size); + }; + + theo::comp::resolver_t resolver = [&](std::string sym) -> std::uintptr_t { + return reinterpret_cast( + GetProcAddress(LoadLibraryA("user32.dll"), sym.data())); + }; + + theo::theo_t t(fdata, {allocator, copier, resolver}); auto res = t.decompose(); if (!res.has_value()) { @@ -27,4 +45,6 @@ int main(int argc, char* argv[]) { } spdlog::info("decomposed {} symbols...", res.value()); + auto entry_pnt = t.compose("EntryPoint"); + spdlog::info("entry point address: {:X}", entry_pnt); } \ No newline at end of file diff --git a/src/tests/demolib/main.cpp b/src/tests/demolib/main.cpp index c27db31..01abc2e 100644 --- a/src/tests/demolib/main.cpp +++ b/src/tests/demolib/main.cpp @@ -3,6 +3,7 @@ extern "C" int MessageBox(void* hWnd, char* lpCaption, void* uType); -__declspec(code_seg(".obf")) void EntryPoint() { +__declspec(code_seg(".obf")) extern "C" void 
EntryPoint() { MessageBox(nullptr, "Hello World", "Hello World", nullptr); + MessageBox(nullptr, "Hello World 1", "Hello World 1", nullptr); } \ No newline at end of file diff --git a/src/theo/comp/comp.cpp b/src/theo/comp/comp.cpp index 4ca4537..e013ed7 100644 --- a/src/theo/comp/comp.cpp +++ b/src/theo/comp/comp.cpp @@ -8,6 +8,35 @@ comp_t::comp_t(decomp::decomp_t* dcmp, resolver_t resolve) : m_dcmp(dcmp), m_allocator(alloc), m_copier(copy), m_resolver(resolve) {} +void comp_t::compose() { + m_dcmp->syms()->for_each([&](theo::decomp::symbol_t& sym) { + // if this symbol is a function.. + // + if (sym.scn()->characteristics.mem_execute) { + // if comp type is none (meaning entire function) then we just allocate + // it... + // + if (sym.dcmp_type() == decomp::decomp_type_t::none) { + sym.allocated_at(m_allocator(sym.size(), sym.scn()->characteristics)); + spdlog::info("allocated entire function: {} at address: {:X} size: {}", + sym.name(), sym.allocated_at(), sym.size()); + } else { // else the dcmp_type is split instrs we are going to apply + // transformations... + + sym.allocated_at(m_allocator(sym.size(), sym.scn()->characteristics)); + + spdlog::info( + "transformed relocations of symbol: {} size is now: {} allocated " + "at: {:X}", + sym.name(), sym.size(), sym.allocated_at()); + } + } else { // else the allocation is data which means we dont have any + // relocs... 
+ sym.allocated_at(m_allocator(sym.size(), sym.scn()->characteristics)); + } + }); +} + void comp_t::allocator(allocator_t alloc) { m_allocator = alloc; } @@ -20,7 +49,7 @@ void comp_t::resolver(resolver_t resolve) { m_resolver = resolve; } -std::uintptr_t comp_t::resolve(std::string&& sym) { +std::uintptr_t comp_t::resolve(const std::string&& sym) { return m_dcmp->syms() ->sym_from_hash(decomp::symbol_t::hash(sym)) .allocated_at(); diff --git a/src/theo/comp/symbol_table.cpp b/src/theo/comp/symbol_table.cpp index 8241c67..07a628f 100644 --- a/src/theo/comp/symbol_table.cpp +++ b/src/theo/comp/symbol_table.cpp @@ -27,7 +27,8 @@ void symbol_table_t::update(std::size_t hash, std::uintptr_t allocated_at) { } void symbol_table_t::for_each(std::function fn) { - std::for_each(m_table.begin(), m_table.end(), [&](auto v) { fn(v.second); }); + for (auto itr = m_table.begin(); itr != m_table.end(); ++itr) + fn(itr->second); } decomp::symbol_t symbol_table_t::sym_from_hash(std::size_t hash) { diff --git a/src/theo/decomp/decomp.cpp b/src/theo/decomp/decomp.cpp index 6585951..a0d1c14 100644 --- a/src/theo/decomp/decomp.cpp +++ b/src/theo/decomp/decomp.cpp @@ -45,6 +45,13 @@ std::optional decomp_t::decompose() { auto syms = rtn.decompose(); spdlog::info("decomposed routine into {} symbols...", syms.size()); m_syms->add_symbols(syms); + } else if (sym->has_section() && + sym->storage_class == coff::storage_class_id::public_symbol) { + auto scn = img->get_section(sym->section_index - 1); + spdlog::info("{} allocated in section: {} with size: {}", + sym->name.to_string(img->get_strings()), + scn->name.to_string(img->get_strings()), + scn->size_raw_data); } } }); diff --git a/src/theo/decomp/routine.cpp b/src/theo/decomp/routine.cpp index 2b6dea5..0b70ead 100644 --- a/src/theo/decomp/routine.cpp +++ b/src/theo/decomp/routine.cpp @@ -30,9 +30,9 @@ std::vector routine_t::decompose() { relocs.push_back(comp::reloc_t(scn_reloc->virtual_address, sym_hash)); } - result.push_back( - 
decomp::symbol_t(m_sym->name.to_string(m_img->get_strings()).data(), - m_sym->value, m_data, m_scn, relocs)); + result.push_back(decomp::symbol_t( + m_sym->name.to_string(m_img->get_strings()).data(), m_sym->value, + m_data, m_scn, m_sym, relocs, decomp_type_t::none)); break; } case instr_split: { @@ -86,12 +86,28 @@ std::vector routine_t::decompose() { relocs.push_back(comp::reloc_t(reloc_offset, sym_hash)); } + // add a reloc to the next instruction... + // note that the offset is ZERO... comp_t will understand that + // relocs with offset ZERO means the next instructions... + // + auto next_inst_sym = + std::string(m_sym->name.to_string(m_img->get_strings())) + .append("@") + .append(std::to_string(offset + + xed_decoded_inst_get_length(&instr))); + + relocs.push_back( + comp::reloc_t(0, decomp::symbol_t::hash(next_inst_sym))); + + // get the instructions bytes + // std::vector inst_bytes( m_data.data() + offset, m_data.data() + offset + xed_decoded_inst_get_length(&instr)); - result.push_back( - decomp::symbol_t(new_sym_name, offset, inst_bytes, m_scn, relocs)); + result.push_back(decomp::symbol_t(new_sym_name, offset, inst_bytes, + m_scn, m_sym, relocs, + decomp_type_t::instr_split)); // after creating the symbol and dealing with relocs then print the // information we have concluded... 
@@ -106,6 +122,11 @@ std::vector routine_t::decompose() { xed_decoded_inst_zero_set_mode(&instr, &istate); } + // remove the relocation to the next symbol from the last instruction. + // mutate result.back() in place: editing copies would be a silent no-op + // + if (!result.empty() && !result.back().relocs().empty()) + result.back().relocs().pop_back(); break; } default: diff --git a/src/theo/decomp/symbol.cpp b/src/theo/decomp/symbol.cpp index 635b733..0619390 100644 --- a/src/theo/decomp/symbol.cpp +++ b/src/theo/decomp/symbol.cpp @@ -5,12 +5,16 @@ symbol_t::symbol_t(std::string name, std::uintptr_t offset, std::vector data, coff::section_header_t* scn, - std::vector relocs) + coff::symbol_t* sym, + std::vector relocs, + decomp_type_t dcmp_type) : m_name(name), m_offset(offset), m_data(data), m_scn(scn), m_relocs(relocs), + m_dcmp_type(dcmp_type), + m_sym(sym), m_allocated_at(0) {} std::string symbol_t::name() const { @@ -25,6 +29,10 @@ std::uintptr_t symbol_t::allocated_at() const { return m_allocated_at; } +coff::section_header_t* symbol_t::scn() const { + return m_scn; +} + std::uint32_t symbol_t::size() const { return m_data.size(); } @@ -33,6 +41,10 @@ std::vector symbol_t::data() const { return m_data; } +decomp_type_t symbol_t::dcmp_type() const { + return m_dcmp_type; +} + void symbol_t::allocated_at(std::uintptr_t allocated_at) { m_allocated_at = allocated_at; } @@ -41,7 +53,25 @@ std::size_t symbol_t::hash() { return hash(m_name); } +coff::symbol_t* symbol_t::sym() const { + return m_sym; +} + +std::vector& symbol_t::relocs() { + return m_relocs; +} + std::size_t symbol_t::hash(const std::string& sym) { return std::hash{}(sym); } + +std::size_t symbol_t::scn_hash(coff::symbol_t* sym, + coff::section_header_t* scn, + coff::image_t* img) { + return hash(std::string(scn->name.to_string(img->get_strings())) + .append("#") + .append(std::to_string(sym->section_index - 1)) + .append("!") + .append(std::to_string(img->file_header.timedate_stamp))); +} } // namespace theo::decomp \ No newline at
end of file diff --git a/src/theo/theo.cpp b/src/theo/theo.cpp index b540edf..70dd9e7 100644 --- a/src/theo/theo.cpp +++ b/src/theo/theo.cpp @@ -3,14 +3,18 @@ namespace theo { theo_t::theo_t(std::vector& lib, lnk_fns_t lnkr_fns) : m_dcmp(lib, &m_sym_tbl), m_cmp(&m_dcmp) { - if (static std::atomic_bool v = true; v.exchange(false)) { + // init enc/dec tables only once... + // + if (static std::atomic_bool v = true; v.exchange(false)) xed_tables_init(); - } + + m_cmp.allocator(std::get<0>(lnkr_fns)); + m_cmp.copier(std::get<1>(lnkr_fns)); + m_cmp.resolver(std::get<2>(lnkr_fns)); } std::optional theo_t::decompose() { auto res = m_dcmp.decompose(); - if (!res.has_value()) { spdlog::error("failed to decompose...\n"); return {}; @@ -22,4 +26,9 @@ std::optional theo_t::decompose() { }); return res.value()->size(); } + +std::uintptr_t theo_t::compose(const std::string&& entry_sym) { + m_cmp.compose(); + return m_cmp.resolve(entry_sym.data()); +} } // namespace theo \ No newline at end of file