From 116eff8a3272e4cc7bdefdf194b63259ab450faf Mon Sep 17 00:00:00 2001 From: _xeroxz Date: Mon, 4 Apr 2022 13:03:05 -0700 Subject: [PATCH] finished decomp of routines... it's time to start on comp... --- include/comp/symbol_table.hpp | 2 +- include/decomp/routine.hpp | 14 +++- include/decomp/symbol.hpp | 4 +- src/tests/demolib/main.cpp | 2 +- src/theo/comp/symbol_table.cpp | 2 +- src/theo/decomp/decomp.cpp | 9 ++- src/theo/decomp/routine.cpp | 131 +++++++++++++++++++++++++++------ src/theo/decomp/symbol.cpp | 7 +- src/theo/theo.cpp | 3 + 9 files changed, 139 insertions(+), 35 deletions(-) diff --git a/include/comp/symbol_table.hpp b/include/comp/symbol_table.hpp index 4ae1a46..04b6ec6 100644 --- a/include/comp/symbol_table.hpp +++ b/include/comp/symbol_table.hpp @@ -21,7 +21,7 @@ class symbol_table_t { void update(std::size_t hash, decomp::symbol_t& sym); void update(std::size_t hash, std::uintptr_t allocated_at); - void for_each(std::function fn); + void for_each(std::function fn); std::uint32_t size(); private: diff --git a/include/decomp/routine.hpp b/include/decomp/routine.hpp index 6a795fc..c9a9926 100644 --- a/include/decomp/routine.hpp +++ b/include/decomp/routine.hpp @@ -12,18 +12,28 @@ extern "C" { #include } +#define INSTR_SPLIT_SECTION_NAME ".obf" + namespace theo::decomp { +enum decomp_type_t { none, instr_split }; + class routine_t { public: - explicit routine_t(coff::section_header_t* scn, - std::vector& fn); + explicit routine_t(coff::symbol_t* sym, + coff::image_t* img, + coff::section_header_t* scn, + std::vector& fn, + decomp_type_t dcmp_type); std::vector decompose(); coff::section_header_t* scn(); std::vector data(); private: + coff::symbol_t* m_sym; std::vector m_data; + coff::image_t* m_img; coff::section_header_t* m_scn; + decomp_type_t m_dcmp_type; }; } // namespace theo::decomp \ No newline at end of file diff --git a/include/decomp/symbol.hpp b/include/decomp/symbol.hpp index 47a1a76..83fee82 100644 --- a/include/decomp/symbol.hpp +++ 
b/include/decomp/symbol.hpp @@ -11,7 +11,7 @@ class symbol_t { explicit symbol_t(std::string name, std::uintptr_t offset, std::vector data, - coff::section_header_t scn_hdr, + coff::section_header_t* scn, std::vector relocs); std::string name() const; @@ -29,7 +29,7 @@ class symbol_t { std::string m_name; std::uintptr_t m_offset, m_allocated_at; std::vector m_data; - coff::section_header_t m_scn_hdr; + coff::section_header_t* m_scn; std::vector m_relocs; }; } // namespace theo::decomp \ No newline at end of file diff --git a/src/tests/demolib/main.cpp b/src/tests/demolib/main.cpp index a986d2c..c27db31 100644 --- a/src/tests/demolib/main.cpp +++ b/src/tests/demolib/main.cpp @@ -3,6 +3,6 @@ extern "C" int MessageBox(void* hWnd, char* lpCaption, void* uType); -void EntryPoint() { +__declspec(code_seg(".obf")) void EntryPoint() { MessageBox(nullptr, "Hello World", "Hello World", nullptr); } \ No newline at end of file diff --git a/src/theo/comp/symbol_table.cpp b/src/theo/comp/symbol_table.cpp index f9d02f3..8241c67 100644 --- a/src/theo/comp/symbol_table.cpp +++ b/src/theo/comp/symbol_table.cpp @@ -26,7 +26,7 @@ void symbol_table_t::update(std::size_t hash, std::uintptr_t allocated_at) { m_table.insert({hash, v}); } -void symbol_table_t::for_each(std::function fn) { +void symbol_table_t::for_each(std::function fn) { std::for_each(m_table.begin(), m_table.end(), [&](auto v) { fn(v.second); }); } diff --git a/src/theo/decomp/decomp.cpp b/src/theo/decomp/decomp.cpp index 1c818d7..6585951 100644 --- a/src/theo/decomp/decomp.cpp +++ b/src/theo/decomp/decomp.cpp @@ -23,9 +23,16 @@ std::optional decomp_t::decompose() { for (auto idx = 0u; idx < syms_cnt; ++idx) { auto sym = img->get_symbol(idx); + // if the symbol is a function then we are going to decompose it... + // data symbols are handled after this... 
+ // if (sym->has_section() && sym->derived_type == coff::derived_type_id::function) { auto scn = img->get_section(sym->section_index - 1); + auto dcmp_type = + scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME + ? decomp::decomp_type_t::instr_split + : decomp::decomp_type_t::none; auto fn_size = scn->size_raw_data; auto fn_bgn = scn->ptr_raw_data + reinterpret_cast(img); @@ -33,7 +40,7 @@ std::optional decomp_t::decompose() { sym->name.to_string(img->get_strings()), fn_size); std::vector fn(fn_bgn, fn_bgn + fn_size); - decomp::routine_t rtn(scn, fn); + decomp::routine_t rtn(sym, img, scn, fn, dcmp_type); auto syms = rtn.decompose(); spdlog::info("decomposed routine into {} symbols...", syms.size()); diff --git a/src/theo/decomp/routine.cpp b/src/theo/decomp/routine.cpp index d8ea3f8..2b6dea5 100644 --- a/src/theo/decomp/routine.cpp +++ b/src/theo/decomp/routine.cpp @@ -1,33 +1,118 @@ #include namespace theo::decomp { -routine_t::routine_t(coff::section_header_t* scn, std::vector& fn) - : m_scn(scn), m_data(fn) {} +routine_t::routine_t(coff::symbol_t* sym, + coff::image_t* img, + coff::section_header_t* scn, + std::vector& fn, + decomp_type_t dcmp_type) + : m_img(img), m_scn(scn), m_data(fn), m_dcmp_type(dcmp_type), m_sym(sym) {} std::vector routine_t::decompose() { - std::uint32_t offset = 0u; - xed_error_enum_t err; - - xed_decoded_inst_t instr; - std::vector instrs; - xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b}; - xed_decoded_inst_zero_set_mode(&instr, &istate); - - // keep looping over the section, lower the number of bytes each time... - // - while ((err = xed_decode(&instr, m_data.data() + offset, - m_data.size() - offset)) == XED_ERROR_NONE) { - char buff[255]; - offset += xed_decoded_inst_get_length(&instr); - xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, 0, 0, 0); - spdlog::info("{}", buff); - instrs.push_back(instr); - - // need to set this so that instr can be used to decode again... 
- xed_decoded_inst_zero_set_mode(&instr, &istate); + std::vector result; + + switch (m_dcmp_type) { + case none: { + std::vector relocs; + auto scn_relocs = reinterpret_cast( + m_scn->ptr_relocs + reinterpret_cast(m_img)); + + for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) { + auto scn_reloc = &scn_relocs[idx]; + auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index); + auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); + auto sym_hash = decomp::symbol_t::hash(sym_name.data()); + + spdlog::info("{} reloc to: {} at offset: {}", + m_sym->name.to_string(m_img->get_strings()), sym_name, + scn_reloc->virtual_address); + + relocs.push_back(comp::reloc_t(scn_reloc->virtual_address, sym_hash)); + } + + result.push_back( + decomp::symbol_t(m_sym->name.to_string(m_img->get_strings()).data(), + m_sym->value, m_data, m_scn, relocs)); + break; + } + case instr_split: { + std::uint32_t offset = 0u; + xed_error_enum_t err; + + xed_decoded_inst_t instr; + xed_state_t istate{XED_MACHINE_MODE_LONG_64, XED_ADDRESS_WIDTH_64b}; + xed_decoded_inst_zero_set_mode(&instr, &istate); + + // keep looping over the section, lower the number of bytes each time... + // + while ((err = xed_decode(&instr, m_data.data() + offset, + m_data.size() - offset)) == XED_ERROR_NONE) { + // symbol name is of the format: symbol@instroffset, I.E: main@11... + // + auto new_sym_name = + std::string(m_sym->name.to_string(m_img->get_strings())); + + // first instruction doesnt need the @offset... + // + if (offset) + new_sym_name.append("@").append(std::to_string(offset)); + + std::vector relocs; + auto scn_relocs = reinterpret_cast( + m_scn->ptr_relocs + reinterpret_cast(m_img)); + + // find if this instruction has a relocation or not... + // if so, return the reloc_t... 
+ // + auto reloc = std::find_if( + scn_relocs, scn_relocs + m_scn->num_relocs, + [&](coff::reloc_t reloc) { + return reloc.virtual_address >= offset && + reloc.virtual_address < + offset + xed_decoded_inst_get_length(&instr); + }); + + // if there is indeed a reloc for this instruction... + // + if (reloc != scn_relocs + m_scn->num_relocs) { + auto sym_reloc = m_img->get_symbol(reloc->symbol_index); + auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); + auto sym_hash = decomp::symbol_t::hash(sym_name.data()); + auto reloc_offset = reloc->virtual_address - offset; + + spdlog::info("{} reloc to: {} at offset: {}", new_sym_name, sym_name, + reloc_offset); + + relocs.push_back(comp::reloc_t(reloc_offset, sym_hash)); + } + + std::vector inst_bytes( + m_data.data() + offset, + m_data.data() + offset + xed_decoded_inst_get_length(&instr)); + + result.push_back( + decomp::symbol_t(new_sym_name, offset, inst_bytes, m_scn, relocs)); + + // after creating the symbol and dealing with relocs then print the + // information we have concluded... + // + char buff[255]; + offset += xed_decoded_inst_get_length(&instr); + xed_format_context(XED_SYNTAX_INTEL, &instr, buff, sizeof buff, NULL, + NULL, NULL); + + spdlog::info("{}: {}", new_sym_name, buff); + // need to set this so that instr can be used to decode again... 
+ xed_decoded_inst_zero_set_mode(&instr, &istate); + } + + break; + } + default: + break; } - return {}; + return result; } coff::section_header_t* routine_t::scn() { diff --git a/src/theo/decomp/symbol.cpp b/src/theo/decomp/symbol.cpp index f00cf71..635b733 100644 --- a/src/theo/decomp/symbol.cpp +++ b/src/theo/decomp/symbol.cpp @@ -4,12 +4,12 @@ namespace theo::decomp { symbol_t::symbol_t(std::string name, std::uintptr_t offset, std::vector data, - coff::section_header_t scn_hdr, + coff::section_header_t* scn, std::vector relocs) : m_name(name), m_offset(offset), m_data(data), - m_scn_hdr(scn_hdr), + m_scn(scn), m_relocs(relocs), m_allocated_at(0) {} @@ -41,8 +41,7 @@ std::size_t symbol_t::hash() { return hash(m_name); } -std::size_t symbol_t::hash(const std::string& sym) -{ +std::size_t symbol_t::hash(const std::string& sym) { return std::hash{}(sym); } } // namespace theo::decomp \ No newline at end of file diff --git a/src/theo/theo.cpp b/src/theo/theo.cpp index 5fabf1e..b540edf 100644 --- a/src/theo/theo.cpp +++ b/src/theo/theo.cpp @@ -17,6 +17,9 @@ std::optional theo_t::decompose() { } spdlog::info("decompose successful... {} symbols", res.value()->size()); + res.value()->for_each([&](decomp::symbol_t& sym) { + spdlog::info("hash: {:X}, name: {}", sym.hash(), sym.name()); + }); return res.value()->size(); } } // namespace theo \ No newline at end of file