From 367a62d8de5cca7997bcb42de7cc39539ea44886 Mon Sep 17 00:00:00 2001 From: _xeroxz Date: Sat, 9 Apr 2022 15:34:44 -0700 Subject: [PATCH] If the obj has more symbols with data than the max section size, the compiler will start putting multiple symbols into a single section. I fixed my code so that it can handle decomposing functions and relocations in that case. However, there is still an issue with private symbols: private symbols with no name use the section name as their symbol name, which means there can be duplicate symbol names. If the symbol is private, the base type is none, and it has a section, then we need to create a symbol name. Create a function that handles both situations, something like std::string symbol_t::get_symbol_name(coff::symbol_t* sym). --- include/decomp/decomp.hpp | 5 ++- include/recomp/reloc.hpp | 8 ++-- src/theo/decomp/decomp.cpp | 75 +++++++++++++++++++++++++------------ src/theo/decomp/routine.cpp | 25 ++++++++----- 4 files changed, 75 insertions(+), 38 deletions(-) diff --git a/include/decomp/decomp.hpp b/include/decomp/decomp.hpp index ae52c11..018472e 100644 --- a/include/decomp/decomp.hpp +++ b/include/decomp/decomp.hpp @@ -13,7 +13,7 @@ #include namespace theo::decomp { -using sym_data_t = std::pair; +using sym_data_t = std::tuple; class decomp_t { public: explicit decomp_t(std::vector& lib, @@ -30,6 +30,9 @@ class decomp_t { private: std::uint32_t ext_used_syms(const std::string&& entry_sym); std::optional get_symbol(const std::string_view& name); + std::uint32_t next_sym(coff::image_t* img, + coff::section_header_t* hdr, + coff::symbol_t* s); const std::vector m_lib; std::vector m_objs; diff --git a/include/recomp/reloc.hpp b/include/recomp/reloc.hpp index beba759..977dc22 100644 --- a/include/recomp/reloc.hpp +++ b/include/recomp/reloc.hpp @@ -6,15 +6,15 @@ namespace theo::recomp { class reloc_t { public: - explicit reloc_t(std::uint16_t offset, + explicit reloc_t(std::uint32_t offset, std::size_t 
hash, const std::string&& sym_name) : m_offset(offset), m_hash(hash), m_sym_name(sym_name) {} std::size_t hash() { return m_hash; } std::string name() { return m_sym_name; } - std::uint16_t offset() { return m_offset; } - void offset(std::uint16_t offset) { m_offset = offset; } + std::uint32_t offset() { return m_offset; } + void offset(std::uint32_t offset) { m_offset = offset; } void add_transform( std::pair entry) { @@ -31,6 +31,6 @@ class reloc_t { m_transforms; std::string m_sym_name; std::size_t m_hash; - std::uint16_t m_offset; + std::uint32_t m_offset; }; } // namespace theo::recomp \ No newline at end of file diff --git a/src/theo/decomp/decomp.cpp b/src/theo/decomp/decomp.cpp index 4da9d60..a2cc29d 100644 --- a/src/theo/decomp/decomp.cpp +++ b/src/theo/decomp/decomp.cpp @@ -27,7 +27,12 @@ std::optional decomp_t::decompose( auto sym_name = sym->name.to_string(img->get_strings()); if (sym_name.length()) { auto sym_hash = symbol_t::hash(sym_name.data()); - m_lookup_tbl[sym_hash].push_back({img, sym}); + auto sym_size = + sym->has_section() + ? next_sym(img, img->get_section(sym->section_index - 1), sym) + : 0u; + + m_lookup_tbl[sym_hash].push_back({img, sym, sym_size}); } } }); @@ -41,7 +46,7 @@ std::optional decomp_t::decompose( // extracted from the archive file... // std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) { - auto [img, sym] = data; + auto [img, sym, size] = data; // populate section hash table with sections for the img of this // symbol... only populate the hash table if its not been populated for // this obj before... @@ -73,8 +78,10 @@ std::optional decomp_t::decompose( scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME ? 
decomp::sym_type_t::instruction : decomp::sym_type_t::function; - auto fn_size = scn->size_raw_data; - auto fn_bgn = scn->ptr_raw_data + reinterpret_cast(img); + + auto fn_size = next_sym(img, scn, sym); + auto fn_bgn = scn->ptr_raw_data + reinterpret_cast(img) + + sym->value; std::vector fn(fn_bgn, fn_bgn + fn_size); decomp::routine_t rtn(sym, img, scn, fn, dcmp_type); @@ -97,10 +104,16 @@ std::optional decomp_t::decompose( .append("!") .append(std::to_string(img->file_header.timedate_stamp)); - std::vector scn_data( - reinterpret_cast(img) + scn->ptr_raw_data, - reinterpret_cast(img) + scn->ptr_raw_data + - scn->size_raw_data); + std::vector scn_data; + if (scn->characteristics.cnt_uninit_data) { + scn_data.insert(scn_data.begin(), scn->size_raw_data, 0); + } else { + scn_data.insert( + scn_data.begin(), + reinterpret_cast(img) + scn->ptr_raw_data, + reinterpret_cast(img) + scn->ptr_raw_data + + scn->size_raw_data); + } decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {}, {}, sym_type_t::section); @@ -137,30 +150,44 @@ std::optional decomp_t::decompose( return m_syms; } +std::uint32_t decomp_t::next_sym(coff::image_t* img, + coff::section_header_t* hdr, + coff::symbol_t* s) { + // loop over all symbols in this object... + // find the next symbol inside of the same section... + // if there is no next symbol then we use the end of the section... 
+ std::uint32_t res = hdr->size_raw_data; + for (auto idx = 0u; idx < img->file_header.num_symbols; ++idx) { + auto q = img->get_symbol(idx); + if (q->derived_type == coff::derived_type_id::function && + q->section_index == s->section_index) + if (q->value > s->value && q->value < res) + res = q->value; + } + return res; +} + std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) { - std::optional> entry; + std::optional entry; if (!(entry = get_symbol(entry_sym.data())).has_value()) return 0u; std::set cache; const auto finding_syms = [&]() -> bool { for (auto itr = m_used_syms.begin(); itr != m_used_syms.end(); ++itr) { - auto [img, sym] = *itr; - if (sym->has_section() && !cache.count(sym)) { + auto [img, sym, size] = *itr; + if (sym->has_section() && !cache.count(sym) && size) { auto scn = img->get_section(sym->section_index - 1); auto num_relocs = scn->num_relocs; auto relocs = reinterpret_cast( scn->ptr_relocs + reinterpret_cast(img)); for (auto idx = 0u; idx < num_relocs; ++idx) { - auto reloc_sym = img->get_symbol(relocs[idx].symbol_index); - // if the symbol is defined in the current obj then we dont need to go - // looking for where its actually defined... - if (img->get_symbol(relocs[idx].symbol_index)->has_section()) { - sym_data_t sym_data = {img, reloc_sym}; - if (m_used_syms.emplace(sym_data).second) - return true; - } else { + auto reloc = &relocs[idx]; + // if the reloc is inside of the current symbol... 
+ if (reloc->virtual_address >= sym->value && + reloc->virtual_address < sym->value + size) { + auto reloc_sym = img->get_symbol(reloc->symbol_index); auto sym_name = reloc_sym->name.to_string(img->get_strings()); entry = get_symbol(sym_name); if (m_used_syms.emplace(entry.value()).second) @@ -183,15 +210,17 @@ std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) { std::optional decomp_t::get_symbol(const std::string_view& name) { coff::image_t* img = {}; coff::symbol_t* sym = {}; + std::uint32_t size = {}; auto& syms = m_lookup_tbl[symbol_t::hash(name.data())]; for (auto idx = 0u; idx < syms.size(); ++idx) { - img = syms[idx].first; - sym = syms[idx].second; + img = std::get<0>(syms[idx]); + sym = std::get<1>(syms[idx]); + size = std::get<2>(syms[idx]); if (sym->has_section()) - return {{img, sym}}; + return {{img, sym, size}}; } - return {{img, sym}}; + return {{img, sym, size}}; } std::vector decomp_t::rtns() { diff --git a/src/theo/decomp/routine.cpp b/src/theo/decomp/routine.cpp index b9773ea..7bedb4e 100644 --- a/src/theo/decomp/routine.cpp +++ b/src/theo/decomp/routine.cpp @@ -19,12 +19,16 @@ std::vector routine_t::decompose() { for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) { auto scn_reloc = &scn_relocs[idx]; - auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index); - auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); - auto sym_hash = decomp::symbol_t::hash(sym_name.data()); - - relocs.push_back(recomp::reloc_t(scn_reloc->virtual_address, sym_hash, - sym_name.data())); + // if the reloc is in the current function... 
+ if (scn_reloc->virtual_address >= m_sym->value && + scn_reloc->virtual_address < m_sym->value + m_data.size()) { + auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index); + auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); + auto sym_hash = decomp::symbol_t::hash(sym_name.data()); + relocs.push_back( + recomp::reloc_t(scn_reloc->virtual_address - m_sym->value, + sym_hash, sym_name.data())); + } } result.push_back(decomp::symbol_t( @@ -33,7 +37,7 @@ std::vector routine_t::decompose() { break; } case instruction: { - std::uint32_t offset = 0u; + std::uint32_t offset = {}; xed_error_enum_t err; xed_decoded_inst_t instr; @@ -64,9 +68,10 @@ std::vector routine_t::decompose() { auto reloc = std::find_if( scn_relocs, scn_relocs + m_scn->num_relocs, [&](coff::reloc_t reloc) { - return reloc.virtual_address >= offset && + return reloc.virtual_address >= m_sym->value + offset && reloc.virtual_address < - offset + xed_decoded_inst_get_length(&instr); + m_sym->value + offset + + xed_decoded_inst_get_length(&instr); }); // if there is indeed a reloc for this instruction... @@ -75,7 +80,7 @@ std::vector routine_t::decompose() { auto sym_reloc = m_img->get_symbol(reloc->symbol_index); auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); auto sym_hash = decomp::symbol_t::hash(sym_name.data()); - auto reloc_offset = reloc->virtual_address - offset; + auto reloc_offset = reloc->virtual_address - m_sym->value - offset; relocs.push_back( recomp::reloc_t(reloc_offset, sym_hash, sym_name.data()));