diff --git a/include/decomp/decomp.hpp b/include/decomp/decomp.hpp index ae52c11..018472e 100644 --- a/include/decomp/decomp.hpp +++ b/include/decomp/decomp.hpp @@ -13,7 +13,7 @@ #include namespace theo::decomp { -using sym_data_t = std::pair; +using sym_data_t = std::tuple; class decomp_t { public: explicit decomp_t(std::vector& lib, @@ -30,6 +30,9 @@ class decomp_t { private: std::uint32_t ext_used_syms(const std::string&& entry_sym); std::optional get_symbol(const std::string_view& name); + std::uint32_t next_sym(coff::image_t* img, + coff::section_header_t* hdr, + coff::symbol_t* s); const std::vector m_lib; std::vector m_objs; diff --git a/include/recomp/reloc.hpp b/include/recomp/reloc.hpp index beba759..977dc22 100644 --- a/include/recomp/reloc.hpp +++ b/include/recomp/reloc.hpp @@ -6,15 +6,15 @@ namespace theo::recomp { class reloc_t { public: - explicit reloc_t(std::uint16_t offset, + explicit reloc_t(std::uint32_t offset, std::size_t hash, const std::string&& sym_name) : m_offset(offset), m_hash(hash), m_sym_name(sym_name) {} std::size_t hash() { return m_hash; } std::string name() { return m_sym_name; } - std::uint16_t offset() { return m_offset; } - void offset(std::uint16_t offset) { m_offset = offset; } + std::uint32_t offset() { return m_offset; } + void offset(std::uint32_t offset) { m_offset = offset; } void add_transform( std::pair entry) { @@ -31,6 +31,6 @@ class reloc_t { m_transforms; std::string m_sym_name; std::size_t m_hash; - std::uint16_t m_offset; + std::uint32_t m_offset; }; } // namespace theo::recomp \ No newline at end of file diff --git a/src/theo/decomp/decomp.cpp b/src/theo/decomp/decomp.cpp index 4da9d60..a2cc29d 100644 --- a/src/theo/decomp/decomp.cpp +++ b/src/theo/decomp/decomp.cpp @@ -27,7 +27,12 @@ std::optional decomp_t::decompose( auto sym_name = sym->name.to_string(img->get_strings()); if (sym_name.length()) { auto sym_hash = symbol_t::hash(sym_name.data()); - m_lookup_tbl[sym_hash].push_back({img, sym}); + auto sym_size = + sym->has_section() + ? next_sym(img, img->get_section(sym->section_index - 1), sym) + : 0u; + + m_lookup_tbl[sym_hash].push_back({img, sym, sym_size}); } } }); @@ -41,7 +46,7 @@ std::optional decomp_t::decompose( // extracted from the archive file... // std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) { - auto [img, sym] = data; + auto [img, sym, size] = data; // populate section hash table with sections for the img of this // symbol... only populate the hash table if its not been populated for // this obj before... @@ -73,8 +78,10 @@ std::optional decomp_t::decompose( scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME ? decomp::sym_type_t::instruction : decomp::sym_type_t::function; - auto fn_size = scn->size_raw_data; - auto fn_bgn = scn->ptr_raw_data + reinterpret_cast(img); + + auto fn_size = next_sym(img, scn, sym); + auto fn_bgn = scn->ptr_raw_data + reinterpret_cast(img) + + sym->value; std::vector fn(fn_bgn, fn_bgn + fn_size); decomp::routine_t rtn(sym, img, scn, fn, dcmp_type); @@ -97,10 +104,16 @@ std::optional decomp_t::decompose( .append("!") .append(std::to_string(img->file_header.timedate_stamp)); - std::vector scn_data( - reinterpret_cast(img) + scn->ptr_raw_data, - reinterpret_cast(img) + scn->ptr_raw_data + - scn->size_raw_data); + std::vector scn_data; + if (scn->characteristics.cnt_uninit_data) { + scn_data.insert(scn_data.begin(), scn->size_raw_data, 0); + } else { + scn_data.insert( + scn_data.begin(), + reinterpret_cast(img) + scn->ptr_raw_data, + reinterpret_cast(img) + scn->ptr_raw_data + + scn->size_raw_data); + } decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {}, {}, sym_type_t::section); @@ -137,30 +150,44 @@ std::optional decomp_t::decompose( return m_syms; } +std::uint32_t decomp_t::next_sym(coff::image_t* img, + coff::section_header_t* hdr, + coff::symbol_t* s) { + // loop over all symbols in this object... + // find the next symbol inside of the same section... + // if there is no next symbol then we use the end of the section... + std::uint32_t res = hdr->size_raw_data; + for (auto idx = 0u; idx < img->file_header.num_symbols; ++idx) { + auto q = img->get_symbol(idx); + if (q->derived_type == coff::derived_type_id::function && + q->section_index == s->section_index) + if (q->value > s->value && q->value < res) + res = q->value; + } + return res; +} + std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) { - std::optional> entry; + std::optional entry; if (!(entry = get_symbol(entry_sym.data())).has_value()) return 0u; std::set cache; const auto finding_syms = [&]() -> bool { for (auto itr = m_used_syms.begin(); itr != m_used_syms.end(); ++itr) { - auto [img, sym] = *itr; - if (sym->has_section() && !cache.count(sym)) { + auto [img, sym, size] = *itr; + if (sym->has_section() && !cache.count(sym) && size) { auto scn = img->get_section(sym->section_index - 1); auto num_relocs = scn->num_relocs; auto relocs = reinterpret_cast( scn->ptr_relocs + reinterpret_cast(img)); for (auto idx = 0u; idx < num_relocs; ++idx) { - auto reloc_sym = img->get_symbol(relocs[idx].symbol_index); - // if the symbol is defined in the current obj then we dont need to go - // looking for where its actually defined... - if (img->get_symbol(relocs[idx].symbol_index)->has_section()) { - sym_data_t sym_data = {img, reloc_sym}; - if (m_used_syms.emplace(sym_data).second) - return true; - } else { + auto reloc = &relocs[idx]; + // if the reloc is inside of the current symbol... + if (reloc->virtual_address >= sym->value && + reloc->virtual_address < sym->value + size) { + auto reloc_sym = img->get_symbol(reloc->symbol_index); auto sym_name = reloc_sym->name.to_string(img->get_strings()); entry = get_symbol(sym_name); if (m_used_syms.emplace(entry.value()).second) @@ -183,15 +210,17 @@ std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) { std::optional decomp_t::get_symbol(const std::string_view& name) { coff::image_t* img = {}; coff::symbol_t* sym = {}; + std::uint32_t size = {}; auto& syms = m_lookup_tbl[symbol_t::hash(name.data())]; for (auto idx = 0u; idx < syms.size(); ++idx) { - img = syms[idx].first; - sym = syms[idx].second; + img = std::get<0>(syms[idx]); + sym = std::get<1>(syms[idx]); + size = std::get<2>(syms[idx]); if (sym->has_section()) - return {{img, sym}}; + return {{img, sym, size}}; } - return {{img, sym}}; + return {{img, sym, size}}; } std::vector decomp_t::rtns() { diff --git a/src/theo/decomp/routine.cpp b/src/theo/decomp/routine.cpp index b9773ea..7bedb4e 100644 --- a/src/theo/decomp/routine.cpp +++ b/src/theo/decomp/routine.cpp @@ -19,12 +19,16 @@ std::vector routine_t::decompose() { for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) { auto scn_reloc = &scn_relocs[idx]; - auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index); - auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); - auto sym_hash = decomp::symbol_t::hash(sym_name.data()); - - relocs.push_back(recomp::reloc_t(scn_reloc->virtual_address, sym_hash, - sym_name.data())); + // if the reloc is in the current function... + if (scn_reloc->virtual_address >= m_sym->value && + scn_reloc->virtual_address < m_sym->value + m_data.size()) { + auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index); + auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); + auto sym_hash = decomp::symbol_t::hash(sym_name.data()); + relocs.push_back( + recomp::reloc_t(scn_reloc->virtual_address - m_sym->value, + sym_hash, sym_name.data())); + } } result.push_back(decomp::symbol_t( @@ -33,7 +37,7 @@ std::vector routine_t::decompose() { break; } case instruction: { - std::uint32_t offset = 0u; + std::uint32_t offset = {}; xed_error_enum_t err; xed_decoded_inst_t instr; @@ -64,9 +68,10 @@ std::vector routine_t::decompose() { auto reloc = std::find_if( scn_relocs, scn_relocs + m_scn->num_relocs, [&](coff::reloc_t reloc) { - return reloc.virtual_address >= offset && + return reloc.virtual_address >= m_sym->value + offset && reloc.virtual_address < - offset + xed_decoded_inst_get_length(&instr); + m_sym->value + offset + + xed_decoded_inst_get_length(&instr); }); // if there is indeed a reloc for this instruction... @@ -75,7 +80,7 @@ std::vector routine_t::decompose() { auto sym_reloc = m_img->get_symbol(reloc->symbol_index); auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); auto sym_hash = decomp::symbol_t::hash(sym_name.data()); - auto reloc_offset = reloc->virtual_address - offset; + auto reloc_offset = reloc->virtual_address - m_sym->value - offset; relocs.push_back( recomp::reloc_t(reloc_offset, sym_hash, sym_name.data()));