If the obj has more symbols with data than the max section size, then the
compiler will start putting multiple symbols into a single section. I
fixed my code so that it can handle decomposing functions and
relocations when this happens.

However, there is still an issue with private_symbol symbols: private
symbols with no name use the section name as their symbol name, which
means there can be duplicate symbol names. If the symbol is private, its
base type is none, and it has a section, then we need to create a symbol
name for it.

TODO: create a function that handles both situations, something like
std::string symbol_t::get_symbol_name(coff::symbol_t* sym).
3.0
_xeroxz 3 years ago
parent a0d35c1d5e
commit 367a62d8de

@ -13,7 +13,7 @@
#include <coff/image.hpp> #include <coff/image.hpp>
namespace theo::decomp { namespace theo::decomp {
using sym_data_t = std::pair<coff::image_t*, coff::symbol_t*>; using sym_data_t = std::tuple<coff::image_t*, coff::symbol_t*, std::uint32_t>;
class decomp_t { class decomp_t {
public: public:
explicit decomp_t(std::vector<std::uint8_t>& lib, explicit decomp_t(std::vector<std::uint8_t>& lib,
@ -30,6 +30,9 @@ class decomp_t {
private: private:
std::uint32_t ext_used_syms(const std::string&& entry_sym); std::uint32_t ext_used_syms(const std::string&& entry_sym);
std::optional<sym_data_t> get_symbol(const std::string_view& name); std::optional<sym_data_t> get_symbol(const std::string_view& name);
std::uint32_t next_sym(coff::image_t* img,
coff::section_header_t* hdr,
coff::symbol_t* s);
const std::vector<std::uint8_t> m_lib; const std::vector<std::uint8_t> m_lib;
std::vector<coff::image_t*> m_objs; std::vector<coff::image_t*> m_objs;

@ -6,15 +6,15 @@
namespace theo::recomp { namespace theo::recomp {
class reloc_t { class reloc_t {
public: public:
explicit reloc_t(std::uint16_t offset, explicit reloc_t(std::uint32_t offset,
std::size_t hash, std::size_t hash,
const std::string&& sym_name) const std::string&& sym_name)
: m_offset(offset), m_hash(hash), m_sym_name(sym_name) {} : m_offset(offset), m_hash(hash), m_sym_name(sym_name) {}
std::size_t hash() { return m_hash; } std::size_t hash() { return m_hash; }
std::string name() { return m_sym_name; } std::string name() { return m_sym_name; }
std::uint16_t offset() { return m_offset; } std::uint32_t offset() { return m_offset; }
void offset(std::uint16_t offset) { m_offset = offset; } void offset(std::uint32_t offset) { m_offset = offset; }
void add_transform( void add_transform(
std::pair<obf::transform::transform_t*, std::uint32_t> entry) { std::pair<obf::transform::transform_t*, std::uint32_t> entry) {
@ -31,6 +31,6 @@ class reloc_t {
m_transforms; m_transforms;
std::string m_sym_name; std::string m_sym_name;
std::size_t m_hash; std::size_t m_hash;
std::uint16_t m_offset; std::uint32_t m_offset;
}; };
} // namespace theo::recomp } // namespace theo::recomp

@ -27,7 +27,12 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
auto sym_name = sym->name.to_string(img->get_strings()); auto sym_name = sym->name.to_string(img->get_strings());
if (sym_name.length()) { if (sym_name.length()) {
auto sym_hash = symbol_t::hash(sym_name.data()); auto sym_hash = symbol_t::hash(sym_name.data());
m_lookup_tbl[sym_hash].push_back({img, sym}); auto sym_size =
sym->has_section()
? next_sym(img, img->get_section(sym->section_index - 1), sym)
: 0u;
m_lookup_tbl[sym_hash].push_back({img, sym, sym_size});
} }
} }
}); });
@ -41,7 +46,7 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
// extracted from the archive file... // extracted from the archive file...
// //
std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) { std::for_each(m_used_syms.begin(), m_used_syms.end(), [&](sym_data_t data) {
auto [img, sym] = data; auto [img, sym, size] = data;
// populate section hash table with sections for the img of this // populate section hash table with sections for the img of this
// symbol... only populate the hash table if its not been populated for // symbol... only populate the hash table if its not been populated for
// this obj before... // this obj before...
@ -73,8 +78,10 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME scn->name.to_string(img->get_strings()) == INSTR_SPLIT_SECTION_NAME
? decomp::sym_type_t::instruction ? decomp::sym_type_t::instruction
: decomp::sym_type_t::function; : decomp::sym_type_t::function;
auto fn_size = scn->size_raw_data;
auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img); auto fn_size = next_sym(img, scn, sym);
auto fn_bgn = scn->ptr_raw_data + reinterpret_cast<std::uint8_t*>(img) +
sym->value;
std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size); std::vector<std::uint8_t> fn(fn_bgn, fn_bgn + fn_size);
decomp::routine_t rtn(sym, img, scn, fn, dcmp_type); decomp::routine_t rtn(sym, img, scn, fn, dcmp_type);
@ -97,10 +104,16 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
.append("!") .append("!")
.append(std::to_string(img->file_header.timedate_stamp)); .append(std::to_string(img->file_header.timedate_stamp));
std::vector<std::uint8_t> scn_data( std::vector<std::uint8_t> scn_data;
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data, if (scn->characteristics.cnt_uninit_data) {
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data + scn_data.insert(scn_data.begin(), scn->size_raw_data, 0);
scn->size_raw_data); } else {
scn_data.insert(
scn_data.begin(),
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data,
reinterpret_cast<std::uint8_t*>(img) + scn->ptr_raw_data +
scn->size_raw_data);
}
decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {}, decomp::symbol_t new_scn_sym(img, scn_sym_name, 0, scn_data, scn, {},
{}, sym_type_t::section); {}, sym_type_t::section);
@ -137,30 +150,44 @@ std::optional<recomp::symbol_table_t*> decomp_t::decompose(
return m_syms; return m_syms;
} }
// computes how many bytes of section `hdr` belong to symbol `s`...
//
// every entry of the object's symbol table is scanned for function symbols
// that live in the same section as `s`... the closest one that starts after
// `s` marks where `s` ends... if there is no such symbol then the end of the
// section's raw data is used instead...
//
// img - object file whose symbol table is scanned...
// hdr - header of the section that `s` is defined in...
// s   - symbol whose extent is being measured...
//
// returns the number of bytes between `s->value` and the end boundary, or 0
// if `s` starts at or beyond that boundary...
std::uint32_t decomp_t::next_sym(coff::image_t* img,
                                 coff::section_header_t* hdr,
                                 coff::symbol_t* s) {
  // section relative offset at which `s` ends... defaults to the end of the
  // section when no later function symbol is found...
  std::uint32_t end = hdr->size_raw_data;

  for (auto idx = 0u; idx < img->file_header.num_symbols; ++idx) {
    const auto q = img->get_symbol(idx);

    // only function symbols inside of the same section can terminate `s`...
    if (q->derived_type != coff::derived_type_id::function ||
        q->section_index != s->section_index)
      continue;

    // keep the closest symbol that starts after `s`...
    if (q->value > s->value && q->value < end)
      end = q->value;
  }

  // callers treat the result as a length starting at `s->value` (they add it
  // to a pointer/offset that already includes `s->value`), so return the
  // distance to the boundary rather than the boundary offset itself... the
  // guard prevents unsigned wrap-around when `s` lies past the boundary...
  return end > s->value ? end - s->value : 0u;
}
std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) { std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) {
std::optional<std::pair<coff::image_t*, coff::symbol_t*>> entry; std::optional<sym_data_t> entry;
if (!(entry = get_symbol(entry_sym.data())).has_value()) if (!(entry = get_symbol(entry_sym.data())).has_value())
return 0u; return 0u;
std::set<coff::symbol_t*> cache; std::set<coff::symbol_t*> cache;
const auto finding_syms = [&]() -> bool { const auto finding_syms = [&]() -> bool {
for (auto itr = m_used_syms.begin(); itr != m_used_syms.end(); ++itr) { for (auto itr = m_used_syms.begin(); itr != m_used_syms.end(); ++itr) {
auto [img, sym] = *itr; auto [img, sym, size] = *itr;
if (sym->has_section() && !cache.count(sym)) { if (sym->has_section() && !cache.count(sym) && size) {
auto scn = img->get_section(sym->section_index - 1); auto scn = img->get_section(sym->section_index - 1);
auto num_relocs = scn->num_relocs; auto num_relocs = scn->num_relocs;
auto relocs = reinterpret_cast<coff::reloc_t*>( auto relocs = reinterpret_cast<coff::reloc_t*>(
scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(img)); scn->ptr_relocs + reinterpret_cast<std::uint8_t*>(img));
for (auto idx = 0u; idx < num_relocs; ++idx) { for (auto idx = 0u; idx < num_relocs; ++idx) {
auto reloc_sym = img->get_symbol(relocs[idx].symbol_index); auto reloc = &relocs[idx];
// if the symbol is defined in the current obj then we dont need to go // if the reloc is inside of the current symbol...
// looking for where its actually defined... if (reloc->virtual_address >= sym->value &&
if (img->get_symbol(relocs[idx].symbol_index)->has_section()) { reloc->virtual_address < sym->value + size) {
sym_data_t sym_data = {img, reloc_sym}; auto reloc_sym = img->get_symbol(reloc->symbol_index);
if (m_used_syms.emplace(sym_data).second)
return true;
} else {
auto sym_name = reloc_sym->name.to_string(img->get_strings()); auto sym_name = reloc_sym->name.to_string(img->get_strings());
entry = get_symbol(sym_name); entry = get_symbol(sym_name);
if (m_used_syms.emplace(entry.value()).second) if (m_used_syms.emplace(entry.value()).second)
@ -183,15 +210,17 @@ std::uint32_t decomp_t::ext_used_syms(const std::string&& entry_sym) {
std::optional<sym_data_t> decomp_t::get_symbol(const std::string_view& name) { std::optional<sym_data_t> decomp_t::get_symbol(const std::string_view& name) {
coff::image_t* img = {}; coff::image_t* img = {};
coff::symbol_t* sym = {}; coff::symbol_t* sym = {};
std::uint32_t size = {};
auto& syms = m_lookup_tbl[symbol_t::hash(name.data())]; auto& syms = m_lookup_tbl[symbol_t::hash(name.data())];
for (auto idx = 0u; idx < syms.size(); ++idx) { for (auto idx = 0u; idx < syms.size(); ++idx) {
img = syms[idx].first; img = std::get<0>(syms[idx]);
sym = syms[idx].second; sym = std::get<1>(syms[idx]);
size = std::get<2>(syms[idx]);
if (sym->has_section()) if (sym->has_section())
return {{img, sym}}; return {{img, sym, size}};
} }
return {{img, sym}}; return {{img, sym, size}};
} }
std::vector<routine_t> decomp_t::rtns() { std::vector<routine_t> decomp_t::rtns() {

@ -19,12 +19,16 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) { for (auto idx = 0u; idx < m_scn->num_relocs; ++idx) {
auto scn_reloc = &scn_relocs[idx]; auto scn_reloc = &scn_relocs[idx];
auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index); // if the reloc is in the current function...
auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); if (scn_reloc->virtual_address >= m_sym->value &&
auto sym_hash = decomp::symbol_t::hash(sym_name.data()); scn_reloc->virtual_address < m_sym->value + m_data.size()) {
auto sym_reloc = m_img->get_symbol(scn_relocs[idx].symbol_index);
relocs.push_back(recomp::reloc_t(scn_reloc->virtual_address, sym_hash, auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
sym_name.data())); auto sym_hash = decomp::symbol_t::hash(sym_name.data());
relocs.push_back(
recomp::reloc_t(scn_reloc->virtual_address - m_sym->value,
sym_hash, sym_name.data()));
}
} }
result.push_back(decomp::symbol_t( result.push_back(decomp::symbol_t(
@ -33,7 +37,7 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
break; break;
} }
case instruction: { case instruction: {
std::uint32_t offset = 0u; std::uint32_t offset = {};
xed_error_enum_t err; xed_error_enum_t err;
xed_decoded_inst_t instr; xed_decoded_inst_t instr;
@ -64,9 +68,10 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
auto reloc = std::find_if( auto reloc = std::find_if(
scn_relocs, scn_relocs + m_scn->num_relocs, scn_relocs, scn_relocs + m_scn->num_relocs,
[&](coff::reloc_t reloc) { [&](coff::reloc_t reloc) {
return reloc.virtual_address >= offset && return reloc.virtual_address >= m_sym->value + offset &&
reloc.virtual_address < reloc.virtual_address <
offset + xed_decoded_inst_get_length(&instr); m_sym->value + offset +
xed_decoded_inst_get_length(&instr);
}); });
// if there is indeed a reloc for this instruction... // if there is indeed a reloc for this instruction...
@ -75,7 +80,7 @@ std::vector<decomp::symbol_t> routine_t::decompose() {
auto sym_reloc = m_img->get_symbol(reloc->symbol_index); auto sym_reloc = m_img->get_symbol(reloc->symbol_index);
auto sym_name = sym_reloc->name.to_string(m_img->get_strings()); auto sym_name = sym_reloc->name.to_string(m_img->get_strings());
auto sym_hash = decomp::symbol_t::hash(sym_name.data()); auto sym_hash = decomp::symbol_t::hash(sym_name.data());
auto reloc_offset = reloc->virtual_address - offset; auto reloc_offset = reloc->virtual_address - m_sym->value - offset;
relocs.push_back( relocs.push_back(
recomp::reloc_t(reloc_offset, sym_hash, sym_name.data())); recomp::reloc_t(reloc_offset, sym_hash, sym_name.data()));

Loading…
Cancel
Save